From 1527b076ae2cb6a9c590a02725ed39399fcad1cf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 22 Jun 2023 10:24:35 +0200 Subject: [PATCH 001/333] spi: zynqmp-gqspi: fix clock imbalance on probe failure Make sure that the device is not runtime suspended before explicitly disabling the clocks on probe failure and on driver unbind to avoid a clock enable-count imbalance. Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support") Cc: stable@vger.kernel.org # 4.19 Cc: Naga Sureshkumar Relli Cc: Shubhrajyoti Datta Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230622082435.7873-1-johan+linaro@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-zynqmp-gqspi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index fb2ca9b90eab..c309dedfd602 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1342,9 +1342,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) return 0; clk_dis_all: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_dis_pclk: clk_disable_unprepare(xqspi->pclk); @@ -1368,11 +1368,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) { struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); + + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_disable_unprepare(xqspi->pclk); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_disable(&pdev->dev); } MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match); From 1f0bbf28940cf5edad90ab57b62aa8197bf5e836 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 9 Aug 2023 15:56:45 
+0530 Subject: [PATCH 002/333] nvmet-tcp: pass iov_len instead of sg->length to bvec_set_page() iov_len is the valid data length, so pass iov_len instead of sg->length to bvec_set_page(). Fixes: 5bfaba275ae6 ("nvmet-tcp: don't map pages which can't come from HIGHMEM") Signed-off-by: Rakshana Sridhar Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 868aa4de2e4c..cd92d7ddf5ed 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -348,7 +348,7 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) while (length) { u32 iov_len = min_t(u32, length, sg->length - sg_offset); - bvec_set_page(iov, sg_page(sg), sg->length, + bvec_set_page(iov, sg_page(sg), iov_len, sg->offset + sg_offset); length -= iov_len; From 71be868472dc5beb82feb4da2d3eb9cba785d660 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 20 Aug 2023 11:21:39 +0200 Subject: [PATCH 003/333] nvme: host: hwmon: constify pointers to hwmon_channel_info Statically allocated array of pointers to hwmon_channel_info can be made const for safety. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Christoph Hellwig Acked-by: Guenter Roeck Signed-off-by: Keith Busch --- drivers/nvme/host/hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 316f3e4ca7cc..8df73a0b3980 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -187,7 +187,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data, return 0; } -static const struct hwmon_channel_info *nvme_hwmon_info[] = { +static const struct hwmon_channel_info *const nvme_hwmon_info[] = { HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | From 8ae5b3a685dc59a8cf7ccfe0e850999ba9727a3c Mon Sep 17 00:00:00 2001 From: Nigel Kirkland Date: Thu, 17 Aug 2023 12:43:01 -0700 Subject: [PATCH 004/333] nvme-fc: Prevent null pointer dereference in nvme_fc_io_getuuid() The nvme_fc_fcp_op structure describing an AEN operation is initialized with a null request structure pointer. An FC LLDD may make a call to nvme_fc_io_getuuid passing a pointer to an nvmefc_fcp_req for an AEN operation. Add validation of the request structure pointer before dereference. 
Signed-off-by: Nigel Kirkland Reviewed-by: James Smart Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1cd2bf82319a..a15b37750d6e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1924,7 +1924,7 @@ char *nvme_fc_io_getuuid(struct nvmefc_fcp_req *req) struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); struct request *rq = op->rq; - if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !rq->bio) + if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !rq || !rq->bio) return NULL; return blkcg_get_fc_appid(rq->bio); } From 0c3b063ef4136191312a88ea7a670a6a2a2dae5a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 24 Aug 2023 08:37:01 +0100 Subject: [PATCH 005/333] drm/drm_connector: Provide short description of param 'supported_colorspaces' Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/drm_connector.c:2215: warning: Function parameter or member 'supported_colorspaces' not described in 'drm_mode_create_hdmi_colorspace_property' drivers/gpu/drm/drm_connector.c:2239: warning: Function parameter or member 'supported_colorspaces' not described in 'drm_mode_create_dp_colorspace_property' Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230824073710.2677348-17-lee@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_connector.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 3ed4cfcb350c..f28725736237 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -2203,6 +2203,7 @@ static int drm_mode_create_colorspace_property(struct drm_connector *connector, /** * drm_mode_create_hdmi_colorspace_property - create hdmi colorspace property * @connector: connector to create the Colorspace property on. 
+ * @supported_colorspaces: bitmap of supported color spaces * * Called by a driver the first time it's needed, must be attached to desired * HDMI connectors. @@ -2227,6 +2228,7 @@ EXPORT_SYMBOL(drm_mode_create_hdmi_colorspace_property); /** * drm_mode_create_dp_colorspace_property - create dp colorspace property * @connector: connector to create the Colorspace property on. + * @supported_colorspaces: bitmap of supported color spaces * * Called by a driver the first time it's needed, must be attached to desired * DP connectors. From e2884fe84a83c562346eb9d92783a3576ce67177 Mon Sep 17 00:00:00 2001 From: Simon Pilkington Date: Fri, 1 Sep 2023 08:17:38 +0100 Subject: [PATCH 006/333] drm/amd: Make fence wait in suballocator uninterruptible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c103a23f2f29 ("drm/amd: Convert amdgpu to use suballocation helper.") made the fence wait in amdgpu_sa_bo_new() interruptible but there is no code to handle an interrupt. This caused the kernel to randomly explode in high-VRAM-pressure situations so make it uninterruptible again. 
Signed-off-by: Simon Pilkington Fixes: c103a23f2f29 ("drm/amd: Convert amdgpu to use suballocation helper.") Reviewed-by: Christian König Signed-off-by: Christian König Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2761 CC: stable@vger.kernel.org # 6.4+ --- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index c6b4337eb20c..10df731998b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -81,7 +81,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, unsigned int size) { struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size, - GFP_KERNEL, true, 0); + GFP_KERNEL, false, 0); if (IS_ERR(sa)) { *sa_bo = NULL; From f8858d96061f5942216c6abb0194c3ea7b78e1e8 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Sat, 2 Sep 2023 13:42:04 +0530 Subject: [PATCH 007/333] sched/fair: Optimize should_we_balance() for large SMT systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit should_we_balance() is called in load_balance() to find out if the CPU that is trying to do the load balance is the right one or not. With commit: b1bfeab9b002 ("sched/fair: Consider the idle state of the whole core for load balance") the code tries to find an idle core to do the load balancing and falls back on an idle sibling CPU if there is no idle core. However, on larger SMT systems, it could be needlessly iterating to find an idle CPU by scanning all the CPUs in a non-idle core. If the core is not idle, and the first idle SMT sibling has been found, then it is not necessary to check the other SMT siblings for idleness. Let's say in SMT4, Core0 has CPUs 0,2,4,6, CPU0 is BUSY and the rest are IDLE. The balancing domain is MC/DIE. CPU2 will be set as the first idle_smt and the same process would be repeated for CPU4 and CPU6, but this is unnecessary. 
Since calling is_core_idle loops through all CPU's in the SMT mask, effect is multiplied by weight of smt_mask. For example,when say 1 CPU is busy, we would skip loop for 2 CPU's and skip iterating over 8CPU's. That effect would be more in DIE/NUMA domain where there are more cores. Testing and performance evaluation ================================== The test has been done on this system which has 12 cores, i.e 24 small cores with SMT=4: lscpu Architecture: ppc64le Byte Order: Little Endian CPU(s): 96 On-line CPU(s) list: 0-95 Model name: POWER10 (architected), altivec supported Thread(s) per core: 8 Used funclatency bcc tool to evaluate the time taken by should_we_balance(). For base tip/sched/core the time taken is collected by making the should_we_balance() noinline. time is in nanoseconds. The values are collected by running the funclatency tracer for 60 seconds. values are average of 3 such runs. This represents the expected reduced time with patch. tip/sched/core was at commit: 2f88c8e802c8 ("sched/eevdf/doc: Modify the documented knob to base_slice_ns as well") Results: ------------------------------------------------------------------------------ workload tip/sched/core with_patch(%gain) ------------------------------------------------------------------------------ idle system 809.3 695.0(16.45) stress ng – 12 threads -l 100 1013.5 893.1(13.49) stress ng – 24 threads -l 100 1073.5 980.0(9.54) stress ng – 48 threads -l 100 683.0 641.0(6.55) stress ng – 96 threads -l 100 2421.0 2300(5.26) stress ng – 96 threads -l 15 375.5 377.5(-0.53) stress ng – 96 threads -l 25 635.5 637.5(-0.31) stress ng – 96 threads -l 35 934.0 891.0(4.83) Ran schbench(old), hackbench and stress_ng to evaluate the workload performance between tip/sched/core and with patch. No modification to tip/sched/core TL;DR: Good improvement is seen with schbench. when hackbench and stress_ng runs for longer good improvement is seen. 
------------------------------------------------------------------------------ schbench(old) tip +patch(%gain) 10 iterations sched/core ------------------------------------------------------------------------------ 1 Threads 50.0th: 8.00 9.00(-12.50) 75.0th: 9.60 9.00(6.25) 90.0th: 11.80 10.20(13.56) 95.0th: 12.60 10.40(17.46) 99.0th: 13.60 11.90(12.50) 99.5th: 14.10 12.60(10.64) 99.9th: 15.90 14.60(8.18) 2 Threads 50.0th: 9.90 9.20(7.07) 75.0th: 12.60 10.10(19.84) 90.0th: 15.50 12.00(22.58) 95.0th: 17.70 14.00(20.90) 99.0th: 21.20 16.90(20.28) 99.5th: 22.60 17.50(22.57) 99.9th: 30.40 19.40(36.18) 4 Threads 50.0th: 12.50 10.60(15.20) 75.0th: 15.30 12.00(21.57) 90.0th: 18.60 14.10(24.19) 95.0th: 21.30 16.20(23.94) 99.0th: 26.00 20.70(20.38) 99.5th: 27.60 22.50(18.48) 99.9th: 33.90 31.40(7.37) 8 Threads 50.0th: 16.30 14.30(12.27) 75.0th: 20.20 17.40(13.86) 90.0th: 24.50 21.90(10.61) 95.0th: 27.30 24.70(9.52) 99.0th: 35.00 31.20(10.86) 99.5th: 46.40 33.30(28.23) 99.9th: 89.30 57.50(35.61) 16 Threads 50.0th: 22.70 20.70(8.81) 75.0th: 30.10 27.40(8.97) 90.0th: 36.00 32.80(8.89) 95.0th: 39.60 36.40(8.08) 99.0th: 49.20 44.10(10.37) 99.5th: 64.90 50.50(22.19) 99.9th: 143.50 100.60(29.90) 32 Threads 50.0th: 34.60 35.50(-2.60) 75.0th: 48.20 50.50(-4.77) 90.0th: 59.20 62.40(-5.41) 95.0th: 65.20 69.00(-5.83) 99.0th: 80.40 83.80(-4.23) 99.5th: 102.10 98.90(3.13) 99.9th: 727.10 506.80(30.30) schbench does improve in general. There is some run to run variation with schbench. Did a validation run to confirm that trend is similar. 
------------------------------------------------------------------------------ hackbench tip +patch(%gain) 20 iterations, 50000 loops sched/core ------------------------------------------------------------------------------ Process 10 groups : 11.74 11.70(0.34) Process 20 groups : 22.73 22.69(0.18) Process 30 groups : 33.39 33.40(-0.03) Process 40 groups : 43.73 43.61(0.27) Process 50 groups : 53.82 54.35(-0.98) Process 60 groups : 64.16 65.29(-1.76) thread 10 Time : 12.81 12.79(0.16) thread 20 Time : 24.63 24.47(0.65) Process(Pipe) 10 Time : 6.40 6.34(0.94) Process(Pipe) 20 Time : 10.62 10.63(-0.09) Process(Pipe) 30 Time : 15.09 14.84(1.66) Process(Pipe) 40 Time : 19.42 19.01(2.11) Process(Pipe) 50 Time : 24.04 23.34(2.91) Process(Pipe) 60 Time : 28.94 27.51(4.94) thread(Pipe) 10 Time : 6.96 6.87(1.29) thread(Pipe) 20 Time : 11.74 11.73(0.09) hackbench shows slight improvement with pipe. Slight degradation in process. ------------------------------------------------------------------------------ stress_ng tip +patch(%gain) 10 iterations 100000 cpu_ops sched/core ------------------------------------------------------------------------------ --cpu=96 -util=100 Time taken : 5.30, 5.01(5.47) --cpu=48 -util=100 Time taken : 7.94, 6.73(15.24) --cpu=24 -util=100 Time taken : 11.67, 8.75(25.02) --cpu=12 -util=100 Time taken : 15.71, 15.02(4.39) --cpu=96 -util=10 Time taken : 22.71, 22.19(2.29) --cpu=96 -util=20 Time taken : 12.14, 12.37(-1.89) --cpu=96 -util=30 Time taken : 8.76, 8.86(-1.14) --cpu=96 -util=40 Time taken : 7.13, 7.14(-0.14) --cpu=96 -util=50 Time taken : 6.10, 6.13(-0.49) --cpu=96 -util=60 Time taken : 5.42, 5.41(0.18) --cpu=96 -util=70 Time taken : 4.94, 4.94(0.00) --cpu=96 -util=80 Time taken : 4.56, 4.53(0.66) --cpu=96 -util=90 Time taken : 4.27, 4.26(0.23) Good improvement seen with 24 CPUs. In this case only one CPU is busy, and no core is idle. Decent improvement with 100% utilization case. no difference in other utilization. 
Fixes: b1bfeab9b002 ("sched/fair: Consider the idle state of the whole core for load balance") Signed-off-by: Shrikanth Hegde Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230902081204.232218-1-sshegde@linux.vnet.ibm.com --- kernel/sched/fair.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8dbff6e7ad4f..33a2b6bba676 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6619,6 +6619,7 @@ dequeue_throttle: /* Working cpumask for: load_balance, load_balance_newidle. */ static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask); +static DEFINE_PER_CPU(cpumask_var_t, should_we_balance_tmpmask); #ifdef CONFIG_NO_HZ_COMMON @@ -10917,6 +10918,7 @@ static int active_load_balance_cpu_stop(void *data); static int should_we_balance(struct lb_env *env) { + struct cpumask *swb_cpus = this_cpu_cpumask_var_ptr(should_we_balance_tmpmask); struct sched_group *sg = env->sd->groups; int cpu, idle_smt = -1; @@ -10940,8 +10942,9 @@ static int should_we_balance(struct lb_env *env) return 1; } + cpumask_copy(swb_cpus, group_balance_mask(sg)); /* Try to find first idle CPU */ - for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) { + for_each_cpu_and(cpu, swb_cpus, env->cpus) { if (!idle_cpu(cpu)) continue; @@ -10953,6 +10956,14 @@ static int should_we_balance(struct lb_env *env) if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !is_core_idle(cpu)) { if (idle_smt == -1) idle_smt = cpu; + /* + * If the core is not idle, and first SMT sibling which is + * idle has been found, then its not needed to check other + * SMT siblings for idleness: + */ +#ifdef CONFIG_SCHED_SMT + cpumask_andnot(swb_cpus, swb_cpus, cpu_smt_mask(cpu)); +#endif continue; } @@ -12918,6 +12929,8 @@ __init void init_sched_fair_class(void) for_each_possible_cpu(i) { zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i)); 
zalloc_cpumask_var_node(&per_cpu(select_rq_mask, i), GFP_KERNEL, cpu_to_node(i)); + zalloc_cpumask_var_node(&per_cpu(should_we_balance_tmpmask, i), + GFP_KERNEL, cpu_to_node(i)); #ifdef CONFIG_CFS_BANDWIDTH INIT_CSD(&cpu_rq(i)->cfsb_csd, __cfsb_csd_unthrottle, cpu_rq(i)); From 45dc8fc07d01b6786db88b5b176c67f9e3487d1e Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sat, 2 Sep 2023 10:51:02 +0100 Subject: [PATCH 008/333] fbdev/g364fb: fix build failure with mips Fix the typo which resulted in the driver using FB_DEFAULT_IOMEM_HELPERS instead of FB_DEFAULT_IOMEM_OPS as the fbdev I/O helpers. Fixes: 501126083855 ("fbdev/g364fb: Use fbdev I/O helpers") Suggested-by: Linus Torvalds Signed-off-by: Sudip Mukherjee Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230902095102.5908-1-sudip.mukherjee@codethink.co.uk --- drivers/video/fbdev/g364fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/g364fb.c b/drivers/video/fbdev/g364fb.c index 7a1013b22fa7..ee6fe51e0a6b 100644 --- a/drivers/video/fbdev/g364fb.c +++ b/drivers/video/fbdev/g364fb.c @@ -112,7 +112,7 @@ static int g364fb_blank(int blank, struct fb_info *info); static const struct fb_ops g364fb_ops = { .owner = THIS_MODULE, - FB_DEFAULT_IOMEM_HELPERS, + FB_DEFAULT_IOMEM_OPS, .fb_setcolreg = g364fb_setcolreg, .fb_pan_display = g364fb_pan_display, .fb_blank = g364fb_blank, From 7583028d359db3cd0072badcc576b4f9455fd27a Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 4 Sep 2023 10:14:20 +0800 Subject: [PATCH 009/333] drm: gm12u320: Fix the timeout usage for usb_bulk_msg() The timeout arg of usb_bulk_msg() is ms already, which has been converted to jiffies by msecs_to_jiffies() in usb_start_wait_urb(). So fix the usage by removing the redundant msecs_to_jiffies() in the macros. 
And as Hans suggested, also remove msecs_to_jiffies() for the IDLE_TIMEOUT macro to make it consistent here and so change IDLE_TIMEOUT to msecs_to_jiffies(IDLE_TIMEOUT) where it is used. Fixes: e4f86e437164 ("drm: Add Grain Media GM12U320 driver v2") Signed-off-by: Jinjie Ruan Suggested-by: Hans de Goede Reviewed-by: Hans de Goede Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230904021421.1663892-1-ruanjinjie@huawei.com --- drivers/gpu/drm/tiny/gm12u320.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c index c5bb683e440c..0187539ff5ea 100644 --- a/drivers/gpu/drm/tiny/gm12u320.c +++ b/drivers/gpu/drm/tiny/gm12u320.c @@ -70,10 +70,10 @@ MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)"); #define READ_STATUS_SIZE 13 #define MISC_VALUE_SIZE 4 -#define CMD_TIMEOUT msecs_to_jiffies(200) -#define DATA_TIMEOUT msecs_to_jiffies(1000) -#define IDLE_TIMEOUT msecs_to_jiffies(2000) -#define FIRST_FRAME_TIMEOUT msecs_to_jiffies(2000) +#define CMD_TIMEOUT 200 +#define DATA_TIMEOUT 1000 +#define IDLE_TIMEOUT 2000 +#define FIRST_FRAME_TIMEOUT 2000 #define MISC_REQ_GET_SET_ECO_A 0xff #define MISC_REQ_GET_SET_ECO_B 0x35 @@ -389,7 +389,7 @@ static void gm12u320_fb_update_work(struct work_struct *work) * switches back to showing its logo. */ queue_delayed_work(system_long_wq, &gm12u320->fb_update.work, - IDLE_TIMEOUT); + msecs_to_jiffies(IDLE_TIMEOUT)); return; err: From 0b0747d507bffb827e40fc0f9fb5883fffc23477 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 28 Aug 2023 15:10:18 -0700 Subject: [PATCH 010/333] scsi: megaraid_sas: Fix deadlock on firmware crashdump The following processes run into a deadlock. CPU 41 was waiting for CPU 29 to handle a CSD request while holding spinlock "crashdump_lock", but CPU 29 was hung by that spinlock with IRQs disabled. 
PID: 17360 TASK: ffff95c1090c5c40 CPU: 41 COMMAND: "mrdiagd" !# 0 [ffffb80edbf37b58] __read_once_size at ffffffff9b871a40 include/linux/compiler.h:185:0 !# 1 [ffffb80edbf37b58] atomic_read at ffffffff9b871a40 arch/x86/include/asm/atomic.h:27:0 !# 2 [ffffb80edbf37b58] dump_stack at ffffffff9b871a40 lib/dump_stack.c:54:0 # 3 [ffffb80edbf37b78] csd_lock_wait_toolong at ffffffff9b131ad5 kernel/smp.c:364:0 # 4 [ffffb80edbf37b78] __csd_lock_wait at ffffffff9b131ad5 kernel/smp.c:384:0 # 5 [ffffb80edbf37bf8] csd_lock_wait at ffffffff9b13267a kernel/smp.c:394:0 # 6 [ffffb80edbf37bf8] smp_call_function_many at ffffffff9b13267a kernel/smp.c:843:0 # 7 [ffffb80edbf37c50] smp_call_function at ffffffff9b13279d kernel/smp.c:867:0 # 8 [ffffb80edbf37c50] on_each_cpu at ffffffff9b13279d kernel/smp.c:976:0 # 9 [ffffb80edbf37c78] flush_tlb_kernel_range at ffffffff9b085c4b arch/x86/mm/tlb.c:742:0 #10 [ffffb80edbf37cb8] __purge_vmap_area_lazy at ffffffff9b23a1e0 mm/vmalloc.c:701:0 #11 [ffffb80edbf37ce0] try_purge_vmap_area_lazy at ffffffff9b23a2cc mm/vmalloc.c:722:0 #12 [ffffb80edbf37ce0] free_vmap_area_noflush at ffffffff9b23a2cc mm/vmalloc.c:754:0 #13 [ffffb80edbf37cf8] free_unmap_vmap_area at ffffffff9b23bb3b mm/vmalloc.c:764:0 #14 [ffffb80edbf37cf8] remove_vm_area at ffffffff9b23bb3b mm/vmalloc.c:1509:0 #15 [ffffb80edbf37d18] __vunmap at ffffffff9b23bb8a mm/vmalloc.c:1537:0 #16 [ffffb80edbf37d40] vfree at ffffffff9b23bc85 mm/vmalloc.c:1612:0 #17 [ffffb80edbf37d58] megasas_free_host_crash_buffer [megaraid_sas] at ffffffffc020b7f2 drivers/scsi/megaraid/megaraid_sas_fusion.c:3932:0 #18 [ffffb80edbf37d80] fw_crash_state_store [megaraid_sas] at ffffffffc01f804d drivers/scsi/megaraid/megaraid_sas_base.c:3291:0 #19 [ffffb80edbf37dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0 #20 [ffffb80edbf37dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0 #21 [ffffb80edbf37de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0 #22 [ffffb80edbf37e20] __vfs_write at 
ffffffff9b29418a fs/read_write.c:480:0 #23 [ffffb80edbf37ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0 #24 [ffffb80edbf37ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0 #25 [ffffb80edbf37ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0 #26 [ffffb80edbf37f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0 #27 [ffffb80edbf37f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0 PID: 17355 TASK: ffff95c1090c3d80 CPU: 29 COMMAND: "mrdiagd" !# 0 [ffffb80f2d3c7d30] __read_once_size at ffffffff9b0f2ab0 include/linux/compiler.h:185:0 !# 1 [ffffb80f2d3c7d30] native_queued_spin_lock_slowpath at ffffffff9b0f2ab0 kernel/locking/qspinlock.c:368:0 # 2 [ffffb80f2d3c7d58] pv_queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/paravirt.h:674:0 # 3 [ffffb80f2d3c7d58] queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/qspinlock.h:53:0 # 4 [ffffb80f2d3c7d68] queued_spin_lock at ffffffff9b8961a6 include/asm-generic/qspinlock.h:90:0 # 5 [ffffb80f2d3c7d68] do_raw_spin_lock_flags at ffffffff9b8961a6 include/linux/spinlock.h:173:0 # 6 [ffffb80f2d3c7d68] __raw_spin_lock_irqsave at ffffffff9b8961a6 include/linux/spinlock_api_smp.h:122:0 # 7 [ffffb80f2d3c7d68] _raw_spin_lock_irqsave at ffffffff9b8961a6 kernel/locking/spinlock.c:160:0 # 8 [ffffb80f2d3c7d88] fw_crash_buffer_store [megaraid_sas] at ffffffffc01f8129 drivers/scsi/megaraid/megaraid_sas_base.c:3205:0 # 9 [ffffb80f2d3c7dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0 #10 [ffffb80f2d3c7dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0 #11 [ffffb80f2d3c7de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0 #12 [ffffb80f2d3c7e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0 #13 [ffffb80f2d3c7ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0 #14 [ffffb80f2d3c7ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0 #15 [ffffb80f2d3c7ee8] SyS_write at ffffffff9b2946ec 
fs/read_write.c:582:0 #16 [ffffb80f2d3c7f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0 #17 [ffffb80f2d3c7f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0 The lock is used to synchronize different sysfs operations, it doesn't protect any resource that will be touched by an interrupt. Consequently it's not required to disable IRQs. Replace the spinlock with a mutex to fix the deadlock. Signed-off-by: Junxiao Bi Link: https://lore.kernel.org/r/20230828221018.19471-1-junxiao.bi@oracle.com Reviewed-by: Mike Christie Cc: stable@vger.kernel.org Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 2 +- drivers/scsi/megaraid/megaraid_sas_base.c | 21 +++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 3554f6b07727..94abba57582d 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2332,7 +2332,7 @@ struct megasas_instance { u32 support_morethan256jbod; /* FW support for more than 256 PD/JBOD */ bool use_seqnum_jbod_fp; /* Added for PD sequence */ bool smp_affinity_enable; - spinlock_t crashdump_lock; + struct mutex crashdump_lock; struct megasas_register_set __iomem *reg_set; u32 __iomem *reply_post_host_index_addr[MR_MAX_MSIX_REG_ARRAY]; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index b9d46dcb5210..e1aa667dae66 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3271,14 +3271,13 @@ fw_crash_buffer_store(struct device *cdev, struct megasas_instance *instance = (struct megasas_instance *) shost->hostdata; int val = 0; - unsigned long flags; if (kstrtoint(buf, 0, &val) != 0) return -EINVAL; - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); instance->fw_crash_buffer_offset = val; - 
spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return strlen(buf); } @@ -3293,24 +3292,23 @@ fw_crash_buffer_show(struct device *cdev, unsigned long dmachunk = CRASH_DMA_BUF_SIZE; unsigned long chunk_left_bytes; unsigned long src_addr; - unsigned long flags; u32 buff_offset; - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); buff_offset = instance->fw_crash_buffer_offset; if (!instance->crash_dump_buf || !((instance->fw_crash_state == AVAILABLE) || (instance->fw_crash_state == COPYING))) { dev_err(&instance->pdev->dev, "Firmware crash dump is not available\n"); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return -EINVAL; } if (buff_offset > (instance->fw_crash_buffer_size * dmachunk)) { dev_err(&instance->pdev->dev, "Firmware crash dump offset is out of range\n"); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return 0; } @@ -3322,7 +3320,7 @@ fw_crash_buffer_show(struct device *cdev, src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] + (buff_offset % dmachunk); memcpy(buf, (void *)src_addr, size); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return size; } @@ -3347,7 +3345,6 @@ fw_crash_state_store(struct device *cdev, struct megasas_instance *instance = (struct megasas_instance *) shost->hostdata; int val = 0; - unsigned long flags; if (kstrtoint(buf, 0, &val) != 0) return -EINVAL; @@ -3361,9 +3358,9 @@ fw_crash_state_store(struct device *cdev, instance->fw_crash_state = val; if ((val == COPIED) || (val == COPY_ERROR)) { - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); megasas_free_host_crash_buffer(instance); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); if (val == 
COPY_ERROR) dev_info(&instance->pdev->dev, "application failed to " "copy Firmware crash dump\n"); @@ -7422,7 +7419,7 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance) init_waitqueue_head(&instance->int_cmd_wait_q); init_waitqueue_head(&instance->abort_cmd_wait_q); - spin_lock_init(&instance->crashdump_lock); + mutex_init(&instance->crashdump_lock); spin_lock_init(&instance->mfi_pool_lock); spin_lock_init(&instance->hba_lock); spin_lock_init(&instance->stream_lock); From 31a0865bf593e59c4433a3624b4c87c40049ed9a Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Wed, 30 Aug 2023 23:19:42 -0600 Subject: [PATCH 011/333] scsi: ppa: Fix accidentally reversed conditions for 16-bit and 32-bit EPP The conditions were correct in the ppa_in() function but not in the ppa_out() function. Fixes: 68a4f84a17c1 ("scsi: ppa: Add a module parameter for the transfer mode") Signed-off-by: Alex Henrie Link: https://lore.kernel.org/r/20230831051945.515476-1-alexhenrie24@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ppa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index 19f0b93fa3d8..d592ee9170c1 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -307,9 +307,9 @@ static int ppa_out(ppa_struct *dev, char *buffer, int len) case PPA_EPP_8: epp_reset(ppb); w_ctr(ppb, 0x4); - if (dev->mode == PPA_EPP_32 && !(((long) buffer | len) & 0x01)) + if (dev->mode == PPA_EPP_32 && !(((long) buffer | len) & 0x03)) outsl(ppb + 4, buffer, len >> 2); - else if (dev->mode == PPA_EPP_16 && !(((long) buffer | len) & 0x03)) + else if (dev->mode == PPA_EPP_16 && !(((long) buffer | len) & 0x01)) outsw(ppb + 4, buffer, len >> 1); else outsb(ppb + 4, buffer, len); From 0be7592885d7b4c20595c388adc13930b653b847 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 31 Aug 2023 16:51:45 +0530 Subject: [PATCH 012/333] scsi: qla2xxx: Correct endianness for rqstlen and rsplen rqstlen and rsplen were changed to __le32 to fix sparse warnings: drivers/scsi/qla2xxx/qla_nvme.c:402:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:402:30: expected restricted __le32 [usertype] cmd_len drivers/scsi/qla2xxx/qla_nvme.c:402:30: got unsigned short [usertype] rsplen drivers/scsi/qla2xxx/qla_nvme.c:507:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:507:30: expected restricted __le32 [usertype] cmd_len drivers/scsi/qla2xxx/qla_nvme.c:507:30: got unsigned int [usertype] rqstlen drivers/scsi/qla2xxx/qla_nvme.c:508:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:508:30: expected restricted __le32 [usertype] rsp_len drivers/scsi/qla2xxx/qla_nvme.c:508:30: got unsigned int [usertype] rsplen Correct the endianness in qla2xxx driver thus avoiding changes in nvme-fc-driver.h. 
Fixes: 875386b98857 ("scsi: qla2xxx: Add Unsolicited LS Request and Response Support for NVMe") Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230831112146.32595-1-njavali@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_nvme.c | 10 +++++----- include/linux/nvme-fc-driver.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index db753d712991..a8ddf356e662 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -399,14 +399,14 @@ static int qla_nvme_xmt_ls_rsp(struct nvme_fc_local_port *lport, nvme->u.nvme.dl = 0; nvme->u.nvme.timeout_sec = 0; nvme->u.nvme.cmd_dma = fd_resp->rspdma; - nvme->u.nvme.cmd_len = fd_resp->rsplen; + nvme->u.nvme.cmd_len = cpu_to_le32(fd_resp->rsplen); nvme->u.nvme.rsp_len = 0; nvme->u.nvme.rsp_dma = 0; nvme->u.nvme.exchange_address = uctx->exchange_address; nvme->u.nvme.nport_handle = uctx->nport_handle; nvme->u.nvme.ox_id = uctx->ox_id; dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, - le32_to_cpu(fd_resp->rsplen), DMA_TO_DEVICE); + fd_resp->rsplen, DMA_TO_DEVICE); ql_dbg(ql_dbg_unsol, vha, 0x2122, "Unsol lsreq portid=%06x %8phC exchange_address 0x%x ox_id 0x%x hdl 0x%x\n", @@ -504,13 +504,13 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, nvme->u.nvme.desc = fd; nvme->u.nvme.dir = 0; nvme->u.nvme.dl = 0; - nvme->u.nvme.cmd_len = fd->rqstlen; - nvme->u.nvme.rsp_len = fd->rsplen; + nvme->u.nvme.cmd_len = cpu_to_le32(fd->rqstlen); + nvme->u.nvme.rsp_len = cpu_to_le32(fd->rsplen); nvme->u.nvme.rsp_dma = fd->rspdma; nvme->u.nvme.timeout_sec = fd->timeout; nvme->u.nvme.cmd_dma = fd->rqstdma; dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, - le32_to_cpu(fd->rqstlen), DMA_TO_DEVICE); + fd->rqstlen, DMA_TO_DEVICE); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { diff --git a/include/linux/nvme-fc-driver.h 
b/include/linux/nvme-fc-driver.h index f6ef8cf5d774..4109f1bd6128 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -53,10 +53,10 @@ struct nvmefc_ls_req { void *rqstaddr; dma_addr_t rqstdma; - __le32 rqstlen; + u32 rqstlen; void *rspaddr; dma_addr_t rspdma; - __le32 rsplen; + u32 rsplen; u32 timeout; void *private; @@ -120,7 +120,7 @@ struct nvmefc_ls_req { struct nvmefc_ls_rsp { void *rspbuf; dma_addr_t rspdma; - __le32 rsplen; + u16 rsplen; void (*done)(struct nvmefc_ls_rsp *rsp); void *nvme_fc_private; /* LLDD is not to access !! */ From 59f10a05b5c7b675256a66e3161741239889ff80 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 31 Aug 2023 16:51:46 +0530 Subject: [PATCH 013/333] scsi: qla2xxx: Use raw_smp_processor_id() instead of smp_processor_id() The following call trace was observed: localhost kernel: nvme nvme0: NVME-FC{0}: controller connect complete localhost kernel: BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u129:4/75092 localhost kernel: nvme nvme0: NVME-FC{0}: new ctrl: NQN "nqn.1992-08.com.netapp:sn.b42d198afb4d11ecad6d00a098d6abfa:subsystem.PR_Channel2022_RH84_subsystem_291" localhost kernel: caller is qla_nvme_post_cmd+0x216/0x1380 [qla2xxx] localhost kernel: CPU: 6 PID: 75092 Comm: kworker/u129:4 Kdump: loaded Tainted: G B W OE --------- --- 5.14.0-70.22.1.el9_0.x86_64+debug #1 localhost kernel: Hardware name: HPE ProLiant XL420 Gen10/ProLiant XL420 Gen10, BIOS U39 01/13/2022 localhost kernel: Workqueue: nvme-wq nvme_async_event_work [nvme_core] localhost kernel: Call Trace: localhost kernel: dump_stack_lvl+0x57/0x7d localhost kernel: check_preemption_disabled+0xc8/0xd0 localhost kernel: qla_nvme_post_cmd+0x216/0x1380 [qla2xxx] Use raw_smp_processor_id() instead of smp_processor_id(). Also use queue_work() across the driver instead of queue_work_on() thus avoiding usage of smp_processor_id() when CONFIG_DEBUG_PREEMPT is enabled. 
Cc: stable@vger.kernel.org Suggested-by: John Garry Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230831112146.32595-2-njavali@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_inline.h | 2 +- drivers/scsi/qla2xxx/qla_isr.c | 6 +++--- drivers/scsi/qla2xxx/qla_target.c | 3 +-- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 0556969f6dc1..a4a56ab0ba74 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -577,7 +577,7 @@ fcport_is_bigger(fc_port_t *fcport) static inline struct qla_qpair * qla_mapq_nvme_select_qpair(struct qla_hw_data *ha, struct qla_qpair *qpair) { - int cpuid = smp_processor_id(); + int cpuid = raw_smp_processor_id(); if (qpair->cpuid != cpuid && ha->qp_cpu_map[cpuid]) { diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index e98788191897..d48007e18288 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3965,7 +3965,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, if (!ha->flags.fw_started) return; - if (rsp->qpair->cpuid != smp_processor_id() || !rsp->qpair->rcv_intr) { + if (rsp->qpair->cpuid != raw_smp_processor_id() || !rsp->qpair->rcv_intr) { rsp->qpair->rcv_intr = 1; if (!rsp->qpair->cpu_mapped) @@ -4468,7 +4468,7 @@ qla2xxx_msix_rsp_q(int irq, void *dev_id) } ha = qpair->hw; - queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work); + queue_work(ha->wq, &qpair->q_work); return IRQ_HANDLED; } @@ -4494,7 +4494,7 @@ qla2xxx_msix_rsp_q_hs(int irq, void *dev_id) wrt_reg_dword(&reg->hccr, HCCRX_CLR_RISC_INT); spin_unlock_irqrestore(&ha->hardware_lock, flags); - queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work); + queue_work(ha->wq, &qpair->q_work); return IRQ_HANDLED; } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 2b815a9928ea..2ef2dbac0db2 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4425,8 +4425,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq, &cmd->work); } else if (ha->msix_count) { if (cmd->atio.u.isp24.fcp_cmnd.rddata) - queue_work_on(smp_processor_id(), qla_tgt_wq, - &cmd->work); + queue_work(qla_tgt_wq, &cmd->work); else queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq, &cmd->work); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 3b5ba4b47b3b..68a0e6a2fb6e 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -310,7 +310,7 @@ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) cmd->trc_flags |= TRC_CMD_DONE; INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free); - queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); + queue_work(tcm_qla2xxx_free_wq, &cmd->work); } /* @@ -547,7 +547,7 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) cmd->trc_flags |= TRC_DATA_IN; cmd->cmd_in_wq = 1; INIT_WORK(&cmd->work, tcm_qla2xxx_handle_data_work); - queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); + queue_work(tcm_qla2xxx_free_wq, &cmd->work); } static int tcm_qla2xxx_chk_dif_tags(uint32_t tag) From d0b0822e32dbae80bbcb3cc86f34d28539d913df Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Thu, 31 Aug 2023 22:09:29 +0800 Subject: [PATCH 014/333] scsi: qla2xxx: Fix NULL vs IS_ERR() bug for debugfs_create_dir() Since both debugfs_create_dir() and debugfs_create_file() return ERR_PTR and never NULL, use IS_ERR() instead of checking for NULL. Fixes: 1e98fb0f9208 ("scsi: qla2xxx: Setup debugfs entries for remote ports") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/r/20230831140930.3166359-1-ruanjinjie@huawei.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_dfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index f060e593685d..a7a364760b80 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -116,7 +116,7 @@ qla2x00_dfs_create_rport(scsi_qla_host_t *vha, struct fc_port *fp) sprintf(wwn, "pn-%016llx", wwn_to_u64(fp->port_name)); fp->dfs_rport_dir = debugfs_create_dir(wwn, vha->dfs_rport_root); - if (!fp->dfs_rport_dir) + if (IS_ERR(fp->dfs_rport_dir)) return; if (NVME_TARGET(vha->hw, fp)) debugfs_create_file("dev_loss_tmo", 0600, fp->dfs_rport_dir, @@ -708,14 +708,14 @@ create_nodes: if (IS_QLA27XX(ha) || IS_QLA83XX(ha) || IS_QLA28XX(ha)) { ha->tgt.dfs_naqp = debugfs_create_file("naqp", 0400, ha->dfs_dir, vha, &dfs_naqp_ops); - if (!ha->tgt.dfs_naqp) { + if (IS_ERR(ha->tgt.dfs_naqp)) { ql_log(ql_log_warn, vha, 0xd011, "Unable to create debugFS naqp node.\n"); goto out; } } vha->dfs_rport_root = debugfs_create_dir("rports", ha->dfs_dir); - if (!vha->dfs_rport_root) { + if (IS_ERR(vha->dfs_rport_root)) { ql_log(ql_log_warn, vha, 0xd012, "Unable to create debugFS rports node.\n"); goto out; From 5c584fe6098ae1727650acbabdef0669cefec7be Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Thu, 31 Aug 2023 14:36:38 +0000 Subject: [PATCH 015/333] scsi: target: Replace strlcpy() with strscpy() strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). Direct replacement is safe here since return value of -errno is used to check for truncation instead of sizeof(dest). 
[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Link: https://lore.kernel.org/r/20230831143638.232596-1-azeemshaikh38@gmail.com Reviewed-by: Kees Cook Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 936e5ff1b209..d5860c1c1f46 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1392,16 +1392,16 @@ static ssize_t target_wwn_vendor_id_store(struct config_item *item, /* +2 to allow for a trailing (stripped) '\n' and null-terminator */ unsigned char buf[INQUIRY_VENDOR_LEN + 2]; char *stripped = NULL; - size_t len; + ssize_t len; ssize_t ret; - len = strlcpy(buf, page, sizeof(buf)); - if (len < sizeof(buf)) { + len = strscpy(buf, page, sizeof(buf)); + if (len > 0) { /* Strip any newline added from userspace. */ stripped = strstrip(buf); len = strlen(stripped); } - if (len > INQUIRY_VENDOR_LEN) { + if (len < 0 || len > INQUIRY_VENDOR_LEN) { pr_err("Emulated T10 Vendor Identification exceeds" " INQUIRY_VENDOR_LEN: " __stringify(INQUIRY_VENDOR_LEN) "\n"); @@ -1448,16 +1448,16 @@ static ssize_t target_wwn_product_id_store(struct config_item *item, /* +2 to allow for a trailing (stripped) '\n' and null-terminator */ unsigned char buf[INQUIRY_MODEL_LEN + 2]; char *stripped = NULL; - size_t len; + ssize_t len; ssize_t ret; - len = strlcpy(buf, page, sizeof(buf)); - if (len < sizeof(buf)) { + len = strscpy(buf, page, sizeof(buf)); + if (len > 0) { /* Strip any newline added from userspace. 
*/ stripped = strstrip(buf); len = strlen(stripped); } - if (len > INQUIRY_MODEL_LEN) { + if (len < 0 || len > INQUIRY_MODEL_LEN) { pr_err("Emulated T10 Vendor exceeds INQUIRY_MODEL_LEN: " __stringify(INQUIRY_MODEL_LEN) "\n"); @@ -1504,16 +1504,16 @@ static ssize_t target_wwn_revision_store(struct config_item *item, /* +2 to allow for a trailing (stripped) '\n' and null-terminator */ unsigned char buf[INQUIRY_REVISION_LEN + 2]; char *stripped = NULL; - size_t len; + ssize_t len; ssize_t ret; - len = strlcpy(buf, page, sizeof(buf)); - if (len < sizeof(buf)) { + len = strscpy(buf, page, sizeof(buf)); + if (len > 0) { /* Strip any newline added from userspace. */ stripped = strstrip(buf); len = strlen(stripped); } - if (len > INQUIRY_REVISION_LEN) { + if (len < 0 || len > INQUIRY_REVISION_LEN) { pr_err("Emulated T10 Revision exceeds INQUIRY_REVISION_LEN: " __stringify(INQUIRY_REVISION_LEN) "\n"); From 7df0b2605489bef3f4223ad66f1f9bb8d50d4cd2 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Fri, 1 Sep 2023 11:36:46 +0530 Subject: [PATCH 016/333] scsi: qedf: Add synchronization between I/O completions and abort Avoid race condition between I/O completion and abort processing by protecting the cmd_type with the rport lock. Signed-off-by: Javed Hasan Signed-off-by: Saurav Kashyap Link: https://lore.kernel.org/r/20230901060646.27885-1-skashyap@marvell.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qedf/qedf_io.c | 10 ++++++++-- drivers/scsi/qedf/qedf_main.c | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 4750ec5789a8..10fe3383855c 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1904,6 +1904,7 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) goto drop_rdata_kref; } + spin_lock_irqsave(&fcport->rport_lock, flags); if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags) || test_bit(QEDF_CMD_IN_ABORT, &io_req->flags)) { @@ -1911,17 +1912,20 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) "io_req xid=0x%x sc_cmd=%p already in cleanup or abort processing or already completed.\n", io_req->xid, io_req->sc_cmd); rc = 1; + spin_unlock_irqrestore(&fcport->rport_lock, flags); goto drop_rdata_kref; } + /* Set the command type to abort */ + io_req->cmd_type = QEDF_ABTS; + spin_unlock_irqrestore(&fcport->rport_lock, flags); + kref_get(&io_req->refcount); xid = io_req->xid; qedf->control_requests++; qedf->packet_aborts++; - /* Set the command type to abort */ - io_req->cmd_type = QEDF_ABTS; io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; set_bit(QEDF_CMD_IN_ABORT, &io_req->flags); @@ -2210,7 +2214,9 @@ process_els: refcount, fcport, fcport->rdata->ids.port_id); /* Cleanup cmds re-use the same TID as the original I/O */ + spin_lock_irqsave(&fcport->rport_lock, flags); io_req->cmd_type = QEDF_CLEANUP; + spin_unlock_irqrestore(&fcport->rport_lock, flags); io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; init_completion(&io_req->cleanup_done); diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 7825765c936c..91f3f1d7098e 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2805,6 +2805,8 @@ void qedf_process_cqe(struct qedf_ctx *qedf, struct 
fcoe_cqe *cqe) struct qedf_ioreq *io_req; struct qedf_rport *fcport; u32 comp_type; + u8 io_comp_type; + unsigned long flags; comp_type = (cqe->cqe_data >> FCOE_CQE_CQE_TYPE_SHIFT) & FCOE_CQE_CQE_TYPE_MASK; @@ -2838,11 +2840,14 @@ void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe) return; } + spin_lock_irqsave(&fcport->rport_lock, flags); + io_comp_type = io_req->cmd_type; + spin_unlock_irqrestore(&fcport->rport_lock, flags); switch (comp_type) { case FCOE_GOOD_COMPLETION_CQE_TYPE: atomic_inc(&fcport->free_sqes); - switch (io_req->cmd_type) { + switch (io_comp_type) { case QEDF_SCSI_CMD: qedf_scsi_completion(qedf, cqe, io_req); break; From 2d3f59cf868b4a2dd678a96cd49bdd91411bd59f Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Mon, 4 Sep 2023 10:30:44 +0900 Subject: [PATCH 017/333] scsi: ufs: core: Move __ufshcd_send_uic_cmd() outside host_lock __ufshcd_send_uic_cmd() is wrapped by uic_cmd_mutex and its related contexts are accessed within the section wrapped by uic_cmd_mutex. Thus, wrapping with host_lock is redundant. Signed-off-by: Kiwoong Kim Link: https://lore.kernel.org/r/782ba5f26f0a96e58d85dff50751787d2d2a6b2b.1693790060.git.kwmad.kim@samsung.com Reviewed-by: Bart Van Assche Reviewed-by: Chanwoo Lee Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 93417518c04d..f5e66d775b10 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2392,7 +2392,6 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, bool completion) { lockdep_assert_held(&hba->uic_cmd_mutex); - lockdep_assert_held(hba->host->host_lock); if (!ufshcd_ready_for_uic_cmd(hba)) { dev_err(hba->dev, @@ -2419,7 +2418,6 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) { int ret; - unsigned long flags; if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) return 0; @@ -2428,9 +2426,7 @@ int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) mutex_lock(&hba->uic_cmd_mutex); ufshcd_add_delay_before_dme_cmd(hba); - spin_lock_irqsave(hba->host->host_lock, flags); ret = __ufshcd_send_uic_cmd(hba, uic_cmd, true); - spin_unlock_irqrestore(hba->host->host_lock, flags); if (!ret) ret = ufshcd_wait_for_uic_cmd(hba, uic_cmd); @@ -4133,8 +4129,8 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) wmb(); reenable_intr = true; } - ret = __ufshcd_send_uic_cmd(hba, cmd, false); spin_unlock_irqrestore(hba->host->host_lock, flags); + ret = __ufshcd_send_uic_cmd(hba, cmd, false); if (ret) { dev_err(hba->dev, "pwr ctrl cmd 0x%x with mode 0x%x uic error %d\n", From d32533d30e2119b0c0aa17596734f1f842f750df Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Mon, 4 Sep 2023 10:30:45 +0900 Subject: [PATCH 018/333] scsi: ufs: core: Poll HCS.UCRDY before issuing a UIC command With auto hibern8 enabled, UIC could be busy processing a hibern8 operation and the HCI would reports UIC not ready for a short while through HCS.UCRDY. The UFS driver doesn't currently handle this situation. 
The UFSHCI spec specifies UCRDY like this: whether the host controller is ready to process UIC COMMAND The 'ready' could be interpreted in many different ways. If the meaning includes not processing any request from HCI, processing a hibern8 operation can be 'not ready'. In this situation, the driver needs to wait until the operation is completed. Signed-off-by: Kiwoong Kim Link: https://lore.kernel.org/r/550484ffb66300bdcec63d3e304dfd55cb432f1f.1693790060.git.kwmad.kim@samsung.com Reviewed-by: Adrian Hunter Reviewed-by: Chanwoo Lee Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f5e66d775b10..c2df07545f96 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -2299,7 +2300,11 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) */ static inline bool ufshcd_ready_for_uic_cmd(struct ufs_hba *hba) { - return ufshcd_readl(hba, REG_CONTROLLER_STATUS) & UIC_COMMAND_READY; + u32 val; + int ret = read_poll_timeout(ufshcd_readl, val, val & UIC_COMMAND_READY, + 500, UIC_CMD_TIMEOUT * 1000, false, hba, + REG_CONTROLLER_STATUS); + return ret == 0 ? true : false; } /** From d20b484c674d2eae816978a98fa38b4054aeca3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 6 Sep 2023 11:50:39 +0200 Subject: [PATCH 019/333] drm/drm_exec: Work around a WW mutex lockdep oddity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If *any* object of a certain WW mutex class is locked, lockdep will consider *all* mutexes of that class as locked. Also the lock allocation tracking code will apparently register only the address of the first mutex of a given class locked in a sequence.
This has the odd consequence that if that first mutex is unlocked while other mutexes of the same class remain locked and its memory is then freed, the lock alloc tracking code will incorrectly assume that memory is freed with a held lock in there. For now, work around that for drm_exec by releasing the first grabbed object lock last. v2: - Fix a typo (Danilo Krummrich) - Reword the commit message a bit. - Add a Fixes: tag Related lock alloc tracking warning: [ 322.660067] ========================= [ 322.660070] WARNING: held lock freed! [ 322.660074] 6.5.0-rc7+ #155 Tainted: G U N [ 322.660078] ------------------------- [ 322.660081] kunit_try_catch/4981 is freeing memory ffff888112adc000-ffff888112adc3ff, with a lock still held there! [ 322.660089] ffff888112adc1a0 (reservation_ww_class_mutex){+.+.}-{3:3}, at: drm_exec_lock_obj+0x11a/0x600 [drm_exec] [ 322.660104] 2 locks held by kunit_try_catch/4981: [ 322.660108] #0: ffffc9000343fe18 (reservation_ww_class_acquire){+.+.}-{0:0}, at: test_early_put+0x22f/0x490 [drm_exec_test] [ 322.660123] #1: ffff888112adc1a0 (reservation_ww_class_mutex){+.+.}-{3:3}, at: drm_exec_lock_obj+0x11a/0x600 [drm_exec] [ 322.660135] stack backtrace: [ 322.660139] CPU: 7 PID: 4981 Comm: kunit_try_catch Tainted: G U N 6.5.0-rc7+ #155 [ 322.660146] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 0403 01/26/2021 [ 322.660152] Call Trace: [ 322.660155] [ 322.660158] dump_stack_lvl+0x57/0x90 [ 322.660164] debug_check_no_locks_freed+0x20b/0x2b0 [ 322.660172] slab_free_freelist_hook+0xa1/0x160 [ 322.660179] ? drm_exec_unlock_all+0x168/0x2a0 [drm_exec] [ 322.660186] __kmem_cache_free+0xb2/0x290 [ 322.660192] drm_exec_unlock_all+0x168/0x2a0 [drm_exec] [ 322.660200] drm_exec_fini+0xf/0x1c0 [drm_exec] [ 322.660206] test_early_put+0x289/0x490 [drm_exec_test] [ 322.660215] ? __pfx_test_early_put+0x10/0x10 [drm_exec_test] [ 322.660222] ? __kasan_check_byte+0xf/0x40 [ 322.660227] ? __ksize+0x63/0x140 [ 322.660233] ?
drmm_add_final_kfree+0x3e/0xa0 [drm] [ 322.660289] ? _raw_spin_unlock_irqrestore+0x30/0x60 [ 322.660294] ? lockdep_hardirqs_on+0x7d/0x100 [ 322.660301] ? __pfx_kunit_try_run_case+0x10/0x10 [kunit] [ 322.660310] ? __pfx_kunit_generic_run_threadfn_adapter+0x10/0x10 [kunit] [ 322.660319] kunit_generic_run_threadfn_adapter+0x4a/0x90 [kunit] [ 322.660328] kthread+0x2e7/0x3c0 [ 322.660334] ? __pfx_kthread+0x10/0x10 [ 322.660339] ret_from_fork+0x2d/0x70 [ 322.660345] ? __pfx_kthread+0x10/0x10 [ 322.660349] ret_from_fork_asm+0x1b/0x30 [ 322.660358] [ 322.660818] ok 8 test_early_put Cc: Christian König Cc: Boris Brezillon Cc: Danilo Krummrich Cc: dri-devel@lists.freedesktop.org Fixes: 09593216bff1 ("drm: execution context for GEM buffers v7") Signed-off-by: Thomas Hellström Reviewed-by: Boris Brezillon Reviewed-by: Danilo Krummrich Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230906095039.3320-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/drm_exec.c | 2 +- include/drm/drm_exec.h | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c index ff69cf0fb42a..5d2809de4517 100644 --- a/drivers/gpu/drm/drm_exec.c +++ b/drivers/gpu/drm/drm_exec.c @@ -56,7 +56,7 @@ static void drm_exec_unlock_all(struct drm_exec *exec) struct drm_gem_object *obj; unsigned long index; - drm_exec_for_each_locked_object(exec, index, obj) { + drm_exec_for_each_locked_object_reverse(exec, index, obj) { dma_resv_unlock(obj->resv); drm_gem_object_put(obj); } diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h index e0462361adf9..b5bf0b6da791 100644 --- a/include/drm/drm_exec.h +++ b/include/drm/drm_exec.h @@ -51,6 +51,20 @@ struct drm_exec { struct drm_gem_object *prelocked; }; +/** + * drm_exec_obj() - Return the object for a give drm_exec index + * @exec: Pointer to the drm_exec context + * @index: The index. 
+ * + * Return: Pointer to the locked object corresponding to @index if + * index is within the number of locked objects. NULL otherwise. + */ +static inline struct drm_gem_object * +drm_exec_obj(struct drm_exec *exec, unsigned long index) +{ + return index < exec->num_objects ? exec->objects[index] : NULL; +} + /** * drm_exec_for_each_locked_object - iterate over all the locked objects * @exec: drm_exec object @@ -59,10 +73,23 @@ struct drm_exec { * * Iterate over all the locked GEM objects inside the drm_exec object. */ -#define drm_exec_for_each_locked_object(exec, index, obj) \ - for (index = 0, obj = (exec)->objects[0]; \ - index < (exec)->num_objects; \ - ++index, obj = (exec)->objects[index]) +#define drm_exec_for_each_locked_object(exec, index, obj) \ + for ((index) = 0; ((obj) = drm_exec_obj(exec, index)); ++(index)) + +/** + * drm_exec_for_each_locked_object_reverse - iterate over all the locked + * objects in reverse locking order + * @exec: drm_exec object + * @index: unsigned long index for the iteration + * @obj: the current GEM object + * + * Iterate over all the locked GEM objects inside the drm_exec object in + * reverse locking order. Note that @index may go below zero and wrap, + * but that will be caught by drm_exec_obj(), returning a NULL object. 
+ */ +#define drm_exec_for_each_locked_object_reverse(exec, index, obj) \ + for ((index) = (exec)->num_objects - 1; \ + ((obj) = drm_exec_obj(exec, index)); --(index)) /** * drm_exec_until_all_locked - loop until all GEM objects are locked From dcbad727513d277144aee482b2ffbcd2255c37aa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 6 Sep 2023 15:55:17 -0400 Subject: [PATCH 020/333] drm/radeon: make fence wait in suballocator uninterrruptable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 254986e324ad ("drm/radeon: Use the drm suballocation manager implementation.") made the fence wait in amdgpu_sa_bo_new() interruptible but there is no code to handle an interrupt. This caused the kernel to randomly explode in high-VRAM-pressure situations so make it uninterruptible again. Fixes: 254986e324ad ("drm/radeon: Use the drm suballocation manager implementation.") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2769 Signed-off-by: Alex Deucher CC: stable@vger.kernel.org # 6.4+ CC: Simon Pilkington Link: https://patchwork.freedesktop.org/patch/msgid/20230906195517.1345717-1-alexander.deucher@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon_sa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index c87a57c9c592..22dd8b445685 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -123,7 +123,7 @@ int radeon_sa_bo_new(struct radeon_sa_manager *sa_manager, unsigned int size, unsigned int align) { struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size, - GFP_KERNEL, true, align); + GFP_KERNEL, false, align); if (IS_ERR(sa)) { *sa_bo = NULL; From f94cf2206b066bd6d761d3347fd35f77b828c376 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 7 Sep 2023 09:40:07 -0400 Subject: [PATCH 021/333] buffer: Make bh_offset() work for compound pages If the 
buffer pointed to by the buffer_head is part of a compound page, bh_offset() assumes that b_page is the precise page that contains the data. A recent change to jbd2 inadvertently violated that assumption. By using page_size(), we support both b_page being set to the head page (as page_size() will return the size of the entire folio) and the precise page (as it will return PAGE_SIZE for a tail page). Fixes: 8147c4c4546f ("jbd2: use a folio in jbd2_journal_write_metadata_buffer()") Reported-by: Zorro Lang Tested-by: Ritesh Harjani (IBM) Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/buffer_head.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6cb3e9af78c9..4ba242073adc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -173,7 +173,10 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) return test_bit_acquire(BH_Uptodate, &bh->b_state); } -#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) +static inline unsigned long bh_offset(const struct buffer_head *bh) +{ + return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1); +} /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ From 147d4a092e9a726ce706dbf0d329d2b96a025459 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Thu, 7 Sep 2023 09:47:32 -0400 Subject: [PATCH 022/333] jbd2: Remove page size assumptions jbd2_alloc() allocates a buffer from slab when the block size is smaller than PAGE_SIZE, and slab may be using a compound page. Before commit 8147c4c4546f, we set b_page to the precise page containing the buffer and this code worked well. Now we set b_page to the head page of the allocation, so we can no longer use offset_in_page(). While we could do a 1:1 replacement with offset_in_folio(), use the more idiomatic bh_offset() and the folio APIs to map the buffer. 
This isn't enough to support a b_size larger than PAGE_SIZE on HIGHMEM machines, but this is good enough to fix the actual bug we're seeing. Fixes: 8147c4c4546f ("jbd2: use a folio in jbd2_journal_write_metadata_buffer()") Reported-by: Zorro Lang Signed-off-by: Ritesh Harjani (IBM) [converted to be more folio] Signed-off-by: Matthew Wilcox (Oracle) --- fs/jbd2/commit.c | 16 ++++++---------- fs/jbd2/transaction.c | 12 ++++-------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 1073259902a6..8d6f934c3d95 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -298,14 +298,12 @@ static int journal_finish_inode_data_buffers(journal_t *journal, static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) { - struct page *page = bh->b_page; char *addr; __u32 checksum; - addr = kmap_atomic(page); - checksum = crc32_be(crc32_sum, - (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); - kunmap_atomic(addr); + addr = kmap_local_folio(bh->b_folio, bh_offset(bh)); + checksum = crc32_be(crc32_sum, addr, bh->b_size); + kunmap_local(addr); return checksum; } @@ -322,7 +320,6 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, struct buffer_head *bh, __u32 sequence) { journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; - struct page *page = bh->b_page; __u8 *addr; __u32 csum32; __be32 seq; @@ -331,11 +328,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, return; seq = cpu_to_be32(sequence); - addr = kmap_atomic(page); + addr = kmap_local_folio(bh->b_folio, bh_offset(bh)); csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); - csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), - bh->b_size); - kunmap_atomic(addr); + csum32 = jbd2_chksum(j, csum32, addr, bh->b_size); + kunmap_local(addr); if (jbd2_has_feature_csum3(j)) tag3->t_checksum = cpu_to_be32(csum32); diff --git a/fs/jbd2/transaction.c 
b/fs/jbd2/transaction.c index 4d1fda1f7143..5f08b5fd105a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -935,19 +935,15 @@ static void warn_dirty_buffer(struct buffer_head *bh) /* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */ static void jbd2_freeze_jh_data(struct journal_head *jh) { - struct page *page; - int offset; char *source; struct buffer_head *bh = jh2bh(jh); J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n"); - page = bh->b_page; - offset = offset_in_page(bh->b_data); - source = kmap_atomic(page); + source = kmap_local_folio(bh->b_folio, bh_offset(bh)); /* Fire data frozen trigger just before we copy the data */ - jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers); - memcpy(jh->b_frozen_data, source + offset, bh->b_size); - kunmap_atomic(source); + jbd2_buffer_frozen_trigger(jh, source, jh->b_triggers); + memcpy(jh->b_frozen_data, source, bh->b_size); + kunmap_local(source); /* * Now that the frozen data is saved off, we need to store any matching From f15f29fd4779be8a418b66e9d52979bb6d6c2325 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Sep 2023 08:22:33 +0200 Subject: [PATCH 023/333] netfilter: nf_tables: disallow rule removal from chain binding Chain binding only requires the rule addition/insertion command within the same transaction. Removal of rules from chain bindings within the same transaction makes no sense, userspace does not utilize this feature. Replace nft_chain_is_bound() check to nft_chain_binding() in rule deletion commands. Replace command implies a rule deletion, reject this command too. Rule flush command can also safely rely on this nft_chain_binding() check because unbound chains are not allowed since 62e1e94b246e ("netfilter: nf_tables: reject unbound chain set before commit phase"). 
Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e429ebba74b3..895c6e4fba97 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1432,7 +1432,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx->chain = chain; @@ -1477,7 +1477,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx->chain = chain; @@ -2910,6 +2910,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, return PTR_ERR(chain); } + if (nft_chain_binding(chain)) + return -EOPNOTSUPP; + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (nla[NFTA_CHAIN_HOOK]) { @@ -3971,6 +3974,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { + if (nft_chain_binding(chain)) { + err = -EOPNOTSUPP; + goto err_destroy_flow_rule; + } + err = nft_delrule(&ctx, old_rule); if (err < 0) goto err_destroy_flow_rule; @@ -4078,7 +4086,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) return -EOPNOTSUPP; } @@ -4112,7 +4120,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx.chain = 
chain; @@ -11054,7 +11062,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table) ctx.family = table->family; ctx.table = table; list_for_each_entry(chain, &table->chains, list) { - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx.chain = chain; From 96b33300fba880ec0eafcf3d82486f3463b4b6da Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Sep 2023 12:52:24 +0200 Subject: [PATCH 024/333] netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention rbtree GC does not modify the datastructure, instead it collects expired elements and it enqueues a GC transaction. Use a read spinlock instead to avoid data contention while GC worker is running. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index f250b5399344..70491ba98dec 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -622,8 +622,7 @@ static void nft_rbtree_gc(struct work_struct *work) if (!gc) goto done; - write_lock_bh(&priv->lock); - write_seqcount_begin(&priv->count); + read_lock_bh(&priv->lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { /* Ruleset has been updated, try later. */ @@ -673,8 +672,7 @@ dead_elem: gc = nft_trans_gc_catchall(gc, gc_seq); try_later: - write_seqcount_end(&priv->count); - write_unlock_bh(&priv->lock); + read_unlock_bh(&priv->lock); if (gc) nft_trans_gc_queue_async_done(gc); From 4a9e12ea7e70223555ec010bec9f711089ce96f6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 6 Sep 2023 15:07:53 +0200 Subject: [PATCH 025/333] netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall GC pipapo needs to enqueue GC transactions for catchall elements through nft_trans_gc_queue_sync(). 
Add nft_trans_gc_catchall_sync() and nft_trans_gc_catchall_async() to handle GC transaction queueing accordingly. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++-- net/netfilter/nf_tables_api.c | 22 +++++++++++++++++++--- net/netfilter/nft_set_hash.c | 2 +- net/netfilter/nft_set_pipapo.c | 2 +- net/netfilter/nft_set_rbtree.c | 2 +- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index dd40c75011d2..a4455f4995ab 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1700,8 +1700,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); -struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc); void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 895c6e4fba97..7b59311931fb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9613,8 +9613,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) call_rcu(&trans->rcu, nft_trans_gc_trans_free); } -struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq) +static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, + unsigned int gc_seq, + bool sync) { struct nft_set_elem_catchall *catchall; const struct nft_set *set = gc->set; @@ -9630,7 +9631,11 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc 
*gc, nft_set_elem_dead(ext); dead_elem: - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (sync) + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + else + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) return NULL; @@ -9640,6 +9645,17 @@ dead_elem: return gc; } +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq) +{ + return nft_trans_gc_catchall(gc, gc_seq, false); +} + +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) +{ + return nft_trans_gc_catchall(gc, 0, true); +} + static void nf_tables_module_autoload_cleanup(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 524763659f25..eca20dc60138 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -372,7 +372,7 @@ dead_elem: nft_trans_gc_elem_add(gc, he); } - gc = nft_trans_gc_catchall(gc, gc_seq); + gc = nft_trans_gc_catchall_async(gc, gc_seq); try_later: /* catchall list iteration requires rcu read side lock. 
*/ diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 6af9c9ed4b5c..10b89ac74476 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1610,7 +1610,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) } } - gc = nft_trans_gc_catchall(gc, 0); + gc = nft_trans_gc_catchall_sync(gc); if (gc) { nft_trans_gc_queue_sync_done(gc); priv->last_gc = jiffies; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 70491ba98dec..487572dcd614 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -669,7 +669,7 @@ dead_elem: nft_trans_gc_elem_add(gc, rbe); } - gc = nft_trans_gc_catchall(gc, gc_seq); + gc = nft_trans_gc_catchall_async(gc, gc_seq); try_later: read_unlock_bh(&priv->lock); From 6d365eabce3c018a80f6e0379b17df2abb17405e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 6 Sep 2023 17:22:58 +0200 Subject: [PATCH 026/333] netfilter: nft_set_pipapo: stop GC iteration if GC transaction allocation fails nft_trans_gc_queue_sync() enqueues the GC transaction and it allocates a new one. If this allocation fails, then stop this GC sync run and retry later. 
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 10b89ac74476..c0dcc40de358 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1596,7 +1596,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (!gc) - break; + return; nft_pipapo_gc_deactivate(net, set, e); pipapo_drop(m, rulemap); From b079155faae94e9b3ab9337e82100a914ebb4e8d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Sep 2023 01:39:43 +0200 Subject: [PATCH 027/333] netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration Skip GC run if iterator rewinds to the beginning with EAGAIN, otherwise GC might collect the same element more than once. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index eca20dc60138..2013de934cef 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -338,12 +338,9 @@ static void nft_rhash_gc(struct work_struct *work) while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { - if (PTR_ERR(he) != -EAGAIN) { - nft_trans_gc_destroy(gc); - gc = NULL; - goto try_later; - } - continue; + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; } /* Ruleset has been updated, try later. 
*/ From 7deac114be5fb25a4e865212ed0feaf5f85f2a28 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 25 Aug 2023 10:55:31 +0800 Subject: [PATCH 028/333] md: don't dereference mddev after export_rdev() Except for initial reference, mddev->kobject is referenced by rdev->kobject, and if the last rdev is freed, there is no guarantee that mddev is still valid. Hence mddev should not be used anymore after export_rdev(). This problem can be triggered by the following mdadm test at a very low rate: New file: mdadm/tests/23rdev-lifetime devname=${dev0##*/} devt=`cat /sys/block/$devname/dev` pid="" runtime=2 clean_up_test() { pill -9 $pid echo clear > /sys/block/md0/md/array_state } trap 'clean_up_test' EXIT add_by_sysfs() { while true; do echo $devt > /sys/block/md0/md/new_dev done } remove_by_sysfs(){ while true; do echo remove > /sys/block/md0/md/dev-${devname}/state done } echo md0 > /sys/module/md_mod/parameters/new_array || die "create md0 failed" add_by_sysfs & pid="$pid $!" remove_by_sysfs & pid="$pid $!" sleep $runtime exit 0 Test cmd: ./test --save-logs --logdir=/tmp/ --keep-going --dev=loop --tests=23rdev-lifetime Test result: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6bcb: 0000 [#4] PREEMPT SMP CPU: 0 PID: 1292 Comm: test Tainted: G D W 6.5.0-rc2-00121-g01e55c376936 #562 RIP: 0010:md_wakeup_thread+0x9e/0x320 [md_mod] Call Trace: mddev_unlock+0x1b6/0x310 [md_mod] rdev_attr_store+0xec/0x190 [md_mod] sysfs_kf_write+0x52/0x70 kernfs_fop_write_iter+0x19a/0x2a0 vfs_write+0x3b5/0x770 ksys_write+0x74/0x150 __x64_sys_write+0x22/0x30 do_syscall_64+0x40/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fix this problem by not dereferencing mddev after export_rdev().
Fixes: 3ce94ce5d05a ("md: fix duplicate filename for rdev") Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230825025532.1523008-2-yukuai1@huaweicloud.com --- drivers/md/md.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 0fe7ab6e8ab9..590aee057aca 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -798,14 +798,14 @@ void mddev_unlock(struct mddev *mddev) } else mutex_unlock(&mddev->reconfig_mutex); + md_wakeup_thread(mddev->thread); + wake_up(&mddev->sb_wait); + list_for_each_entry_safe(rdev, tmp, &delete, same_set) { list_del_init(&rdev->same_set); kobject_del(&rdev->kobj); export_rdev(rdev, mddev); } - - md_wakeup_thread(mddev->thread); - wake_up(&mddev->sb_wait); } EXPORT_SYMBOL_GPL(mddev_unlock); From 99892147f028d711f9d40fefad4f33632593864c Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 25 Aug 2023 10:55:32 +0800 Subject: [PATCH 029/333] md: fix warning for holder mismatch from export_rdev() Commit a1d767191096 ("md: use mddev->external to select holder in export_rdev()") fixed the problem that 'claim_rdev' is used for blkdev_get_by_dev() while 'rdev' is used for blkdev_put(). However, if mddev->external is changed from 0 to 1, then 'rdev' is used for blkdev_get_by_dev() while 'claim_rdev' is used for blkdev_put(). And this problem can be reproduced reliably by the following: New file: mdadm/tests/23rdev-lifetime devname=${dev0##*/} devt=`cat /sys/block/$devname/dev` pid="" runtime=2 clean_up_test() { pill -9 $pid echo clear > /sys/block/md0/md/array_state } trap 'clean_up_test' EXIT add_by_sysfs() { while true; do echo $devt > /sys/block/md0/md/new_dev done } remove_by_sysfs(){ while true; do echo remove > /sys/block/md0/md/dev-${devname}/state done } echo md0 > /sys/module/md_mod/parameters/new_array || die "create md0 failed" add_by_sysfs & pid="$pid $!" remove_by_sysfs & pid="$pid $!"
sleep $runtime exit 0 Test cmd: ./test --save-logs --logdir=/tmp/ --keep-going --dev=loop --tests=23rdev-lifetime Test result: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 960 at block/bdev.c:618 blkdev_put+0x27c/0x330 Modules linked in: multipath md_mod loop CPU: 0 PID: 960 Comm: test Not tainted 6.5.0-rc2-00121-g01e55c376936-dirty #50 RIP: 0010:blkdev_put+0x27c/0x330 Call Trace: export_rdev.isra.23+0x50/0xa0 [md_mod] mddev_unlock+0x19d/0x300 [md_mod] rdev_attr_store+0xec/0x190 [md_mod] sysfs_kf_write+0x52/0x70 kernfs_fop_write_iter+0x19a/0x2a0 vfs_write+0x3b5/0x770 ksys_write+0x74/0x150 __x64_sys_write+0x22/0x30 do_syscall_64+0x40/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fix the problem by recording if 'rdev' is used as holder. Fixes: a1d767191096 ("md: use mddev->external to select holder in export_rdev()") Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230825025532.1523008-3-yukuai1@huaweicloud.com --- drivers/md/md.c | 15 ++++++++++++--- drivers/md/md.h | 3 +++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 590aee057aca..73758b754127 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2452,7 +2452,8 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev) if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); #endif - blkdev_put(rdev->bdev, mddev->external ? &claim_rdev : rdev); + blkdev_put(rdev->bdev, + test_bit(Holder, &rdev->flags) ? 
rdev : &claim_rdev); rdev->bdev = NULL; kobject_put(&rdev->kobj); } @@ -3632,6 +3633,7 @@ EXPORT_SYMBOL_GPL(md_rdev_init); static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) { struct md_rdev *rdev; + struct md_rdev *holder; sector_t size; int err; @@ -3646,8 +3648,15 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe if (err) goto out_clear_rdev; + if (super_format == -2) { + holder = &claim_rdev; + } else { + holder = rdev; + set_bit(Holder, &rdev->flags); + } + rdev->bdev = blkdev_get_by_dev(newdev, BLK_OPEN_READ | BLK_OPEN_WRITE, - super_format == -2 ? &claim_rdev : rdev, NULL); + holder, NULL); if (IS_ERR(rdev->bdev)) { pr_warn("md: could not open device unknown-block(%u,%u).\n", MAJOR(newdev), MINOR(newdev)); @@ -3684,7 +3693,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe return rdev; out_blkdev_put: - blkdev_put(rdev->bdev, super_format == -2 ? &claim_rdev : rdev); + blkdev_put(rdev->bdev, holder); out_clear_rdev: md_rdev_clear(rdev); out_free_rdev: diff --git a/drivers/md/md.h b/drivers/md/md.h index 9bcb77bca963..7c9c13abd7ca 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -211,6 +211,9 @@ enum flag_bits { * check if there is collision between raid1 * serial bios. */ + Holder, /* rdev is used as holder while opening + * underlying disk exclusively. + */ }; static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, From 41bc46c12a8053a1b3279a379bd6b5e87b045b85 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Sep 2023 22:06:51 +0200 Subject: [PATCH 030/333] bpf: Add override check to kprobe multi link attach Currently the multi_kprobe link attach does not check error injection list for programs with bpf_override_return helper and allows them to attach anywhere. Adding the missing check. 
Fixes: 0dcac2725406 ("bpf: Add multi kprobe link") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Reviewed-by: Alan Maguire Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20230907200652.926951-1-jolsa@kernel.org --- kernel/trace/bpf_trace.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a7264b2c17ad..c1c1af63ced2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2853,6 +2853,17 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3 return arr.mods_cnt; } +static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + if (!within_error_injection_list(addrs[i])) + return -EINVAL; + } + return 0; +} + int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_kprobe_multi_link *link = NULL; @@ -2930,6 +2941,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error; } + if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) { + err = -EINVAL; + goto error; + } + link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) { err = -ENOMEM; From 7182e56411b9a8b76797ed7b6095fc84be76dfb0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Sep 2023 22:06:52 +0200 Subject: [PATCH 031/333] selftests/bpf: Add kprobe_multi override test Adding test that tries to attach program with bpf_override_return helper to function not within error injection list. 
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230907200652.926951-2-jolsa@kernel.org --- .../bpf/prog_tests/kprobe_multi_test.c | 37 +++++++++++++++++++ .../bpf/progs/kprobe_multi_override.c | 13 +++++++ 2 files changed, 50 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi_override.c diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 179fe300534f..e05477b210a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -3,6 +3,7 @@ #include "kprobe_multi.skel.h" #include "trace_helpers.h" #include "kprobe_multi_empty.skel.h" +#include "kprobe_multi_override.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -453,6 +454,40 @@ cleanup: } } +void test_attach_override(void) +{ + struct kprobe_multi_override *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_multi_override__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + /* The test_override calls bpf_override_return so it should fail + * to attach to bpf_fentry_test1 function, which is not on error + * injection list. + */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "override_attached_bpf_fentry_test1")) { + bpf_link__destroy(link); + goto cleanup; + } + + /* The should_fail_bio function is on error injection list, + * attach should succeed. 
+ */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + "should_fail_bio", NULL); + if (!ASSERT_OK_PTR(link, "override_attached_should_fail_bio")) + goto cleanup; + + bpf_link__destroy(link); + +cleanup: + kprobe_multi_override__destroy(skel); +} + void serial_test_kprobe_multi_bench_attach(void) { if (test__start_subtest("kernel")) @@ -480,4 +515,6 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); + if (test__start_subtest("attach_override")) + test_attach_override(); } diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c new file mode 100644 index 000000000000..28f8487c9059 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe.multi") +int test_override(struct pt_regs *ctx) +{ + bpf_override_return(ctx, 123); + return 0; +} From fdd2630a7398191e84822612e589062063bd4f3d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 9 Sep 2023 07:12:30 -0400 Subject: [PATCH 032/333] nfsd: fix change_info in NFSv4 RENAME replies nfsd sends the transposed directory change info in the RENAME reply. The source directory is in save_fh and the target is in current_fh. 
Reported-by: Zhi Li Reported-by: Benjamin Coddington Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2218844 Signed-off-by: Jeff Layton Cc: Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5ca748309c26..4199ede0583c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1058,8 +1058,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, rename->rn_tname, rename->rn_tnamelen); if (status) return status; - set_change_info(&rename->rn_sinfo, &cstate->current_fh); - set_change_info(&rename->rn_tinfo, &cstate->save_fh); + set_change_info(&rename->rn_sinfo, &cstate->save_fh); + set_change_info(&rename->rn_tinfo, &cstate->current_fh); return nfs_ok; } From f101583fa9f8c3f372d4feb61d67da0ccbf4d9a5 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Thu, 7 Sep 2023 20:32:24 +0530 Subject: [PATCH 033/333] ASoC: soc-utils: Export snd_soc_dai_is_dummy() symbol Export symbol snd_soc_dai_is_dummy() for usage outside core driver modules. This is required by Tegra ASoC machine driver. 
Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1694098945-32760-2-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/soc-utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index 11607c5f5d5a..9c746e4edef7 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -217,6 +217,7 @@ int snd_soc_dai_is_dummy(struct snd_soc_dai *dai) return 1; return 0; } +EXPORT_SYMBOL_GPL(snd_soc_dai_is_dummy); int snd_soc_component_is_dummy(struct snd_soc_component *component) { From e765886249c533e1bb5cbc3cd741bad677417312 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Thu, 7 Sep 2023 20:32:25 +0530 Subject: [PATCH 034/333] ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates Tegra audio graph card has many DAI links which connect internal AHUB modules and external audio codecs. Since these are DPCM links, hw_params() call in the machine driver happens for each connected BE link and PLLA is updated every time. This is not really needed for all links as only I/O link DAIs derive respective clocks from PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock updates to DAIs over I/O links. This was found to fix a DMIC clock discrepancy which is suspected to happen because of back to back quick PLLA and PLLA_OUT0 rate updates. This was observed on Jetson TX2 platform where DMIC clock ended up with unexpected value.
Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1694098945-32760-3-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_audio_graph_card.c | 30 ++++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c index 1f2c5018bf5a..4737e776d383 100644 --- a/sound/soc/tegra/tegra_audio_graph_card.c +++ b/sound/soc/tegra/tegra_audio_graph_card.c @@ -10,6 +10,7 @@ #include #include #include +#include #define MAX_PLLA_OUT0_DIV 128 @@ -44,6 +45,21 @@ struct tegra_audio_cdata { unsigned int plla_out0_rates[NUM_RATE_TYPE]; }; +static bool need_clk_update(struct snd_soc_dai *dai) +{ + if (snd_soc_dai_is_dummy(dai) || + !dai->driver->ops || + !dai->driver->name) + return false; + + if (strstr(dai->driver->name, "I2S") || + strstr(dai->driver->name, "DMIC") || + strstr(dai->driver->name, "DSPK")) + return true; + + return false; +} + /* Setup PLL clock as per the given sample rate */ static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); int err; - /* - * This gets called for each DAI link (FE or BE) when DPCM is used. - * We may not want to update PLLA rate for each call. So PLLA update - * must be restricted to external I/O links (I2S, DMIC or DSPK) since - * they actually depend on it. I/O modules update their clocks in - * hw_param() of their respective component driver and PLLA rate - * update here helps them to derive appropriate rates. - * - * TODO: When more HW accelerators get added (like sample rate - * converter, volume gain controller etc., which don't really - * depend on PLLA) we need a better way to filter here. 
- */ - if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) { + if (need_clk_update(cpu_dai)) { err = tegra_audio_graph_update_pll(substream, params); if (err) return err; From ec03804552e9a723569e14d2512f36a8e70dc640 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 8 Sep 2023 11:17:16 +0100 Subject: [PATCH 035/333] ASoC: cs35l56: Call pm_runtime_dont_use_autosuspend() Driver remove() must call pm_runtime_dont_use_autosuspend(). Drivers that call pm_runtime_use_autosuspend() must disable it in driver remove(). Unfortunately until recently this was only mentioned in 1 line in a 900+ line document so most people hadn't noticed this. It has only recently been added to the kerneldoc of pm_runtime_use_autosuspend(). THIS WON'T APPLY CLEANLY TO V6.5 AND EARLIER: We will send a separate backported patch to stable. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230908101716.2658582-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 600b79c62ec4..f2e7c6d0be46 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1207,6 +1207,7 @@ void cs35l56_remove(struct cs35l56_private *cs35l56) flush_workqueue(cs35l56->dsp_wq); destroy_workqueue(cs35l56->dsp_wq); + pm_runtime_dont_use_autosuspend(cs35l56->base.dev); pm_runtime_suspend(cs35l56->base.dev); pm_runtime_disable(cs35l56->base.dev); From aedf323b66b2b875137422ecb7d2525179759076 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 7 Sep 2023 11:05:04 +0200 Subject: [PATCH 036/333] ASoC: meson: spdifin: start hw on dai probe For spdif input to report the locked rate correctly, even when no capture is running, the HW and reference clock must be started as soon as the dai is probed. 
Fixes: 5ce5658375e6 ("ASoC: meson: add axg spdif input") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20230907090504.12700-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-spdifin.c | 49 ++++++++++++----------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/sound/soc/meson/axg-spdifin.c b/sound/soc/meson/axg-spdifin.c index d86880169075..bc2f2849ecfb 100644 --- a/sound/soc/meson/axg-spdifin.c +++ b/sound/soc/meson/axg-spdifin.c @@ -112,34 +112,6 @@ static int axg_spdifin_prepare(struct snd_pcm_substream *substream, return 0; } -static int axg_spdifin_startup(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct axg_spdifin *priv = snd_soc_dai_get_drvdata(dai); - int ret; - - ret = clk_prepare_enable(priv->refclk); - if (ret) { - dev_err(dai->dev, - "failed to enable spdifin reference clock\n"); - return ret; - } - - regmap_update_bits(priv->map, SPDIFIN_CTRL0, SPDIFIN_CTRL0_EN, - SPDIFIN_CTRL0_EN); - - return 0; -} - -static void axg_spdifin_shutdown(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct axg_spdifin *priv = snd_soc_dai_get_drvdata(dai); - - regmap_update_bits(priv->map, SPDIFIN_CTRL0, SPDIFIN_CTRL0_EN, 0); - clk_disable_unprepare(priv->refclk); -} - static void axg_spdifin_write_mode_param(struct regmap *map, int mode, unsigned int val, unsigned int num_per_reg, @@ -251,17 +223,32 @@ static int axg_spdifin_dai_probe(struct snd_soc_dai *dai) ret = axg_spdifin_sample_mode_config(dai, priv); if (ret) { dev_err(dai->dev, "mode configuration failed\n"); - clk_disable_unprepare(priv->pclk); - return ret; + goto pclk_err; } + ret = clk_prepare_enable(priv->refclk); + if (ret) { + dev_err(dai->dev, + "failed to enable spdifin reference clock\n"); + goto pclk_err; + } + + regmap_update_bits(priv->map, SPDIFIN_CTRL0, SPDIFIN_CTRL0_EN, + SPDIFIN_CTRL0_EN); + return 0; + +pclk_err: + clk_disable_unprepare(priv->pclk); + return ret; } static int 
axg_spdifin_dai_remove(struct snd_soc_dai *dai) { struct axg_spdifin *priv = snd_soc_dai_get_drvdata(dai); + regmap_update_bits(priv->map, SPDIFIN_CTRL0, SPDIFIN_CTRL0_EN, 0); + clk_disable_unprepare(priv->refclk); clk_disable_unprepare(priv->pclk); return 0; } @@ -270,8 +257,6 @@ static const struct snd_soc_dai_ops axg_spdifin_ops = { .probe = axg_spdifin_dai_probe, .remove = axg_spdifin_dai_remove, .prepare = axg_spdifin_prepare, - .startup = axg_spdifin_startup, - .shutdown = axg_spdifin_shutdown, }; static int axg_spdifin_iec958_info(struct snd_kcontrol *kcontrol, From 28115b1c4f2bb76e786436bf6597c5eb27638a5c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 7 Sep 2023 11:55:20 +0200 Subject: [PATCH 037/333] ASoC: rsnd: add missing of_node_put for_each_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put. This was done using the Coccinelle semantic patch iterators/for_each_child.cocci Signed-off-by: Julia Lawall Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20230907095521.14053-11-Julia.Lawall@inria.fr Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index e29c2fee9521..1bd7114c472a 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1303,6 +1303,7 @@ audio_graph: if (i >= RSND_MAX_COMPONENT) { dev_info(dev, "reach to max component\n"); of_node_put(node); + of_node_put(ports); break; } } From d7e47e32192bb88f5b2dc8e655fa587ecf9d71e0 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 9 Sep 2023 05:02:37 -0700 Subject: [PATCH 038/333] ASoC: wm8960: Fix error handling in probe Commit 422f10adc3eb ("ASoC: wm8960: Add support for the power supplies") added regulator support to the wm8960 driver, but neglected to update error handling in the probe function. This results in warning backtraces if the probe function fails. 
Fixes: 422f10adc3eb ("ASoC: wm8960: Add support for the power supplies") Signed-off-by: Guenter Roeck Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20230909120237.2646275-1-linux@roeck-us.net Signed-off-by: Mark Brown --- sound/soc/codecs/wm8960.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 0a50180750e8..7689fe3cc86d 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -1468,8 +1468,10 @@ static int wm8960_i2c_probe(struct i2c_client *i2c) } wm8960->regmap = devm_regmap_init_i2c(i2c, &wm8960_regmap); - if (IS_ERR(wm8960->regmap)) - return PTR_ERR(wm8960->regmap); + if (IS_ERR(wm8960->regmap)) { + ret = PTR_ERR(wm8960->regmap); + goto bulk_disable; + } if (pdata) memcpy(&wm8960->pdata, pdata, sizeof(struct wm8960_data)); @@ -1479,13 +1481,14 @@ static int wm8960_i2c_probe(struct i2c_client *i2c) ret = i2c_master_recv(i2c, &val, sizeof(val)); if (ret >= 0) { dev_err(&i2c->dev, "Not wm8960, wm8960 reg can not read by i2c\n"); - return -EINVAL; + ret = -EINVAL; + goto bulk_disable; } ret = wm8960_reset(wm8960->regmap); if (ret != 0) { dev_err(&i2c->dev, "Failed to issue reset\n"); - return ret; + goto bulk_disable; } if (wm8960->pdata.shared_lrclk) { @@ -1494,7 +1497,7 @@ static int wm8960_i2c_probe(struct i2c_client *i2c) if (ret != 0) { dev_err(&i2c->dev, "Failed to enable LRCM: %d\n", ret); - return ret; + goto bulk_disable; } } @@ -1528,7 +1531,13 @@ static int wm8960_i2c_probe(struct i2c_client *i2c) ret = devm_snd_soc_register_component(&i2c->dev, &soc_component_dev_wm8960, &wm8960_dai, 1); + if (ret) + goto bulk_disable; + return 0; + +bulk_disable: + regulator_bulk_disable(ARRAY_SIZE(wm8960->supplies), wm8960->supplies); return ret; } From 396b907919e028d89bac912e49de014485deb8dc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 8 Sep 2023 09:59:20 +0100 Subject: [PATCH 039/333] ASoC: soc-pcm: Shrink stack frame for 
__soc_pcm_hw_params Commit ac950278b087 ("ASoC: add N cpus to M codecs dai link support") added an additional local params in __soc_pcm_hw_params, for the CPU side of the DAI. The snd_pcm_hw_params struct is pretty large (604 bytes) and keeping two local copies of it can make the stack frame really large. It is worth noting the variables are in separate code blocks so for some optimisation levels in the compiler these will get automatically combined keeping the stack frame reasonable. But better to manually combine them to cover all cases. Add a single local variable for __soc_pcm_hw_params and use in both loops to shrink the stack frame. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230908085920.2906359-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index eb0723876851..54704250c0a2 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -985,6 +985,7 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, { struct snd_soc_dai *cpu_dai; struct snd_soc_dai *codec_dai; + struct snd_pcm_hw_params tmp_params; int i, ret = 0; snd_soc_dpcm_mutex_assert_held(rtd); @@ -998,7 +999,6 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, goto out; for_each_rtd_codec_dais(rtd, i, codec_dai) { - struct snd_pcm_hw_params codec_params; unsigned int tdm_mask = snd_soc_dai_tdm_mask_get(codec_dai, substream->stream); /* @@ -1019,23 +1019,22 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, continue; /* copy params for each codec */ - codec_params = *params; + tmp_params = *params; /* fixup params based on TDM slot masks */ if (tdm_mask) - soc_pcm_codec_params_fixup(&codec_params, tdm_mask); + soc_pcm_codec_params_fixup(&tmp_params, tdm_mask); ret = snd_soc_dai_hw_params(codec_dai, substream, - &codec_params); + &tmp_params); if(ret < 0) goto out; - 
soc_pcm_set_dai_params(codec_dai, &codec_params); - snd_soc_dapm_update_dai(substream, &codec_params, codec_dai); + soc_pcm_set_dai_params(codec_dai, &tmp_params); + snd_soc_dapm_update_dai(substream, &tmp_params, codec_dai); } for_each_rtd_cpu_dais(rtd, i, cpu_dai) { - struct snd_pcm_hw_params cpu_params; unsigned int ch_mask = 0; int j; @@ -1047,7 +1046,7 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, continue; /* copy params for each cpu */ - cpu_params = *params; + tmp_params = *params; if (!rtd->dai_link->codec_ch_maps) goto hw_params; @@ -1062,16 +1061,16 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, /* fixup cpu channel number */ if (ch_mask) - soc_pcm_codec_params_fixup(&cpu_params, ch_mask); + soc_pcm_codec_params_fixup(&tmp_params, ch_mask); hw_params: - ret = snd_soc_dai_hw_params(cpu_dai, substream, &cpu_params); + ret = snd_soc_dai_hw_params(cpu_dai, substream, &tmp_params); if (ret < 0) goto out; /* store the parameters for each DAI */ - soc_pcm_set_dai_params(cpu_dai, &cpu_params); - snd_soc_dapm_update_dai(substream, &cpu_params, cpu_dai); + soc_pcm_set_dai_params(cpu_dai, &tmp_params); + snd_soc_dapm_update_dai(substream, &tmp_params, cpu_dai); } ret = snd_soc_pcm_component_hw_params(substream, params); From e616a916fe8431ebd5eb3cf4ac224d143c57083c Mon Sep 17 00:00:00 2001 From: Walt Holman Date: Sun, 10 Sep 2023 13:54:34 -0500 Subject: [PATCH 040/333] Add DMI ID for MSI Bravo 15 B7ED Signed-off-by: Walt Holman Link: https://lore.kernel.org/r/20230910185433.13677-1-waltholman09@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 3ec15b46fa35..59aa2e9d3a79 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -262,6 +262,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"), } }, + { + 
.driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 B7ED"), + } + }, { .driver_data = &acp6x_card, .matches = { From 18495676f7886e105133f1dc06c1d5e8d5436f32 Mon Sep 17 00:00:00 2001 From: Han Xu Date: Wed, 6 Sep 2023 13:32:54 -0500 Subject: [PATCH 041/333] spi: nxp-fspi: reset the FLSHxCR1 registers Reset the FLSHxCR1 registers to default value. ROM may set the register value and it affects the SPI NAND normal functions. Signed-off-by: Han Xu Link: https://lore.kernel.org/r/20230906183254.235847-1-han.xu@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-nxp-fspi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 45a4acc95661..c964f41dcc42 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -1084,6 +1084,13 @@ static int nxp_fspi_default_setup(struct nxp_fspi *f) fspi_writel(f, FSPI_AHBCR_PREF_EN | FSPI_AHBCR_RDADDROPT, base + FSPI_AHBCR); + /* Reset the FLSHxCR1 registers. */ + reg = FSPI_FLSHXCR1_TCSH(0x3) | FSPI_FLSHXCR1_TCSS(0x3); + fspi_writel(f, reg, base + FSPI_FLSHA1CR1); + fspi_writel(f, reg, base + FSPI_FLSHA2CR1); + fspi_writel(f, reg, base + FSPI_FLSHB1CR1); + fspi_writel(f, reg, base + FSPI_FLSHB2CR1); + /* AHB Read - Set lut sequence ID for all CS. */ fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA1CR2); fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA2CR2); From 6de8a70c84ee0586fdde4e671626b9caca6aed74 Mon Sep 17 00:00:00 2001 From: Valentin Caron Date: Wed, 6 Sep 2023 15:27:35 +0200 Subject: [PATCH 042/333] spi: stm32: add a delay before SPI disable As explained in errata sheet, in section "2.14.5 Truncation of SPI output signals after EOT event": On STM32MP1x, EOT interrupt can be thrown before the true end of communication. So we add a delay of a half period to wait the real end of the transmission. 
Link: https://www.st.com/resource/en/errata_sheet/es0539-stm32mp131x3x5x-device-errata-stmicroelectronics.pdf Signed-off-by: Valentin Caron Link: https://lore.kernel.org/r/20230906132735.748174-1-valentin.caron@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index b6d66caba4c0..ef665f470c5b 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -277,6 +277,7 @@ struct stm32_spi_cfg { * @fifo_size: size of the embedded fifo in bytes * @cur_midi: master inter-data idleness in ns * @cur_speed: speed configured in Hz + * @cur_half_period: time of a half bit in us * @cur_bpw: number of bits in a single SPI data frame * @cur_fthlv: fifo threshold level (data frames in a single data packet) * @cur_comm: SPI communication mode @@ -304,6 +305,7 @@ struct stm32_spi { unsigned int cur_midi; unsigned int cur_speed; + unsigned int cur_half_period; unsigned int cur_bpw; unsigned int cur_fthlv; unsigned int cur_comm; @@ -468,6 +470,8 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz, spi->cur_speed = spi->clk_rate / (1 << mbrdiv); + spi->cur_half_period = DIV_ROUND_CLOSEST(USEC_PER_SEC, 2 * spi->cur_speed); + return mbrdiv - 1; } @@ -709,6 +713,10 @@ static void stm32h7_spi_disable(struct stm32_spi *spi) return; } + /* Add a delay to make sure that transmission is ended. 
*/ + if (spi->cur_half_period) + udelay(spi->cur_half_period); + if (spi->cur_usedma && spi->dma_tx) dmaengine_terminate_async(spi->dma_tx); if (spi->cur_usedma && spi->dma_rx) From 24e0e61db3cb86a66824531989f1df80e0939f26 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 4 Sep 2023 22:42:56 +0200 Subject: [PATCH 043/333] ata: libata: disallow dev-initiated LPM transitions to unsupported states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In AHCI 1.3.1, the register description for CAP.SSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Slumber state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Slumber requests." In AHCI 1.3.1, the register description for CAP.PSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Partial state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Partial requests." Ensure that we always set the corresponding bits in PxSCTL.IPM, such that a device is not allowed to initiate transitions to power states which are unsupported by the HBA. DevSleep is always initiated by the HBA, however, for completeness, set the corresponding bit in PxSCTL.IPM such that agressive link power management cannot transition to DevSleep if DevSleep is not supported. sata_link_scr_lpm() is used by libahci, ata_piix and libata-pmp. However, only libahci has the ability to read the CAP/CAP2 register to see if these features are supported. Therefore, in order to not introduce any regressions on ata_piix or libata-pmp, create flags that indicate that the respective feature is NOT supported. This way, the behavior for ata_piix and libata-pmp should remain unchanged. 
This change is based on a patch originally submitted by Runa Guo-oc. Signed-off-by: Niklas Cassel Fixes: 1152b2617a6e ("libata: implement sata_link_scr_lpm() and make ata_dev_set_feature() global") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 9 +++++++++ drivers/ata/libata-sata.c | 19 ++++++++++++++++--- include/linux/libata.h | 4 ++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index abb5911c9d09..08745e7db820 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1883,6 +1883,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) else dev_info(&pdev->dev, "SSS flag set, parallel bus scan disabled\n"); + if (!(hpriv->cap & HOST_CAP_PART)) + host->flags |= ATA_HOST_NO_PART; + + if (!(hpriv->cap & HOST_CAP_SSC)) + host->flags |= ATA_HOST_NO_SSC; + + if (!(hpriv->cap2 & HOST_CAP2_SDS)) + host->flags |= ATA_HOST_NO_DEVSLP; + if (pi.flags & ATA_FLAG_EM) ahci_reset_em(host); diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 5d31c08be013..a701e1538482 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -396,10 +396,23 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, case ATA_LPM_MED_POWER_WITH_DIPM: case ATA_LPM_MIN_POWER_WITH_PARTIAL: case ATA_LPM_MIN_POWER: - if (ata_link_nr_enabled(link) > 0) - /* no restrictions on LPM transitions */ + if (ata_link_nr_enabled(link) > 0) { + /* assume no restrictions on LPM transitions */ scontrol &= ~(0x7 << 8); - else { + + /* + * If the controller does not support partial, slumber, + * or devsleep, then disallow these transitions. 
+ */ + if (link->ap->host->flags & ATA_HOST_NO_PART) + scontrol |= (0x1 << 8); + + if (link->ap->host->flags & ATA_HOST_NO_SSC) + scontrol |= (0x2 << 8); + + if (link->ap->host->flags & ATA_HOST_NO_DEVSLP) + scontrol |= (0x4 << 8); + } else { /* empty port, power off */ scontrol &= ~0xf; scontrol |= (0x1 << 2); diff --git a/include/linux/libata.h b/include/linux/libata.h index 52d58b13e5ee..bf4913f4d7ac 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -222,6 +222,10 @@ enum { ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */ + ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */ + ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */ + ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */ + /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ From e97eb65dd464e7f118a16a26337322d07eb653e2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 4 Sep 2023 21:54:36 +0200 Subject: [PATCH 044/333] ata: sata_mv: Fix incorrect string length computation in mv_dump_mem() snprintf() returns the "number of characters which *would* be generated for the given input", not the size *really* generated. In order to avoid too large values for 'o' (and potential negative values for "sizeof(linebuf) - o") use scnprintf() instead of snprintf(). Note that given the "w < 4" in the for loop, the buffer can NOT overflow, but using the *right* function is always better.
Signed-off-by: Christophe JAILLET Signed-off-by: Damien Le Moal --- drivers/ata/sata_mv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index d105db5c7d81..45e48d653c60 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -1255,8 +1255,8 @@ static void mv_dump_mem(struct device *dev, void __iomem *start, unsigned bytes) for (b = 0; b < bytes; ) { for (w = 0, o = 0; b < bytes && w < 4; w++) { - o += snprintf(linebuf + o, sizeof(linebuf) - o, - "%08x ", readl(start + b)); + o += scnprintf(linebuf + o, sizeof(linebuf) - o, + "%08x ", readl(start + b)); b += sizeof(u32); } dev_dbg(dev, "%s: %p: %s\n", From 61ba93b4353106f0f4ee5fdca2d6527e85abf884 Mon Sep 17 00:00:00 2001 From: Ding Xiang Date: Fri, 8 Sep 2023 16:10:40 +0800 Subject: [PATCH 045/333] selftests: ALSA: remove unused variables These variables are never referenced in the code, just remove them. Signed-off-by: Ding Xiang Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230908081040.197243-1-dingxiang@cmss.chinamobile.com Signed-off-by: Takashi Iwai --- tools/testing/selftests/alsa/conf.c | 1 - tools/testing/selftests/alsa/mixer-test.c | 11 +++-------- tools/testing/selftests/alsa/pcm-test.c | 4 ++-- tools/testing/selftests/alsa/test-pcmtest-driver.c | 1 - 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c index d7aafe5a1993..2f1685a3eae1 100644 --- a/tools/testing/selftests/alsa/conf.c +++ b/tools/testing/selftests/alsa/conf.c @@ -431,7 +431,6 @@ long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def) { snd_config_t *cfg; - long l; int ret; if (!root) diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index c95d63e553f4..21e482b23f50 100644 --- 
a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -188,7 +188,7 @@ static int wait_for_event(struct ctl_data *ctl, int timeout) { unsigned short revents; snd_ctl_event_t *event; - int count, err; + int err; unsigned int mask = 0; unsigned int ev_id; @@ -430,7 +430,6 @@ static bool strend(const char *haystack, const char *needle) static void test_ctl_name(struct ctl_data *ctl) { bool name_ok = true; - bool check; ksft_print_msg("%d.%d %s\n", ctl->card->card, ctl->elem, ctl->name); @@ -863,7 +862,6 @@ static bool test_ctl_write_invalid_value(struct ctl_data *ctl, snd_ctl_elem_value_t *val) { int err; - long val_read; /* Ideally this will fail... */ err = snd_ctl_elem_write(ctl->card->handle, val); @@ -883,8 +881,7 @@ static bool test_ctl_write_invalid_value(struct ctl_data *ctl, static bool test_ctl_write_invalid_boolean(struct ctl_data *ctl) { - int err, i; - long val_read; + int i; bool fail = false; snd_ctl_elem_value_t *val; snd_ctl_elem_value_alloca(&val); @@ -994,8 +991,7 @@ static bool test_ctl_write_invalid_integer64(struct ctl_data *ctl) static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl) { - int err, i; - unsigned int val_read; + int i; bool fail = false; snd_ctl_elem_value_t *val; snd_ctl_elem_value_alloca(&val); @@ -1027,7 +1023,6 @@ static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl) static void test_ctl_write_invalid(struct ctl_data *ctl) { bool pass; - int err; /* If the control is turned off let's be polite */ if (snd_ctl_elem_info_is_inactive(ctl->info)) { diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index 2f5e3c462194..c0a39818c5a4 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -257,7 +257,7 @@ static void find_pcms(void) static void test_pcm_time(struct pcm_data *data, enum test_class class, const char *test_name, snd_config_t *pcm_cfg) { - char name[64], key[128], 
msg[256]; + char name[64], msg[256]; const int duration_s = 2, margin_ms = 100; const int duration_ms = duration_s * 1000; const char *cs; @@ -567,7 +567,7 @@ int main(void) { struct card_data *card; struct pcm_data *pcm; - snd_config_t *global_config, *cfg, *pcm_cfg; + snd_config_t *global_config, *cfg; int num_pcm_tests = 0, num_tests, num_std_pcm_tests; int ret; void *thread_ret; diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c index 357adc722cba..a52ecd43dbe3 100644 --- a/tools/testing/selftests/alsa/test-pcmtest-driver.c +++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c @@ -313,7 +313,6 @@ TEST_F(pcmtest, ni_playback) { */ TEST_F(pcmtest, reset_ioctl) { snd_pcm_t *handle; - unsigned char *it; int test_res; struct pcmtest_test_params *params = &self->params; From 762f169f5d9b92f057ad2c27ec4e3849b743239a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:21:20 +0200 Subject: [PATCH 046/333] efi/x86: Move EFI runtime call setup/teardown helpers out of line Only the arch_efi_call_virt() macro that some architectures override needs to be a macro, given that it is variadic and encapsulates calls via function pointers that have different prototypes. The associated setup and teardown code are not special in this regard, and don't need to be instantiated at each call site. So turn them into ordinary C functions and move them out of line. 
Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 32 ++------------------------------ arch/x86/platform/efi/efi_32.c | 12 ++++++++++++ arch/x86/platform/efi/efi_64.c | 19 +++++++++++++++++-- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b0994ae3bc23..c4555b269a1b 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -91,19 +91,6 @@ static inline void efi_fpu_end(void) #ifdef CONFIG_X86_32 #define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) - -#define arch_efi_call_virt_setup() \ -({ \ - efi_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ -}) - -#define arch_efi_call_virt_teardown() \ -({ \ - firmware_restrict_branch_speculation_end(); \ - efi_fpu_end(); \ -}) - #else /* !CONFIG_X86_32 */ #define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT @@ -116,14 +103,6 @@ extern bool efi_disable_ibt_for_runtime; __efi_call(__VA_ARGS__); \ }) -#define arch_efi_call_virt_setup() \ -({ \ - efi_sync_low_kernel_mappings(); \ - efi_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ - efi_enter_mm(); \ -}) - #undef arch_efi_call_virt #define arch_efi_call_virt(p, f, args...) ({ \ u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime); \ @@ -132,13 +111,6 @@ extern bool efi_disable_ibt_for_runtime; ret; \ }) -#define arch_efi_call_virt_teardown() \ -({ \ - efi_leave_mm(); \ - firmware_restrict_branch_speculation_end(); \ - efi_fpu_end(); \ -}) - #ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. 
__memset function is present @@ -168,8 +140,8 @@ extern void efi_delete_dummy_variable(void); extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); -void efi_enter_mm(void); -void efi_leave_mm(void); +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); /* kexec external ABI */ struct efi_setup_data { diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index e06a199423c0..b2cc7b4552a1 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -140,3 +140,15 @@ void __init efi_runtime_update_mappings(void) } } } + +void arch_efi_call_virt_setup(void) +{ + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); +} + +void arch_efi_call_virt_teardown(void) +{ + firmware_restrict_branch_speculation_end(); + efi_fpu_end(); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 77f7ac3668cb..91d31ac422d6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -474,19 +474,34 @@ void __init efi_dump_pagetable(void) * can not change under us. * It should be ensured that there are no concurrent calls to this function. 
*/ -void efi_enter_mm(void) +static void efi_enter_mm(void) { efi_prev_mm = current->active_mm; current->active_mm = &efi_mm; switch_mm(efi_prev_mm, &efi_mm, NULL); } -void efi_leave_mm(void) +static void efi_leave_mm(void) { current->active_mm = efi_prev_mm; switch_mm(&efi_mm, efi_prev_mm, NULL); } +void arch_efi_call_virt_setup(void) +{ + efi_sync_low_kernel_mappings(); + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); + efi_enter_mm(); +} + +void arch_efi_call_virt_teardown(void) +{ + efi_leave_mm(); + firmware_restrict_branch_speculation_end(); + efi_fpu_end(); +} + static DEFINE_SPINLOCK(efi_runtime_lock); /* From aba7e066c738d4b349413a271b2a236aa55bacbc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 2 Aug 2023 17:17:04 +0200 Subject: [PATCH 047/333] efi/x86: Ensure that EFI_RUNTIME_MAP is enabled for kexec CONFIG_EFI_RUNTIME_MAP needs to be enabled in order for kexec to be able to provide the required information about the EFI runtime mappings to the incoming kernel, regardless of whether kexec_load() or kexec_file_load() is being used. Without this information, kexec boot in EFI mode is not possible. The CONFIG_EFI_RUNTIME_MAP option is currently directly configurable if CONFIG_EXPERT is enabled, so that it can be turned on for debugging purposes even if KEXEC is not enabled. However, the upshot of this is that it can also be disabled even when it shouldn't. So tweak the Kconfig declarations to avoid this situation. Reported-by: Kirill A. 
Shutemov Signed-off-by: Ard Biesheuvel --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 982b777eadc7..66bfabae8814 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1945,6 +1945,7 @@ config EFI select UCS2_STRING select EFI_RUNTIME_WRAPPERS select ARCH_USE_MEMREMAP_PROT + select EFI_RUNTIME_MAP if KEXEC_CORE help This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). @@ -2020,7 +2021,6 @@ config EFI_MAX_FAKE_MEM config EFI_RUNTIME_MAP bool "Export EFI runtime maps to sysfs" if EXPERT depends on EFI - default KEXEC_CORE help Export EFI runtime memory regions to /sys/firmware/efi/runtime-map. That memory map is required by the 2nd kernel to set up EFI virtual From e7761d827e99919c32400056a884e481ef008ec4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 16 Aug 2023 21:05:57 +0200 Subject: [PATCH 048/333] efi/unaccepted: Use ACPI reclaim memory for unaccepted memory table Kyril reports that crashkernels fail to work on confidential VMs that rely on the unaccepted memory table, and this appears to be caused by the fact that it is not considered part of the set of firmware tables that the crashkernel needs to map. This is an oversight, and a result of the use of the EFI_LOADER_DATA memory type for this table. The correct memory type to use for any firmware table is EFI_ACPI_RECLAIM_MEMORY (including ones created by the EFI stub), even though the name suggests that it is specific to ACPI. ACPI reclaim means that the memory is used by the firmware to expose information to the operating system, but that the memory region has no special significance to the firmware itself, and the OS is free to reclaim the memory and use it as ordinary memory if it is not interested in the contents, or if it has already consumed them.
In Linux, this memory is never reclaimed, but it is always covered by the kernel direct map and generally made accessible as ordinary memory. On x86, ACPI reclaim memory is translated into E820_ACPI, which the kexec logic already recognizes as memory that the crashkernel may need to access, and so it will be mapped and accessible to the booting crash kernel. Fixes: 745e3ed85f71 ("efi/libstub: Implement support for unaccepted memory") Reported-by: Kirill A. Shutemov Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/unaccepted_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/unaccepted_memory.c b/drivers/firmware/efi/libstub/unaccepted_memory.c index ca61f4733ea5..9a655f30ba47 100644 --- a/drivers/firmware/efi/libstub/unaccepted_memory.c +++ b/drivers/firmware/efi/libstub/unaccepted_memory.c @@ -62,7 +62,7 @@ efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc, bitmap_size = DIV_ROUND_UP(unaccepted_end - unaccepted_start, EFI_UNACCEPTED_UNIT_SIZE * BITS_PER_BYTE); - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, sizeof(*unaccepted_table) + bitmap_size, (void **)&unaccepted_table); if (status != EFI_SUCCESS) { From 79b83606abc778aa3cbee535b362ce905d0b9448 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 10 Sep 2023 06:54:45 +0200 Subject: [PATCH 049/333] efivarfs: fix statfs() on efivarfs Some firmware (notably U-Boot) provides GetVariable() and GetNextVariableName() but not QueryVariableInfo(). With commit d86ff3333cb1 ("efivarfs: expose used and total size") the statfs syscall was broken for such firmware. If QueryVariableInfo() does not exist or returns EFI_UNSUPPORTED, just report the file system size as 0 as statfs_simple() previously did.
Fixes: d86ff3333cb1 ("efivarfs: expose used and total size") Link: https://lore.kernel.org/all/20230910045445.41632-1-heinrich.schuchardt@canonical.com/ Signed-off-by: Heinrich Schuchardt [ardb: log warning on QueryVariableInfo() failure] Reviewed-by: Ilias Apalodimas Signed-off-by: Ard Biesheuvel --- fs/efivarfs/super.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index e028fafa04f3..996271473609 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -32,10 +32,16 @@ static int efivarfs_statfs(struct dentry *dentry, struct kstatfs *buf) u64 storage_space, remaining_space, max_variable_size; efi_status_t status; - status = efivar_query_variable_info(attr, &storage_space, &remaining_space, - &max_variable_size); - if (status != EFI_SUCCESS) - return efi_status_to_err(status); + /* Some UEFI firmware does not implement QueryVariableInfo() */ + storage_space = remaining_space = 0; + if (efi_rt_services_supported(EFI_RT_SUPPORTED_QUERY_VARIABLE_INFO)) { + status = efivar_query_variable_info(attr, &storage_space, + &remaining_space, + &max_variable_size); + if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) + pr_warn_ratelimited("query_variable_info() failed: 0x%lx\n", + status); + } /* * This is not a normal filesystem, so no point in pretending it has a block From 23a3bfd4ba7acd36abf52b78605f61b21bdac216 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Sep 2023 19:04:45 +0200 Subject: [PATCH 050/333] netfilter: nf_tables: disallow element removal on anonymous sets Anonymous sets need to be populated once at creation and then they are bound to rule since 938154b93be8 ("netfilter: nf_tables: reject unbound anonymous set before commit phase"), otherwise transaction reports EINVAL. Userspace does not need to delete elements of anonymous sets that are not yet bound, reject this with EOPNOTSUPP. 
From flush command path, skip anonymous sets, they are expected to be bound already. Otherwise, EINVAL is hit at the end of this transaction for unbound sets. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7b59311931fb..c1e485aee763 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1446,8 +1446,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -7191,8 +7190,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && - (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); From f26a679ed799deef9e2934a6b60b8f38bdbf4921 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 6 Sep 2023 11:48:42 +0300 Subject: [PATCH 051/333] usb: typec: ucsi: Fix NULL pointer dereference Making sure the UCSI debugfs entry actually exists before attempting to remove it. 
Fixes: df0383ffad64 ("usb: typec: ucsi: Add debugfs for ucsi commands") Reported-by: Dave Hansen Closes: https://lore.kernel.org/linux-usb/700df3c4-2f6c-85f9-6c61-065bc5b2db3a@intel.com/ Suggested-by: Dave Hansen Suggested-by: Mario Limonciello Cc: Saranya Gopal Signed-off-by: Heikki Krogerus Cc: Thorsten Leemhuis Link: https://lore.kernel.org/r/20230906084842.1922052-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/typec/ucsi/debugfs.c b/drivers/usb/typec/ucsi/debugfs.c index 0c7bf88d4a7f..f67733cecfdf 100644 --- a/drivers/usb/typec/ucsi/debugfs.c +++ b/drivers/usb/typec/ucsi/debugfs.c @@ -84,6 +84,9 @@ void ucsi_debugfs_register(struct ucsi *ucsi) void ucsi_debugfs_unregister(struct ucsi *ucsi) { + if (IS_ERR_OR_NULL(ucsi) || !ucsi->debugfs) + return; + debugfs_remove_recursive(ucsi->debugfs->dentry); kfree(ucsi->debugfs); } From 6223e073db78458f8846c380ccd224a7a73a3867 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 11 Sep 2023 14:42:47 +0200 Subject: [PATCH 052/333] regulator: Fix voltage range selection Use the correct field to fix wrong voltage range selection on regulators such as tps6287x since the blamed commit. 
Fixes: 269cb04b601d ("regulator: Use bitfield values for range selectors") Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20230911-regulator-voltage-sel-v1-1-886eb1ade8d8@axis.com Signed-off-by: Mark Brown --- drivers/regulator/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index 5ad5f3b3a6b5..d49268336553 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -197,7 +197,7 @@ int regulator_set_voltage_sel_pickable_regmap(struct regulator_dev *rdev, sel += rdev->desc->linear_ranges[i].min_sel; range = rdev->desc->linear_range_selectors_bitfield[i]; - range <<= ffs(rdev->desc->vsel_mask) - 1; + range <<= ffs(rdev->desc->vsel_range_mask) - 1; if (rdev->desc->vsel_reg == rdev->desc->vsel_range_reg) { ret = regmap_update_bits(rdev->regmap, From 2f9426905a63be7ccf8cd10109caf1848aa0993a Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Mon, 11 Sep 2023 14:38:07 +0800 Subject: [PATCH 053/333] ASoC: fsl: imx-pcm-rpmsg: Add SNDRV_PCM_INFO_BATCH flag The rpmsg pcm device is a device which should support double buffering. Found this issue with pipewire. When there is no SNDRV_PCM_INFO_BATCH flag in driver, the pipewire will set headroom to be zero, and because rpmsg pcm device don't support residue report, when the latency setting is small, the "delay" always larger than "target" in alsa-pcm.c, that reading next period data is not scheduled on time. With SNDRV_PCM_INFO_BATCH flag in driver, the pipewire will select a smaller period size for device, then the task of reading next period data will be scheduled on time. 
Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1694414287-13291-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-pcm-rpmsg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/imx-pcm-rpmsg.c b/sound/soc/fsl/imx-pcm-rpmsg.c index d63782b8bdef..bb736d45c9e0 100644 --- a/sound/soc/fsl/imx-pcm-rpmsg.c +++ b/sound/soc/fsl/imx-pcm-rpmsg.c @@ -19,6 +19,7 @@ static struct snd_pcm_hardware imx_rpmsg_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_BATCH | SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP | From 8a81cf96f5510aaf9a65d103f7405079a7b0fcc5 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 7 Sep 2023 11:55:18 +0200 Subject: [PATCH 054/333] thermal/of: add missing of_node_put() for_each_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put. This was done using the Coccinelle semantic patch iterators/for_each_child.cocci Signed-off-by: Julia Lawall Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_of.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 4ca905723429..1e0655b63259 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -37,8 +37,10 @@ static int of_find_trip_id(struct device_node *np, struct device_node *trip) */ for_each_child_of_node(trips, t) { - if (t == trip) + if (t == trip) { + of_node_put(t); goto out; + } i++; } @@ -401,8 +403,10 @@ static int thermal_of_for_each_cooling_maps(struct thermal_zone_device *tz, for_each_child_of_node(cm_np, child) { ret = thermal_of_for_each_cooling_device(tz_np, child, tz, cdev, action); - if (ret) + if (ret) { + of_node_put(child); break; + } } of_node_put(cm_np); From ebc7abb35b258152d4a424f89d7c03db1d7ce61c Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 7 Sep 2023 20:18:56 +0200 Subject: [PATCH 055/333] thermal: Constify the trip argument of the .get_trend() zone callback Add 'const' to the definition of the 'trip' argument of the .get_trend() thermal zone callback to indicate that the trip point passed to it should not be modified by it and adjust the callback functions implementing it, thermal_get_trend() in the ACPI thermal driver and __ti_thermal_get_trend(), accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Michal Wilczynski --- drivers/acpi/thermal.c | 2 +- drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 3 ++- include/linux/thermal.h | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index f14e68266ccd..312730f8272e 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -492,7 +492,7 @@ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp) } static int thermal_get_trend(struct thermal_zone_device *thermal, - struct thermal_trip *trip, + const struct thermal_trip *trip, enum thermal_trend *trend) { struct acpi_thermal *tz = thermal_zone_device_priv(thermal); diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 6ba2613627e1..0cf0826b805a 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -110,7 +110,8 @@ static inline int __ti_thermal_get_temp(struct thermal_zone_device *tz, int *tem } static int __ti_thermal_get_trend(struct thermal_zone_device *tz, - struct thermal_trip *trip, enum thermal_trend *trend) + const struct thermal_trip *trip, + enum thermal_trend *trend) { struct ti_thermal_data *data = thermal_zone_device_priv(tz); struct ti_bandgap *bgp; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index c99440aac1a1..a5ae4af955ff 100644 --- a/include/linux/thermal.h +++ 
b/include/linux/thermal.h @@ -80,8 +80,8 @@ struct thermal_zone_device_ops { int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); - int (*get_trend) (struct thermal_zone_device *, struct thermal_trip *, - enum thermal_trend *); + int (*get_trend) (struct thermal_zone_device *, + const struct thermal_trip *, enum thermal_trend *); void (*hot)(struct thermal_zone_device *); void (*critical)(struct thermal_zone_device *); }; From 954998b60caa8f2a3bf3abe490de6f08d283687a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:37 -0400 Subject: [PATCH 056/333] NFS: Fix error handling for O_DIRECT write scheduling If we fail to schedule a request for transmission, there are 2 possibilities: 1) Either we hit a fatal error, and we just want to drop the remaining requests on the floor. 2) We were asked to try again, in which case we should allow the outstanding RPC calls to complete, so that we can recoalesce requests and try again. 
Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 62 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 47d892a1d363..ee88f0a6e7b8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -528,10 +528,9 @@ nfs_direct_write_scan_commit_list(struct inode *inode, static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; - struct nfs_page *req, *tmp; + struct nfs_page *req; LIST_HEAD(reqs); struct nfs_commit_info cinfo; - LIST_HEAD(failed); nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); @@ -549,27 +548,36 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; - list_for_each_entry_safe(req, tmp, &reqs, wb_list) { + while (!list_empty(&reqs)) { + req = nfs_list_entry(reqs.next); /* Bump the transmission count */ req->wb_nio++; if (!nfs_pageio_add_request(&desc, req)) { - nfs_list_move_request(req, &failed); spin_lock(&cinfo.inode->i_lock); - dreq->flags = 0; - if (desc.pg_error < 0) + if (dreq->error < 0) { + desc.pg_error = dreq->error; + } else if (desc.pg_error != -EAGAIN) { + dreq->flags = 0; + if (!desc.pg_error) + desc.pg_error = -EIO; dreq->error = desc.pg_error; - else - dreq->error = -EIO; + } else + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; spin_unlock(&cinfo.inode->i_lock); + break; } nfs_release_request(req); } nfs_pageio_complete(&desc); - while (!list_empty(&failed)) { - req = nfs_list_entry(failed.next); + while (!list_empty(&reqs)) { + req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); nfs_unlock_and_release_request(req); + if (desc.pg_error == -EAGAIN) + nfs_mark_request_commit(req, NULL, &cinfo, 0); + else + nfs_release_request(req); } if (put_dreq(dreq)) @@ -794,9 
+802,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; + struct nfs_commit_info cinfo; ssize_t result = 0; size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); + bool defer = false; trace_nfs_direct_write_schedule_iovec(dreq); @@ -837,17 +847,37 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, break; } - nfs_lock_request(req); - if (!nfs_pageio_add_request(&desc, req)) { - result = desc.pg_error; - nfs_unlock_and_release_request(req); - break; - } pgbase = 0; bytes -= req_len; requested_bytes += req_len; pos += req_len; dreq->bytes_left -= req_len; + + if (defer) { + nfs_mark_request_commit(req, NULL, &cinfo, 0); + continue; + } + + nfs_lock_request(req); + if (nfs_pageio_add_request(&desc, req)) + continue; + + /* Exit on hard errors */ + if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) { + result = desc.pg_error; + nfs_unlock_and_release_request(req); + break; + } + + /* If the error is soft, defer remaining requests */ + nfs_init_cinfo_from_dreq(&cinfo, dreq); + spin_lock(&cinfo.inode->i_lock); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&cinfo.inode->i_lock); + nfs_unlock_request(req); + nfs_mark_request_commit(req, NULL, &cinfo, 0); + desc.pg_error = 0; + defer = true; } nfs_direct_release_pages(pagevec, npages); kvfree(pagevec); From 5290e88ba2c742ca77c5f5b690e5af549cfd8591 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Mon, 7 Aug 2023 09:17:30 -0500 Subject: [PATCH 057/333] x86/platform/uv: Use alternate source for socket to node data The UV code attempts to build a set of tables to allow it to do bidirectional socket<=>node lookups. But when nr_cpus is set to a smaller number than actually present, the cpu_to_node() mapping information for unused CPUs is not available to build_socket_tables(). 
This results in skipping some nodes or sockets when creating the tables and leaving some -1's for later code to trip over, causing oopses. The problem is that the socket<=>node lookups are created by doing a loop over all CPUs, then looking up the CPU's APICID and socket. But if a CPU is not present, there is no way to start this lookup. Instead of looping over all CPUs, take CPUs out of the equation entirely. Loop over all APICIDs which are mapped to a valid NUMA node. Then just extract the socket-id from the APICID. This avoids tripping over disabled CPUs. Fixes: 8a50c5851927 ("x86/platform/uv: UV support for sub-NUMA clustering") Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230807141730.1117278-1-steve.wahl%40hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d9f5d7492f83..205cee567629 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1533,7 +1533,7 @@ static void __init build_socket_tables(void) { struct uv_gam_range_entry *gre = uv_gre_table; int nums, numn, nump; - int cpu, i, lnid; + int i, lnid, apicid; int minsock = _min_socket; int maxsock = _max_socket; int minpnode = _min_pnode; @@ -1584,15 +1584,14 @@ static void __init build_socket_tables(void) /* Set socket -> node values: */ lnid = NUMA_NO_NODE; - for_each_possible_cpu(cpu) { - int nid = cpu_to_node(cpu); - int apicid, sockid; + for (apicid = 0; apicid < ARRAY_SIZE(__apicid_to_node); apicid++) { + int nid = __apicid_to_node[apicid]; + int sockid; - if (lnid == nid) + if ((nid == NUMA_NO_NODE) || (lnid == nid)) continue; lnid = nid; - apicid = per_cpu(x86_cpu_to_apicid, cpu); sockid = apicid >> uv_cpuid.socketid_shift; if (_socket_to_node[sockid - minsock] == SOCK_EMPTY) From 9855d60cfc720ff32355484c119acafd3c4dc806 Mon Sep 17 00:00:00
2001 From: Mika Westerberg Date: Mon, 11 Sep 2023 10:46:16 +0300 Subject: [PATCH 058/333] spi: intel-pci: Add support for Granite Rapids SPI serial flash Intel Granite Rapids has a flash controller that is compatible with the other Cannon Lake derivatives. Add Granite Rapids PCI ID to the driver list of supported devices. Signed-off-by: Mika Westerberg Link: https://lore.kernel.org/r/20230911074616.3473347-1-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index a7381e774b95..57d767a68e7b 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -72,6 +72,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x5794), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info }, From d52b59315bf5e86e83c00bfae47cedd388dad6a8 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 8 Sep 2023 21:39:20 +0800 Subject: [PATCH 059/333] bpf: Adjust size_index according to the value of KMALLOC_MIN_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following warning was reported when running "./test_progs -a link_api -a linked_list" on a RISC-V QEMU VM: ------------[ cut here ]------------ WARNING: CPU: 3 PID: 261 at kernel/bpf/memalloc.c:342 bpf_mem_refill Modules linked in: bpf_testmod(OE) CPU: 3 PID: 261 Comm: test_progs- ... 
6.5.0-rc5-01743-gdcb152bb8328 #2 Hardware name: riscv-virtio,qemu (DT) epc : bpf_mem_refill+0x1fc/0x206 ra : irq_work_single+0x68/0x70 epc : ffffffff801b1bc4 ra : ffffffff8015fe84 sp : ff2000000001be20 gp : ffffffff82d26138 tp : ff6000008477a800 t0 : 0000000000046600 t1 : ffffffff812b6ddc t2 : 0000000000000000 s0 : ff2000000001be70 s1 : ff5ffffffffe8998 a0 : ff5ffffffffe8998 a1 : ff600003fef4b000 a2 : 000000000000003f a3 : ffffffff80008250 a4 : 0000000000000060 a5 : 0000000000000080 a6 : 0000000000000000 a7 : 0000000000735049 s2 : ff5ffffffffe8998 s3 : 0000000000000022 s4 : 0000000000001000 s5 : 0000000000000007 s6 : ff5ffffffffe8570 s7 : ffffffff82d6bd30 s8 : 000000000000003f s9 : ffffffff82d2c5e8 s10: 000000000000ffff s11: ffffffff82d2c5d8 t3 : ffffffff81ea8f28 t4 : 0000000000000000 t5 : ff6000008fd28278 t6 : 0000000000040000 [] bpf_mem_refill+0x1fc/0x206 [] irq_work_single+0x68/0x70 [] irq_work_run_list+0x28/0x36 [] irq_work_run+0x38/0x66 [] handle_IPI+0x3a/0xb4 [] handle_percpu_devid_irq+0xa4/0x1f8 [] generic_handle_domain_irq+0x28/0x36 [] ipi_mux_process+0xac/0xfa [] sbi_ipi_handle+0x2e/0x88 [] generic_handle_domain_irq+0x28/0x36 [] riscv_intc_irq+0x36/0x4e [] handle_riscv_irq+0x54/0x86 [] do_irq+0x66/0x98 ---[ end trace 0000000000000000 ]--- The warning is due to WARN_ON_ONCE(tgt->unit_size != c->unit_size) in free_bulk(). The direct reason is that a object is allocated and freed by bpf_mem_caches with different unit_size. The root cause is that KMALLOC_MIN_SIZE is 64 and there is no 96-bytes slab cache in the specific VM. When linked_list test allocates a 72-bytes object through bpf_obj_new(), bpf_global_ma will allocate it from a bpf_mem_cache with 96-bytes unit_size, but this bpf_mem_cache is backed by 128-bytes slab cache. When the object is freed, bpf_mem_free() uses ksize() to choose the corresponding bpf_mem_cache. 
Because the object is allocated from 128-bytes slab cache, ksize() returns 128, bpf_mem_free() chooses a 128-bytes bpf_mem_cache to free the object and triggers the warning. A similar warning will also be reported when using CONFIG_SLAB instead of CONFIG_SLUB in a x86-64 kernel. Because CONFIG_SLUB defines KMALLOC_MIN_SIZE as 8 but CONFIG_SLAB defines KMALLOC_MIN_SIZE as 32. An alternative fix is to use kmalloc_size_round() in bpf_mem_alloc() to choose a bpf_mem_cache which has the same unit_size with the backing slab cache, but it may introduce performance degradation, so fix the warning by adjusting the indexes in size_index according to the value of KMALLOC_MIN_SIZE just like setup_kmalloc_cache_index_table() does. Fixes: 822fb26bdb55 ("bpf: Add a hint to allocated objects.") Reported-by: Björn Töpel Closes: https://lore.kernel.org/bpf/87jztjmmy4.fsf@all.your.base.are.belong.to.us Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230908133923.2675053-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 9c49ae53deaf..98d9e96fba3c 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -916,3 +916,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) return !ret ? NULL : ret + LLIST_NODE_SZ; } + +/* Most of the logic is taken from setup_kmalloc_cache_index_table() */ +static __init int bpf_mem_cache_adjust_size(void) +{ + unsigned int size, index; + + /* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be + * up-to 256-bytes. 
+ */ + size = KMALLOC_MIN_SIZE; + if (size <= 192) + index = size_index[(size - 1) / 8]; + else + index = fls(size - 1) - 1; + for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8) + size_index[(size - 1) / 8] = index; + + /* The minimal alignment is 64-bytes, so disable 96-bytes cache and + * use 128-bytes cache instead. + */ + if (KMALLOC_MIN_SIZE >= 64) { + index = size_index[(128 - 1) / 8]; + for (size = 64 + 8; size <= 96; size += 8) + size_index[(size - 1) / 8] = index; + } + + /* The minimal alignment is 128-bytes, so disable 192-bytes cache and + * use 256-bytes cache instead. + */ + if (KMALLOC_MIN_SIZE >= 128) { + index = fls(256 - 1) - 1; + for (size = 128 + 8; size <= 192; size += 8) + size_index[(size - 1) / 8] = index; + } + + return 0; +} +subsys_initcall(bpf_mem_cache_adjust_size); From b1d53958b69312e43c118d4093d8f93d3f6f80af Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 8 Sep 2023 21:39:21 +0800 Subject: [PATCH 060/333] bpf: Don't prefill for unused bpf_mem_cache When the unit_size of a bpf_mem_cache is unmatched with the object_size of the underlying slab cache, the bpf_mem_cache will not be used, and the allocation will be redirected to a bpf_mem_cache with a bigger unit_size instead, so there is no need to prefill for these unused bpf_mem_caches. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230908133923.2675053-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 98d9e96fba3c..90c1ed8210a2 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c) * Typical case will be between 11K and 116K closer to 11K. * bpf progs can and should share bpf_mem_cache when possible. 
*/ - -static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) +static void init_refill_work(struct bpf_mem_cache *c) { init_irq_work(&c->refill_work, bpf_mem_refill); if (c->unit_size <= 256) { @@ -476,7 +475,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) c->high_watermark = max(96 * 256 / c->unit_size, 3); } c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1); +} +static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) +{ /* To avoid consuming memory assume that 1st run of bpf * prog won't be doing more than 4 map_update_elem from * irq disabled region @@ -521,6 +523,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; + init_refill_work(c); prefill_mem_cache(c, cpu); } ma->cache = pc; @@ -544,6 +547,15 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c->unit_size = sizes[i]; c->objcg = objcg; c->tgt = c; + + init_refill_work(c); + /* Another bpf_mem_cache will be used when allocating + * c->unit_size in bpf_mem_alloc(), so doesn't prefill + * for the bpf_mem_cache because these free objects will + * never be used. + */ + if (i != bpf_mem_cache_idx(c->unit_size)) + continue; prefill_mem_cache(c, cpu); } } From c930472552022bd09aab3cd946ba3f243070d5c7 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 8 Sep 2023 21:39:22 +0800 Subject: [PATCH 061/333] bpf: Ensure unit_size is matched with slab cache object size Add extra check in bpf_mem_alloc_init() to ensure the unit_size of bpf_mem_cache is matched with the object_size of underlying slab cache. If these two sizes are unmatched, print a warning once and return -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and the potential issue can be prevented. 
Suggested-by: Alexei Starovoitov Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230908133923.2675053-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 90c1ed8210a2..1c22b90e754a 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); } +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) +{ + struct llist_node *first; + unsigned int obj_size; + + first = c->free_llist.first; + if (!first) + return 0; + + obj_size = ksize(first); + if (obj_size != c->unit_size) { + WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", + idx, obj_size, c->unit_size); + return -EINVAL; + } + return 0; +} + /* When size != 0 bpf_mem_cache for each cpu. * This is typical bpf hash map use case when all elements have equal size. 
* @@ -496,10 +514,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; + int cpu, i, err, unit_size, percpu_size = 0; struct bpf_mem_caches *cc, __percpu *pcc; struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; - int cpu, i, unit_size, percpu_size = 0; if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); @@ -537,6 +555,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; + err = 0; #ifdef CONFIG_MEMCG_KMEM objcg = get_obj_cgroup_from_current(); #endif @@ -557,10 +576,20 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) if (i != bpf_mem_cache_idx(c->unit_size)) continue; prefill_mem_cache(c, cpu); + err = check_obj_size(c, i); + if (err) + goto out; } } + +out: ma->caches = pcc; - return 0; + /* refill_work is either zeroed or initialized, so it is safe to + * call irq_work_sync(). + */ + if (err) + bpf_mem_alloc_destroy(ma); + return err; } static void drain_mem_cache(struct bpf_mem_cache *c) From f0a42ab5890f749626b35f9fddd8d0704fc89524 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 8 Sep 2023 21:39:23 +0800 Subject: [PATCH 062/333] selftests/bpf: Test all valid alloc sizes for bpf mem allocator Add a test to test all possible and valid allocation size for bpf memory allocator. For each possible allocation size, the test uses the following two steps to test the alloc and free path: 1) allocate N (N > high_watermark) objects to trigger the refill executed in irq_work. 2) free N objects to trigger the freeing executed in irq_work. 
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230908133923.2675053-5-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/test_bpf_ma.c | 50 +++++++ .../testing/selftests/bpf/progs/test_bpf_ma.c | 123 ++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c create mode 100644 tools/testing/selftests/bpf/progs/test_bpf_ma.c diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c new file mode 100644 index 000000000000..0cca4e8ae38e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "test_bpf_ma.skel.h" + +void test_test_bpf_ma(void) +{ + struct test_bpf_ma *skel; + struct btf *btf; + int i, err; + + skel = test_bpf_ma__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + btf = bpf_object__btf(skel->obj); + if (!ASSERT_OK_PTR(btf, "btf")) + goto out; + + for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { + char name[32]; + int id; + + snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, "bin_data")) + goto out; + skel->rodata->data_btf_ids[i] = id; + } + + err = test_bpf_ma__load(skel); + if (!ASSERT_OK(err, "load")) + goto out; + + err = test_bpf_ma__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_OK(skel->bss->err, "test error"); +out: + test_bpf_ma__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c new file mode 100644 index 000000000000..ecde41ae0fc8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c 
@@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_experimental.h" +#include "bpf_misc.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +struct generic_map_value { + void *data; +}; + +char _license[] SEC("license") = "GPL"; + +const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + +int err = 0; +int pid = 0; + +#define DEFINE_ARRAY_WITH_KPTR(_size) \ + struct bin_data_##_size { \ + char data[_size - sizeof(void *)]; \ + }; \ + struct map_value_##_size { \ + struct bin_data_##_size __kptr * data; \ + /* To emit BTF info for bin_data_xx */ \ + struct bin_data_##_size not_used; \ + }; \ + struct { \ + __uint(type, BPF_MAP_TYPE_ARRAY); \ + __type(key, int); \ + __type(value, struct map_value_##_size); \ + __uint(max_entries, 128); \ + } array_##_size SEC(".maps"); + +static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch, + unsigned int idx) +{ + struct generic_map_value *value; + unsigned int i, key; + void *old, *new; + + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 1; + return; + } + new = bpf_obj_new_impl(data_btf_ids[idx], NULL); + if (!new) { + err = 2; + return; + } + old = bpf_kptr_xchg(&value->data, new); + if (old) { + bpf_obj_drop(old); + err = 3; + return; + } + } + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 4; + return; + } + old = bpf_kptr_xchg(&value->data, NULL); + if (!old) { + err = 5; + return; + } + bpf_obj_drop(old); + } +} + +#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \ + batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx) + +DEFINE_ARRAY_WITH_KPTR(8); +DEFINE_ARRAY_WITH_KPTR(16); +DEFINE_ARRAY_WITH_KPTR(32); 
+DEFINE_ARRAY_WITH_KPTR(64); +DEFINE_ARRAY_WITH_KPTR(96); +DEFINE_ARRAY_WITH_KPTR(128); +DEFINE_ARRAY_WITH_KPTR(192); +DEFINE_ARRAY_WITH_KPTR(256); +DEFINE_ARRAY_WITH_KPTR(512); +DEFINE_ARRAY_WITH_KPTR(1024); +DEFINE_ARRAY_WITH_KPTR(2048); +DEFINE_ARRAY_WITH_KPTR(4096); + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int test_bpf_mem_alloc_free(void *ctx) +{ + if ((u32)bpf_get_current_pid_tgid() != pid) + return 0; + + /* Alloc 128 8-bytes objects in batch to trigger refilling, + * then free 128 8-bytes objects in batch to trigger freeing. + */ + CALL_BATCH_ALLOC_FREE(8, 128, 0); + CALL_BATCH_ALLOC_FREE(16, 128, 1); + CALL_BATCH_ALLOC_FREE(32, 128, 2); + CALL_BATCH_ALLOC_FREE(64, 128, 3); + CALL_BATCH_ALLOC_FREE(96, 128, 4); + CALL_BATCH_ALLOC_FREE(128, 128, 5); + CALL_BATCH_ALLOC_FREE(192, 128, 6); + CALL_BATCH_ALLOC_FREE(256, 128, 7); + CALL_BATCH_ALLOC_FREE(512, 64, 8); + CALL_BATCH_ALLOC_FREE(1024, 32, 9); + CALL_BATCH_ALLOC_FREE(2048, 16, 10); + CALL_BATCH_ALLOC_FREE(4096, 8, 11); + + return 0; +} From 7cb779a6867fea00b4209bcf6de2f178a743247d Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 11 Sep 2023 12:47:30 -0700 Subject: [PATCH 063/333] bpf: Clarify error expectations from bpf_clone_redirect Commit 151e887d8ff9 ("veth: Fixing transmit return status for dropped packets") exposed the fact that bpf_clone_redirect is capable of returning raw NET_XMIT_XXX return codes. This is in the conflict with its UAPI doc which says the following: "0 on success, or a negative error in case of failure." Update the UAPI to reflect the fact that bpf_clone_redirect can return positive error numbers, but don't explicitly define their meaning. 
Reported-by: Daniel Borkmann Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230911194731.286342-1-sdf@google.com --- include/uapi/linux/bpf.h | 4 +++- tools/include/uapi/linux/bpf.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8790b3962e4b..0448700890f7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1962,7 +1962,9 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. * * u64 bpf_get_current_pid_tgid(void) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8790b3962e4b..0448700890f7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1962,7 +1962,9 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. 
* * u64 bpf_get_current_pid_tgid(void) * Description From b772b70b69046c5b76e3f2eda680f692dee5e6d5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 11 Sep 2023 12:47:31 -0700 Subject: [PATCH 064/333] selftests/bpf: Update bpf_clone_redirect expected return code Commit 151e887d8ff9 ("veth: Fixing transmit return status for dropped packets") started propagating proper NET_XMIT_DROP error to the caller which means it's now possible to get positive error code when calling bpf_clone_redirect() in this particular test. Update the test to reflect that. Reported-by: Daniel Borkmann Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230911194731.286342-2-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/empty_skb.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 3b77d8a422db..261228eb68e8 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -24,6 +24,7 @@ void test_empty_skb(void) int *ifindex; int err; int ret; + int lwt_egress_ret; /* expected retval at lwt/egress */ bool success_on_tc; } tests[] = { /* Empty packets are always rejected. */ @@ -57,6 +58,7 @@ void test_empty_skb(void) .data_size_in = sizeof(eth_hlen), .ifindex = &veth_ifindex, .ret = -ERANGE, + .lwt_egress_ret = -ERANGE, .success_on_tc = true, }, { @@ -70,6 +72,7 @@ void test_empty_skb(void) .data_size_in = sizeof(eth_hlen), .ifindex = &ipip_ifindex, .ret = -ERANGE, + .lwt_egress_ret = -ERANGE, }, /* ETH_HLEN+1-sized packet should be redirected. 
*/ @@ -79,6 +82,7 @@ void test_empty_skb(void) .data_in = eth_hlen_pp, .data_size_in = sizeof(eth_hlen_pp), .ifindex = &veth_ifindex, + .lwt_egress_ret = 1, /* veth_xmit NET_XMIT_DROP */ }, { .msg = "ipip ETH_HLEN+1 packet ingress", @@ -108,8 +112,12 @@ void test_empty_skb(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { bpf_object__for_each_program(prog, bpf_obj->obj) { - char buf[128]; + bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL; bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + int expected_ret; + char buf[128]; + + expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret; tattr.data_in = tests[i].data_in; tattr.data_size_in = tests[i].data_size_in; @@ -128,7 +136,7 @@ void test_empty_skb(void) if (at_tc && tests[i].success_on_tc) ASSERT_GE(bpf_obj->bss->ret, 0, buf); else - ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf); + ASSERT_EQ(bpf_obj->bss->ret, expected_ret, buf); } } From df203da47f4428bc286fc99318936416253a321c Mon Sep 17 00:00:00 2001 From: Nigel Croxon Date: Mon, 11 Sep 2023 14:25:23 -0700 Subject: [PATCH 065/333] md/raid1: fix error: ISO C90 forbids mixed declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a compile error when this commit is added: md: raid1: fix potential OOB in raid1_remove_disk() drivers/md/raid1.c: In function 'raid1_remove_disk': drivers/md/raid1.c:1844:9: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 1844 |         struct raid1_info *p = conf->mirrors + number;     |         ^~~~~~ That's because the new code was inserted before the struct. The change is move the struct command above this commit. 
Fixes: 8b0472b50bcf ("md: raid1: fix potential OOB in raid1_remove_disk()") Signed-off-by: Nigel Croxon Signed-off-by: Song Liu Link: https://lore.kernel.org/r/46d929d0-2aab-4cf2-b2bf-338963e8ba5a@redhat.com --- drivers/md/raid1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 4b30a1742162..2aabac773fe7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1837,12 +1837,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) struct r1conf *conf = mddev->private; int err = 0; int number = rdev->raid_disk; + struct raid1_info *p = conf->mirrors + number; if (unlikely(number >= conf->raid_disks)) goto abort; - struct raid1_info *p = conf->mirrors + number; - if (rdev != p->rdev) p = conf->mirrors + conf->raid_disks + number; From 81faf9e0c3d39d47c6825469591d60a2cd0bbe10 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 28 Aug 2023 14:18:23 -0400 Subject: [PATCH 066/333] drm/amdkfd: Fix reg offset for setting CWSR grace period This patch fixes the case where the code currently passes absolute register address and not the reg offset, which HWS expects, when sending the PM4 packet to set/update CWSR grace period. Additionally, cleanup the signature of build_grace_period_packet_info function as it no longer needs the inst parameter. 
Signed-off-by: Mukul Joshi Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 3 +-- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 3 +-- 7 files changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index f1f2c24de081..69810b3f1c63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst) + uint32_t *reg_data) { *reg_data = wait_times; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index ecaead24e8c9..67bcaa3d4226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst); + uint32_t *reg_data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index fa5ee96f8845..3c45a188b701 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - 
uint32_t *reg_data, - uint32_t inst) + uint32_t *reg_data) { *reg_data = wait_times; @@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, SCH_WAVE, grace_period); - *reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - mmCP_IQ_WAIT_TIME2); + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); } void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 936e501908ce..ce424615f59b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst); + uint32_t *reg_data); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b166f30f083e..8a6cb41444a4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1677,8 +1677,7 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm->dev->kfd2kgd->build_grace_period_packet_info( dqm->dev->adev, dqm->wait_times, grace_period, &reg_offset, - &dqm->wait_times, - ffs(dqm->dev->xcc_mask) - 1); + &dqm->wait_times); } dqm_unlock(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 8ce6f5200905..1a03173e2313 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -299,8 +299,7 @@ static int pm_set_grace_period_v9(struct packet_manager *pm, pm->dqm->wait_times, grace_period, &reg_offset, - &reg_data, - 0); + &reg_data); if (grace_period == USE_DEFAULT_GRACE_PERIOD) reg_data = pm->dqm->wait_times; diff --git
a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 8433f99f6667..f3f40dbb8ff7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -326,8 +326,7 @@ struct kfd2kgd_calls { uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst); + uint32_t *reg_data); void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, int *wave_cnt, int *max_waves_per_cu, uint32_t inst); void (*program_trap_handler_settings)(struct amdgpu_device *adev, From 2f06b27444f928a79389b149247508bdad54252b Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 29 Aug 2023 12:06:09 -0400 Subject: [PATCH 067/333] drm/amdkfd: Fix unaligned 64-bit doorbell warning This patch fixes the following unaligned 64-bit doorbell warning seen when submitting packets on HIQ on GFX v9.4.3 by making the HIQ doorbell 64-bit aligned. The warning is seen when GPU is loaded in any mode other than SPX mode. 
[ +0.000301] ------------[ cut here ]------------ [ +0.000003] Unaligned 64-bit doorbell [ +0.000030] WARNING: /amdkfd/kfd_doorbell.c:339 write_kernel_doorbell64+0x72/0x80 [ +0.000003] RIP: 0010:write_kernel_doorbell64+0x72/0x80 [ +0.000004] RSP: 0018:ffffc90004287730 EFLAGS: 00010246 [ +0.000005] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ +0.000003] RDX: 0000000000000001 RSI: ffffffff82837c71 RDI: 00000000ffffffff [ +0.000003] RBP: ffffc90004287748 R08: 0000000000000003 R09: 0000000000000001 [ +0.000002] R10: 000000000000001a R11: ffff88a034008198 R12: ffffc900013bd004 [ +0.000003] R13: 0000000000000008 R14: ffffc900042877b0 R15: 000000000000007f [ +0.000003] FS: 00007fa8c7b62000(0000) GS:ffff889f88400000(0000) knlGS:0000000000000000 [ +0.000004] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000003] CR2: 000056111c45aaf0 CR3: 00000001414f2002 CR4: 0000000000770ee0 [ +0.000003] PKRU: 55555554 [ +0.000002] Call Trace: [ +0.000004] [ +0.000006] kq_submit_packet+0x45/0x50 [amdgpu] [ +0.000524] pm_send_set_resources+0x7f/0xc0 [amdgpu] [ +0.000500] set_sched_resources+0xe4/0x160 [amdgpu] [ +0.000503] start_cpsch+0x1c5/0x2a0 [amdgpu] [ +0.000497] kgd2kfd_device_init.cold+0x816/0xb42 [amdgpu] [ +0.000743] amdgpu_amdkfd_device_init+0x15f/0x1f0 [amdgpu] [ +0.000602] amdgpu_device_init.cold+0x1813/0x2176 [amdgpu] [ +0.000684] ? pci_bus_read_config_word+0x4a/0x80 [ +0.000012] ? 
do_pci_enable_device+0xdc/0x110 [ +0.000008] amdgpu_driver_load_kms+0x1a/0x110 [amdgpu] [ +0.000545] amdgpu_pci_probe+0x197/0x400 [amdgpu] Fixes: c31866651086 ("drm/amdgpu: use doorbell mgr for kfd kernel doorbells") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index c2e0b79dcc6d..7b38537c7c99 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -162,6 +162,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, return NULL; *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx); + inx *= 2; pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" @@ -176,6 +177,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) unsigned int inx; inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); + inx /= 2; mutex_lock(&kfd->doorbell_mutex); __clear_bit(inx, kfd->doorbell_bitmap); From 97e3c6a853f2af9145daf0c6ca25bcdf55c759d4 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 25 Aug 2023 11:59:09 -0400 Subject: [PATCH 068/333] drm/amdgpu: Store CU info from all XCCs for GFX v9.4.3 Currently, we store CU info only for a single XCC assuming that it is the same for all XCCs. However, that may not be true. As a result, store CU info for all XCCs. This info is later used for CU masking. 
Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 72 +++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 8 ++- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 11 ++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 6 +- 14 files changed, 58 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index cdf6087706aa..25d5fda5b243 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c cu_info->cu_active_number = acu_info.number; cu_info->cu_ao_mask = acu_info.ao_cu_mask; memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], - sizeof(acu_info.bitmap)); + sizeof(cu_info->cu_bitmap)); cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 395c1768b9fc..0ca95c4d4bfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -43,6 +43,7 @@ #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L #define AMDGPU_MAX_GC_INSTANCES 8 +#define KGD_MAX_QUEUES 128 #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES #define AMDGPU_MAX_COMPUTE_QUEUES 
KGD_MAX_QUEUES @@ -257,7 +258,7 @@ struct amdgpu_cu_info { uint32_t number; uint32_t ao_cu_mask; uint32_t ao_cu_bitmap[4][4]; - uint32_t bitmap[4][4]; + uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4]; }; struct amdgpu_gfx_ras { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 99f4df133ed3..2cd2ecebf465 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], sizeof(adev->gfx.cu_info.ao_cu_bitmap)); memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], - sizeof(adev->gfx.cu_info.bitmap)); + sizeof(dev_info->cu_bitmap)); dev_info->vram_type = adev->gmc.vram_type; dev_info->vram_bit_width = adev->gmc.vram_width; dev_info->vce_harvest_config = adev->vce.harvest_config; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 0aee9c8288a2..9032d7a24d7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9449,7 +9449,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 5c3db694afa8..762d7a19f1be 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6368,7 +6368,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} */ - cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap; + 
cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index da6caff78c22..34f9211b2679 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) gfx_v6_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v6_0_get_cu_enabled(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 90b034b173c1..c2faf6b4c2fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5119,7 +5119,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) gfx_v7_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v7_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 51c1745c8369..885ebd703260 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) gfx_v8_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v8_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 458faf657042..fd61574a737c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1499,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { - if (cu_info->bitmap[i][j] & mask) { + if (cu_info->bitmap[0][i][j] & mask) { if (counter == pg_always_on_cu_num) WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); if (counter < always_on_cu_num) @@ -7233,7 +7233,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, * SE6,SH0 --> bitmap[2][1] * SE7,SH0 --> bitmap[3][1] */ - cu_info->bitmap[i % 4][j + i / 4] = bitmap; + cu_info->bitmap[0][i % 4][j + i / 4] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 0a26a00074a6..18ce5fe45f6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4259,7 +4259,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) } static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, - u32 bitmap) + u32 bitmap, int xcc_id) { u32 data; @@ -4269,15 +4269,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; - WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data); } -static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) +static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id) { u32 data, mask; - data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG); + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG); + data 
|= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG); data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; @@ -4290,7 +4290,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info) { - int i, j, k, counter, active_cu_number = 0; + int i, j, k, counter, xcc_id, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; unsigned disable_masks[4 * 4]; @@ -4309,46 +4309,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, adev->gfx.config.max_sh_per_se); mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - mask = 1; - ao_bitmap = 0; - counter = 0; - gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0); - gfx_v9_4_3_set_user_cu_inactive_bitmap( - adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); - bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id); + gfx_v9_4_3_set_user_cu_inactive_bitmap( + adev, + disable_masks[i * adev->gfx.config.max_sh_per_se + j], + xcc_id); + bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id); - /* - * The bitmap(and ao_cu_bitmap) in cu_info structure is - * 4x4 size array, and it's usually suitable for Vega - * ASICs which has 4*2 SE/SH layout. - * But for Arcturus, SE/SH layout is changed to 8*1. 
- * To mostly reduce the impact, we make it compatible - * with current bitmap array as below: - * SE4,SH0 --> bitmap[0][1] - * SE5,SH0 --> bitmap[1][1] - * SE6,SH0 --> bitmap[2][1] - * SE7,SH0 --> bitmap[3][1] - */ - cu_info->bitmap[i % 4][j + i / 4] = bitmap; + cu_info->bitmap[xcc_id][i][j] = bitmap; - for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { - if (bitmap & mask) { - if (counter < adev->gfx.config.max_cu_per_sh) - ao_bitmap |= mask; - counter++; + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) { + if (counter < adev->gfx.config.max_cu_per_sh) + ao_bitmap |= mask; + counter++; + } + mask <<= 1; } - mask <<= 1; + active_cu_number += counter; + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } - active_cu_number += counter; - if (i < 2 && j < 2) - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); - cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 86fb7ac7982a..f76b7aee5c0a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2087,7 +2087,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); cu->num_simd_per_cu = cu_info.simd_per_cu; - cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; + cu->num_simd_cores = cu_info.simd_per_cu * + (cu_info.cu_active_number / kdev->kfd->num_nodes); cu->max_waves_simd = cu_info.max_waves_per_simd; cu->wave_front_size = cu_info.wave_front_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index d01bb57733b3..763966236658 
100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -104,11 +104,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0); uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1; + uint32_t cu_active_per_node; amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); - if (cu_mask_count > cu_info.cu_active_number) - cu_mask_count = cu_info.cu_active_number; + cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes; + if (cu_mask_count > cu_active_per_node) + cu_mask_count = cu_active_per_node; /* Exceeding these bounds corrupts the stack and indicates a coding error. * Returning with no CU's enabled will hang the queue, which should be @@ -141,7 +143,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) cu_per_sh[se][sh] = hweight32( - cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); + cu_info.cu_bitmap[0][se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); /* Symmetrically map cu_mask to all SEs & SHs: * se_mask programs up to 2 SH in the upper and lower 16 bits. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ff98fded9534..c54795682dfb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count", dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, offs, "simd_count", - dev->gpu ? (dev->node_props.simd_count * - NUM_XCC(dev->gpu->xcc_mask)) : 0); + dev->gpu ? 
dev->node_props.simd_count : 0); sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, offs, "caches_count", @@ -1604,7 +1603,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, int i, j, k; struct kfd_cache_properties *pcache = NULL; - cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; + cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); first_active_cu = ffs(cu_sibling_map_mask); @@ -1647,7 +1646,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); k += 4; - cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; + cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); } } @@ -1708,8 +1707,8 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, - pcu_info->cu_bitmap[i % 4][j + i / 4], ct, - cu_processor_id, k); + pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct, + cu_processor_id, k); if (ret < 0) break; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index f3f40dbb8ff7..3b5a56585c4b 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -31,12 +31,12 @@ #include #include #include +#include "amdgpu_irq.h" +#include "amdgpu_gfx.h" struct pci_dev; struct amdgpu_device; -#define KGD_MAX_QUEUES 128 - struct kfd_dev; struct kgd_mem; @@ -68,7 +68,7 @@ struct kfd_cu_info { uint32_t wave_front_size; uint32_t max_scratch_slots_per_cu; uint32_t lds_size; - uint32_t cu_bitmap[4][4]; + uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4]; }; /* For 
getting GPU local memory information from KGD */ From 0752e66e91fa86fa5481b04b22053363833ffb85 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 25 Aug 2023 12:18:06 -0400 Subject: [PATCH 069/333] drm/amdkfd: Update cache info reporting for GFX v9.4.3 Update cache info reporting in sysfs to report the correct number of CUs and associated cache information based on different spatial partitioning modes. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 4 ++ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 74 +++++++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 +- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 387a8ef49385..74c2d7a0d628 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -79,6 +79,10 @@ struct crat_header { #define CRAT_SUBTYPE_IOLINK_AFFINITY 5 #define CRAT_SUBTYPE_MAX 6 +/* + * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32 + * as it breaks the ABI. 
+ */ #define CRAT_SIBLINGMAP_SIZE 32 /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c54795682dfb..c8c75ff7cea8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1596,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, struct kfd_gpu_cache_info *pcache_info, struct kfd_cu_info *cu_info, - int cache_type, unsigned int cu_processor_id) + int cache_type, unsigned int cu_processor_id, + struct kfd_node *knode) { unsigned int cu_sibling_map_mask; int first_active_cu; - int i, j, k; + int i, j, k, xcc, start, end; struct kfd_cache_properties *pcache = NULL; - cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0]; + start = ffs(knode->xcc_mask) - 1; + end = start + NUM_XCC(knode->xcc_mask); + cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); first_active_cu = ffs(cu_sibling_map_mask); @@ -1638,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); k = 0; - for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { - pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); - pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); - pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); - pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); - k += 4; + for (xcc = start; xcc < end; xcc++) { + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { + pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + 
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + k += 4; - cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4]; - cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4]; + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + } } } pcache->sibling_map_size = k; @@ -1665,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) { struct kfd_gpu_cache_info *pcache_info = NULL; - int i, j, k; + int i, j, k, xcc, start, end; int ct = 0; unsigned int cu_processor_id; int ret; @@ -1699,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct * then it will consider only one CU from * the shared unit */ + start = ffs(kdev->xcc_mask) - 1; + end = start + NUM_XCC(kdev->xcc_mask); + for (ct = 0; ct < num_of_cache_types; ct++) { cu_processor_id = gpu_processor_id; if (pcache_info[ct].cache_level == 1) { - for (i = 0; i < pcu_info->num_shader_engines; i++) { - for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { - for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { + for (xcc = start; xcc < end; xcc++) { + for (i = 0; i < pcu_info->num_shader_engines; i++) { + for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { + for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { - ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, - pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct, - cu_processor_id, k); + ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, + pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct, + cu_processor_id, k); - if (ret < 0) - break; + if (ret < 0) + break; - if (!ret) { - num_of_entries++; - 
list_add_tail(&props_ext->list, &dev->cache_props); + if (!ret) { + num_of_entries++; + list_add_tail(&props_ext->list, &dev->cache_props); + } + + /* Move to next CU block */ + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= + pcu_info->num_cu_per_sh) ? + pcache_info[ct].num_cu_shared : + (pcu_info->num_cu_per_sh - k); + cu_processor_id += num_cu_shared; } - - /* Move to next CU block */ - num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= - pcu_info->num_cu_per_sh) ? - pcache_info[ct].num_cu_shared : - (pcu_info->num_cu_per_sh - k); - cu_processor_id += num_cu_shared; } } } } else { ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, - pcu_info, ct, cu_processor_id); + pcu_info, ct, cu_processor_id, kdev); if (ret < 0) break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index dea32a9e5506..27386ce9a021 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -89,7 +89,7 @@ struct kfd_mem_properties { struct attribute attr; }; -#define CACHE_SIBLINGMAP_SIZE 64 +#define CACHE_SIBLINGMAP_SIZE 128 struct kfd_cache_properties { struct list_head list; From fc6efed2c728c9c10b058512fc9c1613f870a8e8 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 22 Aug 2023 11:35:25 -0400 Subject: [PATCH 070/333] drm/amdkfd: Update CU masking for GFX 9.4.3 The CU mask passed from user-space will change based on different spatial partitioning mode. As a result, update CU masking code for GFX9.4.3 to work for all partitioning modes. 
Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 28 +++++++++--- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 44 ++++++++++++------- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2 +- 7 files changed, 55 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 763966236658..447829c22295 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -97,14 +97,16 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, - uint32_t *se_mask) + uint32_t *se_mask, uint32_t inst) { struct kfd_cu_info cu_info; uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0); uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; - int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1; + int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 
2 : 1; uint32_t cu_active_per_node; + int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask); + int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1; amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); @@ -143,7 +145,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) cu_per_sh[se][sh] = hweight32( - cu_info.cu_bitmap[0][se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); + cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) * + cu_bitmap_sh_mul]); /* Symmetrically map cu_mask to all SEs & SHs: * se_mask programs up to 2 SH in the upper and lower 16 bits. @@ -166,20 +169,33 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1) * ... * + * For GFX 9.4.3, the following code only looks at a + * subset of the cu_mask corresponding to the inst parameter. + * If we have n XCCs under one GPU node + * cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0) + * cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0) + * .. + * cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0) + * cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0) + * + * For example, if there are 6 XCCs under 1 KFD node, this code + * running for each inst, will look at the bits as: + * inst, inst + 6, inst + 12... + * * First ensure all CUs are disabled, then enable user specified CUs. 
*/ for (i = 0; i < cu_info.num_shader_engines; i++) se_mask[i] = 0; - i = 0; - for (cu = 0; cu < 16; cu += inc) { + i = inst; + for (cu = 0; cu < 16; cu += cu_inc) { for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) { for (se = 0; se < cu_info.num_shader_engines; se++) { if (cu_per_sh[se][sh] > cu) { if (cu_mask[i / 32] & (en_mask << (i % 32))) se_mask[se] |= en_mask << (cu + sh * 16); i += inc; - if (i == cu_mask_count) + if (i >= cu_mask_count) return; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 23158db7da03..57bf5e513f4d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -138,7 +138,7 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, - uint32_t *se_mask); + uint32_t *se_mask, uint32_t inst); int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index ee1d32d957f2..1a4a69943c71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return; mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0); m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 83699392c808..8b7fed913526 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return; 
mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0); m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 0bbf0edbabd4..964b5d50a77e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -71,7 +71,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, } mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0); m->compute_static_thread_mgmt_se0 = se_mask[0]; m->compute_static_thread_mgmt_se1 = se_mask[1]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index e23d32f35607..42d881809dc7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -60,7 +60,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) } static void update_cu_mask(struct mqd_manager *mm, void *mqd, - struct mqd_update_info *minfo) + struct mqd_update_info *minfo, uint32_t inst) { struct v9_mqd *m; uint32_t se_mask[KFD_MAX_NUM_SE] = {0}; @@ -69,27 +69,36 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return; mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst); m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; m->compute_static_thread_mgmt_se1 = se_mask[1]; m->compute_static_thread_mgmt_se2 = se_mask[2]; m->compute_static_thread_mgmt_se3 = se_mask[3]; - m->compute_static_thread_mgmt_se4 = se_mask[4]; - m->compute_static_thread_mgmt_se5 = se_mask[5]; - m->compute_static_thread_mgmt_se6 = se_mask[6]; - m->compute_static_thread_mgmt_se7 = se_mask[7]; + if 
(KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) { + m->compute_static_thread_mgmt_se4 = se_mask[4]; + m->compute_static_thread_mgmt_se5 = se_mask[5]; + m->compute_static_thread_mgmt_se6 = se_mask[6]; + m->compute_static_thread_mgmt_se7 = se_mask[7]; - pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", - m->compute_static_thread_mgmt_se0, - m->compute_static_thread_mgmt_se1, - m->compute_static_thread_mgmt_se2, - m->compute_static_thread_mgmt_se3, - m->compute_static_thread_mgmt_se4, - m->compute_static_thread_mgmt_se5, - m->compute_static_thread_mgmt_se6, - m->compute_static_thread_mgmt_se7); + pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3, + m->compute_static_thread_mgmt_se4, + m->compute_static_thread_mgmt_se5, + m->compute_static_thread_mgmt_se6, + m->compute_static_thread_mgmt_se7); + } else { + pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n", + inst, m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); + } } static void set_priority(struct v9_mqd *m, struct queue_properties *q) @@ -290,7 +299,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) m->cp_hqd_ctx_save_control = 0; - update_cu_mask(mm, mqd, minfo); + if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) + update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q); q->is_active = QUEUE_IS_ACTIVE(*q); @@ -676,6 +686,8 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd + size * xcc); update_mqd(mm, m, q, minfo); + update_cu_mask(mm, mqd, minfo, xcc); + if (q->format == KFD_QUEUE_FORMAT_AQL) { switch (xcc) { case 0: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
index 657c37822980..3e1a574d4ea6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -55,7 +55,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return; mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0); m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0]; From 6be6d112419713334ddd9c01f219ca16adaa4c76 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Fri, 8 Sep 2023 08:57:02 +0800 Subject: [PATCH 071/333] blk-mq: fix tags UAF when shrinking q->nr_hw_queues When nr_hw_queues shrink, we free the excess tags before realloc'ing hw_ctxs for each queue. During that resize, we may need to access those tags, like blk_mq_tag_idle(hctx) will access queue shared tags. This can cause a slab use-after-free, as reported by KASAN. Fix it by moving the releasing of excess tags to the end. Fixes: e1dd7bc93029 ("blk-mq: fix tags leak when shrink nr_hw_queues") Reported-by: Yi Zhang Closes: https://lore.kernel.org/all/CAHj4cs_CK63uoDpGBGZ6DN4OCTpzkR3UaVgK=LX8Owr8ej2ieQ@mail.gmail.com/ Cc: Ming Lei Signed-off-by: Chengming Zhou Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20230908005702.2183908-1-chengming.zhou@linux.dev Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index ec922c6bccbe..1fafd54dce3c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4405,11 +4405,8 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, struct blk_mq_tags **new_tags; int i; - if (set->nr_hw_queues >= new_nr_hw_queues) { - for (i = new_nr_hw_queues; i < set->nr_hw_queues; i++) - __blk_mq_free_map_and_rqs(set, i); + if (set->nr_hw_queues >= new_nr_hw_queues) goto done; - } new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); @@ 
-4719,7 +4716,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, { struct request_queue *q; LIST_HEAD(head); - int prev_nr_hw_queues; + int prev_nr_hw_queues = set->nr_hw_queues; + int i; lockdep_assert_held(&set->tag_list_lock); @@ -4746,7 +4744,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_sysfs_unregister_hctxs(q); } - prev_nr_hw_queues = set->nr_hw_queues; if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) goto reregister; @@ -4781,6 +4778,10 @@ switch_back: list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); + + /* Free the excess tags when nr_hw_queues shrink. */ + for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) + __blk_mq_free_map_and_rqs(set, i); } void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) From ef064187a9709393a981a56cce1e31880fd97107 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Fri, 8 Sep 2023 16:46:39 +0800 Subject: [PATCH 072/333] drm/amd/display: fix the white screen issue when >= 64GB DRAM Dropping bit 31:4 of page table base is wrong, it makes page table base points to wrong address if phys addr is beyond 64GB; dropping page_table_start/end bit 31:4 is unnecessary since dcn20_vmid_setup will do that. Also, while we are at it, cleanup the assignments using upper_32_bits()/lower_32_bits() and AMDGPU_GPU_PAGE_SHIFT. 
Cc: stable@vger.kernel.org Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2354 Fixes: 81d0bcf99009 ("drm/amdgpu: make display pinning more flexible (v2)") Acked-by: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Yifan Zhang Co-developed-by: Hamza Mahfooz Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 88ba8b66de1f..6a0ea15936ae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1274,11 +1274,15 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF; - page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12); - page_table_end.high_part = (u32)(adev->gmc.gart_end >> 44) & 0xF; - page_table_end.low_part = (u32)(adev->gmc.gart_end >> 12); - page_table_base.high_part = upper_32_bits(pt_base) & 0xF; + page_table_start.high_part = upper_32_bits(adev->gmc.gart_start >> + AMDGPU_GPU_PAGE_SHIFT); + page_table_start.low_part = lower_32_bits(adev->gmc.gart_start >> + AMDGPU_GPU_PAGE_SHIFT); + page_table_end.high_part = upper_32_bits(adev->gmc.gart_end >> + AMDGPU_GPU_PAGE_SHIFT); + page_table_end.low_part = lower_32_bits(adev->gmc.gart_end >> + AMDGPU_GPU_PAGE_SHIFT); + page_table_base.high_part = upper_32_bits(pt_base); page_table_base.low_part = lower_32_bits(pt_base); pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18; From 169ed4ece8373f02f10642eae5240e3d1ef5c038 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 8 Sep 2023 10:36:44 -0400 Subject: [PATCH 073/333] Revert "drm/amd: Disable S/G for APUs when 64GB or more host memory" This reverts commit 
70e64c4d522b732e31c6475a3be2349de337d321. Since, we now have an actual fix for this issue, we can get rid of this workaround as it can cause pin failures if enough VRAM isn't carved out by the BIOS. Cc: stable@vger.kernel.org # 6.1+ Acked-by: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ------------------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++-- 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index dc2d53081e80..a79d53bdbe13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1293,7 +1293,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); -bool amdgpu_sg_display_supported(struct amdgpu_device *adev); bool amdgpu_device_pcie_dynamic_switching_supported(void); bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); bool amdgpu_device_aspm_support_quirk(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3f001a50b34a..30c4f5cca02c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1244,32 +1244,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; } -/* - * On APUs with >= 64GB white flickering has been observed w/ SG enabled. - * Disable S/G on such systems until we have a proper fix. 
- * https://gitlab.freedesktop.org/drm/amd/-/issues/2354 - * https://gitlab.freedesktop.org/drm/amd/-/issues/2735 - */ -bool amdgpu_sg_display_supported(struct amdgpu_device *adev) -{ - switch (amdgpu_sg_display) { - case -1: - break; - case 0: - return false; - case 1: - return true; - default: - return false; - } - if ((totalram_pages() << (PAGE_SHIFT - 10)) + - (adev->gmc.real_vram_size / 1024) >= 64000000) { - DRM_WARN("Disabling S/G due to >=64GB RAM\n"); - return false; - } - return true; -} - /* * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic * speed switching. Until we have confirmation from Intel that a specific host diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6a0ea15936ae..954906c515aa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1644,8 +1644,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } break; } - if (init_data.flags.gpu_vm_support) - init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev); + if (init_data.flags.gpu_vm_support && + (amdgpu_sg_display == 0)) + init_data.flags.gpu_vm_support = false; if (init_data.flags.gpu_vm_support) adev->mode_info.gpu_vm_support = true; From 679fc891bf11845730b572fc44f8a0eb846aba29 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Tue, 22 Aug 2023 10:02:46 -0400 Subject: [PATCH 074/333] drm/amd/display: Add dirty rect support for Replay Dirty rect can be used with replay, so enable them to allow for more powersaving. 
Reviewed-by: Sun peng Li Acked-by: Stylon Wang Signed-off-by: Bhawanpreet Lakha Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 954906c515aa..ca129983a08b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8078,7 +8078,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].plane_info = &bundle->plane_infos[planes_count]; - if (acrtc_state->stream->link->psr_settings.psr_feature_enabled) { + if (acrtc_state->stream->link->psr_settings.psr_feature_enabled || + acrtc_state->stream->link->replay_settings.replay_feature_enabled) { fill_dc_dirty_rects(plane, old_plane_state, new_plane_state, new_crtc_state, &bundle->flip_addrs[planes_count], From 81cc8779cf46d6323c83475706b61d9552230274 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Sep 2023 13:54:38 +0300 Subject: [PATCH 075/333] drm/amdgpu: fix retry loop test This loop will exit with "retry" set to -1 if it fails but the code checks for if "retry" is zero. Fix this by changing post-op to a pre-op. --retry vs retry--. 
Fixes: e01eeffc3f86 ("drm/amd/pm: avoid driver getting empty metrics table for the first time") Reviewed-by: Evan Quan Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 199a673b8120..de80e191a92c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -336,7 +336,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) /* Store one-time values in driver PPTable */ if (!pptable->Init) { - while (retry--) { + while (--retry) { ret = smu_v13_0_6_get_metrics_table(smu, NULL, true); if (ret) return ret; From f5b2c10b57615828b531bb0ae56bd6325a41167e Mon Sep 17 00:00:00 2001 From: Swapnil Patel Date: Thu, 17 Aug 2023 14:04:26 -0400 Subject: [PATCH 076/333] drm/amd/display: Don't check registers, if using AUX BL control [Why] Currently the driver looks at DCN registers to assess whether BL is on or not. This check is not valid if we are using AUX based brightness control. This causes the driver to not send out the "backlight off" command during the power off sequence, as it already thinks the backlight is off. [How] Only check DCN registers if we aren't using AUX based brightness control.
Reviewed-by: Wenjing Liu Acked-by: Stylon Wang Signed-off-by: Swapnil Patel Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index ad967b58d7be..478281f2a5ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -964,7 +964,9 @@ void dce110_edp_backlight_control( return; } - if (link->panel_cntl) { + if (link->panel_cntl && !(link->dpcd_sink_ext_caps.bits.oled || + link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 || + link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) { bool is_backlight_on = link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl); if ((enable && is_backlight_on) || (!enable && !is_backlight_on)) { From 1832403cd41ca6b19b24e9d64f79cb08d920ca44 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 6 Sep 2023 11:35:04 -0400 Subject: [PATCH 077/333] drm/amdgpu/soc21: don't remap HDP registers for SR-IOV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This matches the behavior for soc15 and nv. 
Acked-by: Christian König Reviewed-by: Timmy Tsai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 40d23738ee4e..8b2ff2b281b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -766,7 +766,7 @@ static int soc21_common_hw_init(void *handle) * for the purpose of expose those registers * to process space */ - if (adev->nbio.funcs->remap_hdp_registers) + if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev)) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ adev->nbio.funcs->enable_doorbell_aperture(adev, true); From ab43213e7afd08ac68d4282060bacf309e70fd14 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 7 Sep 2023 15:44:54 -0400 Subject: [PATCH 078/333] drm/amdgpu/nbio4.3: set proper rmmio_remap.reg_offset for SR-IOV Needed for HDP flush to work correctly. 
Reviewed-by: Timmy Tsai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index d5ed9e0e1a5f..e5b5b0f4940f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -345,6 +345,9 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev) data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); } + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev) From ffd6bde302061aeee405ab364403af30210f0b99 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 8 Sep 2023 21:21:55 +0800 Subject: [PATCH 079/333] drm/amdgpu: fallback to old RAS error message for aqua_vanjaram So driver doesn't generate incorrect message until the new format is settled down for aqua_vanjaram Signed-off-by: Hawking Zhang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3c4600e15b86..937c54fc7174 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1052,7 +1052,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, info->ce_count = obj->err_data.ce_count; if (err_data.ce_count) { - if (adev->smuio.funcs && + if (!adev->aid_mask && + adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " @@ -1072,7 +1073,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, } } if (err_data.ue_count) { - if (adev->smuio.funcs && + if 
(!adev->aid_mask && + adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " From ec5fa9fcdeca69edf7dab5ca3b2e0ceb1c08fe9a Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 22 Aug 2023 16:03:17 +0800 Subject: [PATCH 080/333] drm/amd/display: Adjust the MST resume flow [Why] In drm_dp_mst_topology_mgr_resume() today, it will resume the mst branch to be ready to handle mst mode and also consecutively do the mst topology probing. This gives the driver a chance to fire a hotplug event before restoring the old state. Userspace will then react to the hotplug event based on a wrong state. [How] Adjust the mst resume flow as: 1. set dpcd to resume mst branch status 2. restore source old state 3. Do mst resume topology probing For drm_dp_mst_topology_mgr_resume(), it's better to adjust it to pull out topology probing work into a 2nd part procedure of the mst resume. Will have a follow up patch in drm. Reviewed-by: Chao-kai Wang Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 93 ++++++++++++++++--- 1 file changed, 80 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ca129983a08b..c6fd34bab358 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2340,14 +2340,62 @@ static int dm_late_init(void *handle) return detect_mst_link_for_all_connectors(adev_to_drm(adev)); } +static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr) +{ + int ret; + u8 guid[16]; + u64 tmp64; + + mutex_lock(&mgr->lock); + if (!mgr->mst_primary) + goto out_fail; + + if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) { + drm_dbg_kms(mgr->dev, "dpcd read failed -
undocked during suspend?\n"); + goto out_fail; + } + + ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, + DP_MST_EN | + DP_UP_REQ_EN | + DP_UPSTREAM_IS_SRC); + if (ret < 0) { + drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n"); + goto out_fail; + } + + /* Some hubs forget their guids after they resume */ + ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); + if (ret != 16) { + drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); + goto out_fail; + } + + if (memchr_inv(guid, 0, 16) == NULL) { + tmp64 = get_jiffies_64(); + memcpy(&guid[0], &tmp64, sizeof(u64)); + memcpy(&guid[8], &tmp64, sizeof(u64)); + + ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid, 16); + + if (ret != 16) { + drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n"); + goto out_fail; + } + } + + memcpy(mgr->mst_primary->guid, guid, 16); + +out_fail: + mutex_unlock(&mgr->lock); +} + static void s3_handle_mst(struct drm_device *dev, bool suspend) { struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; struct drm_connector_list_iter iter; struct drm_dp_mst_topology_mgr *mgr; - int ret; - bool need_hotplug = false; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -2369,18 +2417,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) if (!dp_is_lttpr_present(aconnector->dc_link)) try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD); - ret = drm_dp_mst_topology_mgr_resume(mgr, true); - if (ret < 0) { - dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, - aconnector->dc_link); - need_hotplug = true; - } + /* TODO: move resume_mst_branch_status() into drm mst resume again + * once topology probing work is pulled out from mst resume into mst + * resume 2nd step. mst resume 2nd step should be called after old + * state getting restored (i.e. drm_atomic_helper_resume()). 
+ */ + resume_mst_branch_status(mgr); } } drm_connector_list_iter_end(&iter); - - if (need_hotplug) - drm_kms_helper_hotplug_event(dev); } static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) @@ -2774,7 +2819,8 @@ static int dm_resume(void *handle) struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state); enum dc_connection_type new_connection_type = dc_connection_none; struct dc_state *dc_state; - int i, r, j; + int i, r, j, ret; + bool need_hotplug = false; if (amdgpu_in_reset(adev)) { dc_state = dm->cached_dc_state; @@ -2872,7 +2918,7 @@ static int dm_resume(void *handle) continue; /* - * this is the case when traversing through already created + * this is the case when traversing through already created end sink * MST connectors, should be skipped */ if (aconnector && aconnector->mst_root) @@ -2932,6 +2978,27 @@ static int dm_resume(void *handle) dm->cached_state = NULL; + /* Do mst topology probing after resuming cached state*/ + drm_connector_list_iter_begin(ddev, &iter); + drm_for_each_connector_iter(connector, &iter) { + aconnector = to_amdgpu_dm_connector(connector); + if (aconnector->dc_link->type != dc_connection_mst_branch || + aconnector->mst_root) + continue; + + ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true); + + if (ret < 0) { + dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, + aconnector->dc_link); + need_hotplug = true; + } + } + drm_connector_list_iter_end(&iter); + + if (need_hotplug) + drm_kms_helper_hotplug_event(ddev); + amdgpu_dm_irq_resume_late(adev); amdgpu_dm_smu_write_watermarks_table(adev); From 9296da8c40900b4dae3d973aa22be306e2a77671 Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 22 Nov 2022 15:14:32 -0500 Subject: [PATCH 081/333] drm/amdkfd: Checkpoint and restore queues on GFX11 The code in kfd_mqd_manager_v11.c to support criu dump and restore of queue state was missing. Added it; should be equivalent to kfd_mqd_manager_v10.c. 
CC: Felix Kuehling Reviewed-by: Harish Kasiviswanathan Acked-by: Alex Deucher Signed-off-by: David Francis Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 964b5d50a77e..15277f1d5cf0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, return 0; } +static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst) +{ + struct v11_compute_mqd *m; + + m = get_mqd(mqd); + + memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd)); +} + +static void restore_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *qp, + const void *mqd_src, + const void *ctl_stack_src, const u32 ctl_stack_size) +{ + uint64_t addr; + struct v11_compute_mqd *m; + + m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr; + addr = mqd_mem_obj->gpu_addr; + + memcpy(m, mqd_src, sizeof(*m)); + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + + m->cp_hqd_pq_doorbell_control = + qp->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + qp->is_active = 0; +} + + static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -458,6 +495,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->mqd_size = sizeof(struct v11_compute_mqd); mqd->get_wave_state = get_wave_state; mqd->mqd_stride = kfd_mqd_stride; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -502,6 
+541,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; mqd->mqd_size = sizeof(struct v11_sdma_mqd); mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) From 5e7e82254270c8cf8b107451c5de01cee2f135ae Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 5 Sep 2023 10:13:51 -0400 Subject: [PATCH 082/333] drm/amdgpu: Handle null atom context in VBIOS info ioctl On some APU systems, there is no atom context and so the atom_context struct is null. Add a check to the VBIOS_INFO branch of amdgpu_info_ioctl to handle this case, returning all zeroes. Reviewed-by: Alex Deucher Signed-off-by: David Francis Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 2cd2ecebf465..d30dc0b718c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -940,12 +940,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct atom_context *atom_context; atom_context = adev->mode_info.atom_context; - memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name)); - memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn)); - vbios_info.version = atom_context->version; - memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, - sizeof(atom_context->vbios_ver_str)); - memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date)); + if (atom_context) { + memcpy(vbios_info.name, atom_context->name, + sizeof(atom_context->name)); + memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, + sizeof(atom_context->vbios_pn)); + vbios_info.version = 
atom_context->version; + memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, + sizeof(atom_context->vbios_ver_str)); + memcpy(vbios_info.date, atom_context->date, + sizeof(atom_context->date)); + } return copy_to_user(out, &vbios_info, min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0; From db5494a85294f057e0bb41bdb5372c2dbf46fb79 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 10 Sep 2023 16:44:50 -0700 Subject: [PATCH 083/333] drm/amd/display: fix replay_mode kernel-doc warning Fix the typo in the kernel-doc for @replay_mode to prevent kernel-doc warnings: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:623: warning: Incorrect use of kernel-doc format: * @replay mode: Replay supported drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:626: warning: Function parameter or member 'replay_mode' not described in 'amdgpu_hdmi_vsdb_info' Fixes: ec8e59cb4e0c ("drm/amd/display: Get replay info from VSDB") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Bhawanpreet Lakha Cc: Harry Wentland Cc: Alex Deucher Cc: Leo Li Cc: Rodrigo Siqueira Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index a2d34be82613..9e4cc5eeda76 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -620,7 +620,7 @@ struct amdgpu_hdmi_vsdb_info { unsigned int max_refresh_rate_hz; /** - * @replay mode: Replay supported + * @replay_mode: Replay supported */ bool replay_mode; }; From 64be47ba286117ee4e3dd9d064c88ea2913e3269 Mon Sep 17 00:00:00 2001 From: Mustapha Ghaddar Date: Thu, 10 Aug 2023 16:20:23 -0400 Subject: [PATCH 084/333] drm/amd/display: Add DPIA Link Encoder Assignment Fix For DPIA we should 
have preferred DIG assignment based on DPIA selected as per the ASIC design. Reviewed-by: George Shen Acked-by: Hamza Mahfooz Signed-off-by: Mustapha Ghaddar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../drm/amd/display/dc/core/dc_link_enc_cfg.c | 35 +++++++++++++++---- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../amd/display/dc/dcn314/dcn314_resource.c | 23 ++++++++++++ .../gpu/drm/amd/display/dc/inc/core_types.h | 1 + .../drm/amd/display/dc/link/link_factory.c | 4 +++ 5 files changed, 58 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 30c0644d4418..b66eeac4d3d2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -169,11 +169,23 @@ static void add_link_enc_assignment( /* Return first available DIG link encoder. */ static enum engine_id find_first_avail_link_enc( const struct dc_context *ctx, - const struct dc_state *state) + const struct dc_state *state, + enum engine_id eng_id_requested) { enum engine_id eng_id = ENGINE_ID_UNKNOWN; int i; + if (eng_id_requested != ENGINE_ID_UNKNOWN) { + + for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) { + eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i]; + if (eng_id == eng_id_requested) + return eng_id; + } + } + + eng_id = ENGINE_ID_UNKNOWN; + for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) { eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i]; if (eng_id != ENGINE_ID_UNKNOWN) @@ -287,7 +299,7 @@ void link_enc_cfg_link_encs_assign( struct dc_stream_state *streams[], uint8_t stream_count) { - enum engine_id eng_id = ENGINE_ID_UNKNOWN; + enum engine_id eng_id = ENGINE_ID_UNKNOWN, eng_id_req = ENGINE_ID_UNKNOWN; int i; int j; @@ -377,8 +389,15 @@ void link_enc_cfg_link_encs_assign( * assigned to that endpoint. 
*/ link_enc = get_link_enc_used_by_link(state, stream->link); - if (link_enc == NULL) - eng_id = find_first_avail_link_enc(stream->ctx, state); + if (link_enc == NULL) { + + if (stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + stream->link->dpia_preferred_eng_id != ENGINE_ID_UNKNOWN) + eng_id_req = stream->link->dpia_preferred_eng_id; + + if (eng_id == ENGINE_ID_UNKNOWN) + eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req); + } else eng_id = link_enc->preferred_engine; @@ -402,7 +421,9 @@ void link_enc_cfg_link_encs_assign( DC_LOG_DEBUG("%s: CUR %s(%d) - enc_id(%d)\n", __func__, assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA", - assignment.ep_id.link_id.enum_id - 1, + assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? + assignment.ep_id.link_id.enum_id : + assignment.ep_id.link_id.enum_id - 1, assignment.eng_id); } for (i = 0; i < MAX_PIPES; i++) { @@ -413,7 +434,9 @@ void link_enc_cfg_link_encs_assign( DC_LOG_DEBUG("%s: NEW %s(%d) - enc_id(%d)\n", __func__, assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA", - assignment.ep_id.link_id.enum_id - 1, + assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? + assignment.ep_id.link_id.enum_id : + assignment.ep_id.link_id.enum_id - 1, assignment.eng_id); } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0d0bef8eb331..31e3183497a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1496,6 +1496,7 @@ struct dc_link { * object creation. 
*/ enum engine_id eng_id; + enum engine_id dpia_preferred_eng_id; bool test_pattern_enabled; enum dp_test_pattern current_test_pattern; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 1c1fb2fa0822..004beed9bd44 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1032,6 +1032,28 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) }; +/* ========================================================== */ + +/* + * DPIA index | Preferred Encoder | Host Router + * 0 | C | 0 + * 1 | First Available | 0 + * 2 | D | 1 + * 3 | First Available | 1 + */ +/* ========================================================== */ +static const enum engine_id dpia_to_preferred_enc_id_table[] = { + ENGINE_ID_DIGC, + ENGINE_ID_DIGC, + ENGINE_ID_DIGD, + ENGINE_ID_DIGD +}; + +static enum engine_id dcn314_get_preferred_eng_id_dpia(unsigned int dpia_index) +{ + return dpia_to_preferred_enc_id_table[dpia_index]; +} + static struct dce_i2c_hw *dcn31_i2c_hw_create( struct dc_context *ctx, uint32_t inst) @@ -1785,6 +1807,7 @@ static struct resource_funcs dcn314_res_pool_funcs = { .update_bw_bounding_box = dcn314_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn314_get_panel_config_defaults, + .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, }; static struct clock_source *dcn30_clock_source_create( diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 027aec70c070..eaad1260bfd1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -65,6 +65,7 @@ struct resource_context; struct clk_bw_params; struct resource_funcs { + enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index); void 
(*destroy)(struct resource_pool **pool); void (*link_init)(struct dc_link *link); struct panel_cntl*(*panel_cntl_create)( diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 195ca9e52eda..0895742a3102 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -791,6 +791,10 @@ static bool construct_dpia(struct dc_link *link, /* Set dpia port index : 0 to number of dpia ports */ link->ddc_hw_inst = init_params->connector_index; + // Assign Dpia preferred eng_id + if (link->dc->res_pool->funcs->get_preferred_eng_id_dpia) + link->dpia_preferred_eng_id = link->dc->res_pool->funcs->get_preferred_eng_id_dpia(link->ddc_hw_inst); + /* TODO: Create link encoder */ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; From 29319378449035c6fc6391b31a3c2cbaf75be221 Mon Sep 17 00:00:00 2001 From: Mustapha Ghaddar Date: Tue, 22 Aug 2023 16:18:03 -0400 Subject: [PATCH 085/333] drm/amd/display: Fix 2nd DPIA encoder Assignment [HOW & Why] There seems to be an issue with 2nd DPIA acquiring link encoder for tiled displays. 
Solution is to remove check for eng_id before we get first dynamic encoder for it Reviewed-by: Cruise Hung Reviewed-by: Meenakshikumar Somasundaram Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Mustapha Ghaddar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index b66eeac4d3d2..be5a6d008b29 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -395,8 +395,7 @@ void link_enc_cfg_link_encs_assign( stream->link->dpia_preferred_eng_id != ENGINE_ID_UNKNOWN) eng_id_req = stream->link->dpia_preferred_eng_id; - if (eng_id == ENGINE_ID_UNKNOWN) - eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req); + eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req); } else eng_id = link_enc->preferred_engine; @@ -501,7 +500,6 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc( if (stream) link = stream->link; - // dm_output_to_console("%s: No link using DIG(%d).\n", __func__, eng_id); return link; } From a06023a8f78d3e9e73ca4363ccf3871a06e16ecc Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Fri, 8 Sep 2023 20:19:16 +0000 Subject: [PATCH 086/333] selftests/user_events: Fix failures when user_events is not installed When user_events is not installed the self tests currently fail. Now that these self tests run by default we need to ensure they don't fail when user_events was not enabled for the kernel being tested. Add common methods to detect if tracefs and user_events is enabled. If either is not enabled skip the test. If tracefs is enabled, but is not mounted, mount tracefs and fail if there were any errors. Fail if not run as root. 
Fixes: 68b4d2d58389 ("selftests/user_events: Reenable build") Reported-by: Naresh Kamboju Link: https://lore.kernel.org/all/CA+G9fYuugZ0OMeS6HvpSS4nuf_A3s455ecipGBvER0LJHojKZg@mail.gmail.com/ Signed-off-by: Beau Belgrave Signed-off-by: Shuah Khan --- .../testing/selftests/user_events/abi_test.c | 3 + .../testing/selftests/user_events/dyn_test.c | 2 + .../selftests/user_events/ftrace_test.c | 3 + .../testing/selftests/user_events/perf_test.c | 3 + .../user_events/user_events_selftests.h | 100 ++++++++++++++++++ 5 files changed, 111 insertions(+) create mode 100644 tools/testing/selftests/user_events/user_events_selftests.h diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c index 5125c42efe65..22374d29ffdd 100644 --- a/tools/testing/selftests/user_events/abi_test.c +++ b/tools/testing/selftests/user_events/abi_test.c @@ -19,6 +19,7 @@ #include #include "../kselftest_harness.h" +#include "user_events_selftests.h" const char *data_file = "/sys/kernel/tracing/user_events_data"; const char *enable_file = "/sys/kernel/tracing/events/user_events/__abi_event/enable"; @@ -93,6 +94,8 @@ FIXTURE(user) { }; FIXTURE_SETUP(user) { + USER_EVENT_FIXTURE_SETUP(return); + change_event(false); self->check = 0; } diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index 91a4444ad42b..32c827a52d7d 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -15,6 +15,7 @@ #include #include "../kselftest_harness.h" +#include "user_events_selftests.h" const char *abi_file = "/sys/kernel/tracing/user_events_data"; const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable"; @@ -146,6 +147,7 @@ FIXTURE(user) { }; FIXTURE_SETUP(user) { + USER_EVENT_FIXTURE_SETUP(return); } FIXTURE_TEARDOWN(user) { diff --git a/tools/testing/selftests/user_events/ftrace_test.c 
b/tools/testing/selftests/user_events/ftrace_test.c index 5beb0aef1d81..6a260caeeddc 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -16,6 +16,7 @@ #include #include "../kselftest_harness.h" +#include "user_events_selftests.h" const char *data_file = "/sys/kernel/tracing/user_events_data"; const char *status_file = "/sys/kernel/tracing/user_events_status"; @@ -206,6 +207,8 @@ FIXTURE(user) { }; FIXTURE_SETUP(user) { + USER_EVENT_FIXTURE_SETUP(return); + self->status_fd = open(status_file, O_RDONLY); ASSERT_NE(-1, self->status_fd); diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index 8b09be566fa2..f893398cda05 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -17,6 +17,7 @@ #include #include "../kselftest_harness.h" +#include "user_events_selftests.h" const char *data_file = "/sys/kernel/tracing/user_events_data"; const char *id_file = "/sys/kernel/tracing/events/user_events/__test_event/id"; @@ -113,6 +114,8 @@ FIXTURE(user) { }; FIXTURE_SETUP(user) { + USER_EVENT_FIXTURE_SETUP(return); + self->data_fd = open(data_file, O_RDWR); ASSERT_NE(-1, self->data_fd); } diff --git a/tools/testing/selftests/user_events/user_events_selftests.h b/tools/testing/selftests/user_events/user_events_selftests.h new file mode 100644 index 000000000000..690378942f82 --- /dev/null +++ b/tools/testing/selftests/user_events/user_events_selftests.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _USER_EVENTS_SELFTESTS_H +#define _USER_EVENTS_SELFTESTS_H + +#include +#include +#include +#include +#include + +#include "../kselftest.h" + +static inline bool tracefs_enabled(char **message, bool *fail) +{ + struct stat buf; + int ret; + + *message = ""; + *fail = false; + + /* Ensure tracefs is installed */ + ret = stat("/sys/kernel/tracing", &buf); + + if (ret == -1) { + 
*message = "Tracefs is not installed"; + return false; + } + + /* Ensure mounted tracefs */ + ret = stat("/sys/kernel/tracing/README", &buf); + + if (ret == -1 && errno == ENOENT) { + if (mount(NULL, "/sys/kernel/tracing", "tracefs", 0, NULL) != 0) { + *message = "Cannot mount tracefs"; + *fail = true; + return false; + } + + ret = stat("/sys/kernel/tracing/README", &buf); + } + + if (ret == -1) { + *message = "Cannot access tracefs"; + *fail = true; + return false; + } + + return true; +} + +static inline bool user_events_enabled(char **message, bool *fail) +{ + struct stat buf; + int ret; + + *message = ""; + *fail = false; + + if (getuid() != 0) { + *message = "Must be run as root"; + *fail = true; + return false; + } + + if (!tracefs_enabled(message, fail)) + return false; + + /* Ensure user_events is installed */ + ret = stat("/sys/kernel/tracing/user_events_data", &buf); + + if (ret == -1) { + switch (errno) { + case ENOENT: + *message = "user_events is not installed"; + return false; + + default: + *message = "Cannot access user_events_data"; + *fail = true; + return false; + } + } + + return true; +} + +#define USER_EVENT_FIXTURE_SETUP(statement) do { \ + char *message; \ + bool fail; \ + if (!user_events_enabled(&message, &fail)) { \ + if (fail) { \ + TH_LOG("Setup failed due to: %s", message); \ + ASSERT_FALSE(fail); \ + } \ + SKIP(statement, "Skipping due to: %s", message); \ + } \ +} while (0) + +#endif /* _USER_EVENTS_SELFTESTS_H */ From 7dc1e125f07aeeb8b6eacfd9a05ef3ef6fe539c7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Sep 2023 18:17:21 -0400 Subject: [PATCH 087/333] ftrace/selftests: Add softlink to latest log directory When I'm debugging something with the ftrace selftests and need to look at the logs, it becomes tedious that I need to do the following: ls -ltr logs [ copy the last directory ] ls logs/ to see where the logs are. Instead, do the common practice of having a "latest" softlink to the last run selftest. 
This way after running the selftest I only need to do: ls logs/latest/ and it will always give me the directory of the last run selftest logs! Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index cb5f18c06593..7df8baa0f98f 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -124,6 +124,7 @@ parse_opts() { # opts ;; --logdir|-l) LOG_DIR=$2 + LINK_PTR= shift 2 ;; *.tc) @@ -181,7 +182,10 @@ fi TOP_DIR=`absdir $0` TEST_DIR=$TOP_DIR/test.d TEST_CASES=`find_testcases $TEST_DIR` -LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/ +LOG_TOP_DIR=$TOP_DIR/logs +LOG_DATE=`date +%Y%m%d-%H%M%S` +LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/ +LINK_PTR=$LOG_TOP_DIR/latest KEEP_LOG=0 KTAP=0 DEBUG=0 @@ -207,6 +211,10 @@ else LOG_FILE=$LOG_DIR/ftracetest.log mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR" date > $LOG_FILE + if [ "x-$LINK_PTR" != "x-" ]; then + unlink $LINK_PTR + ln -fs $LOG_DATE $LINK_PTR + fi fi # Define text colors From 7ab6fe6625c9bdcb8fa5f61c8f8e30e13f689284 Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Thu, 7 Sep 2023 13:32:09 +0530 Subject: [PATCH 088/333] selftests: user_events: create test-specific Kconfig fragments Create the config file in user_events directory of testcase which need more kernel configuration than the default defconfig. 
User could use these configs with merge_config.sh script: The Kconfig CONFIG_USER_EVENTS=y is needed for the test to read data from the following files, - "/sys/kernel/tracing/user_events_data" - "/sys/kernel/tracing/user_events_status" - "/sys/kernel/tracing/events/user_events/*" Enable config for specific testcase: (export ARCH=xxx #for cross compiling) ./scripts/kconfig/merge_config.sh .config \ tools/testing/selftests/user_events/config Enable configs for all testcases: (export ARCH=xxx #for cross compiling) ./scripts/kconfig/merge_config.sh .config \ tools/testing/selftests/*/config Cc: Beau Belgrave Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Naresh Kamboju Signed-off-by: Shuah Khan --- tools/testing/selftests/user_events/config | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/user_events/config diff --git a/tools/testing/selftests/user_events/config b/tools/testing/selftests/user_events/config new file mode 100644 index 000000000000..64f7a9a90cec --- /dev/null +++ b/tools/testing/selftests/user_events/config @@ -0,0 +1 @@ +CONFIG_USER_EVENTS=y From a34a9f1a19afe9c60ca0ea61dfeee63a1c2baac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 11 Sep 2023 15:28:14 +0200 Subject: [PATCH 089/333] bpf: Avoid deadlock when using queue and stack maps from NMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sysbot discovered that the queue and stack maps can deadlock if they are being used from a BPF program that can be called from NMI context (such as one that is attached to a perf HW counter event). To fix this, add an in_nmi() check and use raw_spin_trylock() in NMI context, erroring out if grabbing the lock fails. 
Fixes: f1a2e44a3aec ("bpf: add queue and stack maps") Reported-by: Hsin-Wei Hung Tested-by: Hsin-Wei Hung Co-developed-by: Hsin-Wei Hung Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20230911132815.717240-1-toke@redhat.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/queue_stack_maps.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 8d2ddcb7566b..d869f51ea93a 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -98,7 +98,12 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete) int err = 0; void *ptr; - raw_spin_lock_irqsave(&qs->lock, flags); + if (in_nmi()) { + if (!raw_spin_trylock_irqsave(&qs->lock, flags)) + return -EBUSY; + } else { + raw_spin_lock_irqsave(&qs->lock, flags); + } if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); @@ -128,7 +133,12 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete) void *ptr; u32 index; - raw_spin_lock_irqsave(&qs->lock, flags); + if (in_nmi()) { + if (!raw_spin_trylock_irqsave(&qs->lock, flags)) + return -EBUSY; + } else { + raw_spin_lock_irqsave(&qs->lock, flags); + } if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); @@ -193,7 +203,12 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value, if (flags & BPF_NOEXIST || flags > BPF_EXIST) return -EINVAL; - raw_spin_lock_irqsave(&qs->lock, irq_flags); + if (in_nmi()) { + if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags)) + return -EBUSY; + } else { + raw_spin_lock_irqsave(&qs->lock, irq_flags); + } if (queue_stack_map_is_full(qs)) { if (!replace) { From 1a49f4195d3498fe458a7f5ff7ec5385da70d92e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 12 Sep 2023 03:55:37 +0300 Subject: [PATCH 090/333] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init Fix for a bug observable under the 
following sequence of events: 1. Create a network device that does not support XDP offload. 2. Load a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag (such programs are not offloaded). 3. Load a device bound XDP program with zero flags (such programs are offloaded). At step (2) __bpf_prog_dev_bound_init() associates with device (1) a dummy bpf_offload_netdev struct with .offdev field set to NULL. At step (3) __bpf_prog_dev_bound_init() would reuse dummy struct allocated at step (2). However, downstream usage of the bpf_offload_netdev assumes that .offdev field can't be NULL, e.g. in bpf_prog_offload_verifier_prep(). Adjust __bpf_prog_dev_bound_init() to require bpf_offload_netdev with non-NULL .offdev for offloaded BPF programs. Fixes: 2b3486bc2d23 ("bpf: Introduce device-bound XDP programs") Reported-by: syzbot+291100dcb32190ec02a8@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/000000000000d97f3c060479c4f8@google.com/ Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230912005539.2248244-2-eddyz87@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/offload.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 3e4f2ec1af06..87d6693d8233 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -199,12 +199,14 @@ static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *n offload->netdev = netdev; ondev = bpf_offload_find_netdev(offload->netdev); + /* When program is offloaded require presence of "true" + * bpf_offload_netdev, avoid the one created for !ondev case below. + */ + if (bpf_prog_is_offloaded(prog->aux) && (!ondev || !ondev->offdev)) { + err = -EINVAL; + goto err_free; + } if (!ondev) { - if (bpf_prog_is_offloaded(prog->aux)) { - err = -EINVAL; - goto err_free; - } - /* When only binding to the device, explicitly * create an entry in the hashtable. 
*/ From 72178d5d1a38dd185d1db15f177f2d122ef10d9b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 11 Sep 2023 16:56:13 -0700 Subject: [PATCH 091/333] objtool: Fix _THIS_IP_ detection for cold functions Cold functions and their non-cold counterparts can use _THIS_IP_ to reference each other. Don't warn about !ENDBR in that case. Note that for GCC this is currently irrelevant in light of the following commit c27cd083cfb9 ("Compiler attributes: GCC cold function alignment workarounds") which disabled cold functions in the kernel. However this may still be possible with Clang. Fixes several warnings like the following: drivers/scsi/bnx2i/bnx2i.prelink.o: warning: objtool: bnx2i_hw_ep_disconnect+0x19d: relocation to !ENDBR: bnx2i_hw_ep_disconnect.cold+0x0 drivers/net/ipvlan/ipvlan.prelink.o: warning: objtool: ipvlan_addr4_event.cold+0x28: relocation to !ENDBR: ipvlan_addr4_event+0xda drivers/net/ipvlan/ipvlan.prelink.o: warning: objtool: ipvlan_addr6_event.cold+0x26: relocation to !ENDBR: ipvlan_addr6_event+0xb7 drivers/net/ethernet/broadcom/tg3.prelink.o: warning: objtool: tg3_set_ringparam.cold+0x17: relocation to !ENDBR: tg3_set_ringparam+0x115 drivers/net/ethernet/broadcom/tg3.prelink.o: warning: objtool: tg3_self_test.cold+0x17: relocation to !ENDBR: tg3_self_test+0x2e1 drivers/target/iscsi/cxgbit/cxgbit.prelink.o: warning: objtool: __cxgbit_free_conn.cold+0x24: relocation to !ENDBR: __cxgbit_free_conn+0xfb net/can/can.prelink.o: warning: objtool: can_rx_unregister.cold+0x2c: relocation to !ENDBR: can_rx_unregister+0x11b drivers/net/ethernet/qlogic/qed/qed.prelink.o: warning: objtool: qed_spq_post+0xc0: relocation to !ENDBR: qed_spq_post.cold+0x9a drivers/net/ethernet/qlogic/qed/qed.prelink.o: warning: objtool: qed_iwarp_ll2_comp_syn_pkt.cold+0x12f: relocation to !ENDBR: qed_iwarp_ll2_comp_syn_pkt+0x34b net/tipc/tipc.prelink.o: warning: objtool: tipc_nametbl_publish.cold+0x21: relocation to !ENDBR: tipc_nametbl_publish+0xa6 Signed-off-by: Josh Poimboeuf 
Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/d8f1ab6a23a6105bc023c132b105f245c7976be6.1694476559.git.jpoimboe@kernel.org --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 1384090530db..e308d1ba664e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4333,7 +4333,8 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn continue; } - if (insn_func(dest) && insn_func(dest) == insn_func(insn)) { + if (insn_func(dest) && insn_func(insn) && + insn_func(dest)->pfunc == insn_func(insn)->pfunc) { /* * Anything from->to self is either _THIS_IP_ or * IRET-to-self. From e4c31164737e9a00de1be6455e2c667ac5478b3c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 12 Sep 2023 03:55:38 +0300 Subject: [PATCH 092/333] selftests/bpf: Offloaded prog after non-offloaded should not cause BUG Check what happens if non-offloaded dev bound BPF program is followed by offloaded dev bound program. Test case adapted from syzbot report [1]. 
[1] https://lore.kernel.org/bpf/000000000000d97f3c060479c4f8@google.com/ Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230912005539.2248244-3-eddyz87@gmail.com Signed-off-by: Martin KaFai Lau --- .../bpf/prog_tests/xdp_dev_bound_only.c | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c new file mode 100644 index 000000000000..7dd18c6d06c6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#define LOCAL_NETNS "xdp_dev_bound_only_netns" + +static int load_dummy_prog(char *name, __u32 ifindex, __u32 flags) +{ + struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() }; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.prog_flags = flags; + opts.prog_ifindex = ifindex; + return bpf_prog_load(BPF_PROG_TYPE_XDP, name, "GPL", insns, ARRAY_SIZE(insns), &opts); +} + +/* A test case for bpf_offload_netdev->offload handling bug: + * - create a veth device (does not support offload); + * - create a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag + * (such programs are not offloaded); + * - create a device bound XDP program without flags (such programs are offloaded). + * This might lead to 'BUG: kernel NULL pointer dereference'. 
+ */ +void test_xdp_dev_bound_only_offdev(void) +{ + struct nstoken *tok = NULL; + __u32 ifindex; + int fd1 = -1; + int fd2 = -1; + + SYS(out, "ip netns add " LOCAL_NETNS); + tok = open_netns(LOCAL_NETNS); + if (!ASSERT_OK_PTR(tok, "open_netns")) + goto out; + SYS(out, "ip link add eth42 type veth"); + ifindex = if_nametoindex("eth42"); + if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) { + perror("if_nametoindex"); + goto out; + } + fd1 = load_dummy_prog("dummy1", ifindex, BPF_F_XDP_DEV_BOUND_ONLY); + if (!ASSERT_GE(fd1, 0, "load_dummy_prog #1")) { + perror("load_dummy_prog #1"); + goto out; + } + /* Program with ifindex is considered offloaded, however veth + * does not support offload => error should be reported. + */ + fd2 = load_dummy_prog("dummy2", ifindex, 0); + ASSERT_EQ(fd2, -EINVAL, "load_dummy_prog #2 (offloaded)"); + +out: + close(fd1); + close(fd2); + close_netns(tok); + /* eth42 was added inside netns, removing the netns will + * also remove eth42 veth pair. + */ + SYS_NOFAIL("ip netns del " LOCAL_NETNS); +} From caaaa34eff2a3fc4e61bfccde9e15ae5dba49a7d Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 8 Sep 2023 11:12:23 +0100 Subject: [PATCH 093/333] ALSA: hda: cs35l56: Call pm_runtime_dont_use_autosuspend() Driver remove() must call pm_runtime_dont_use_autosuspend(). Drivers that call pm_runtime_use_autosuspend() must disable it in driver remove(). Unfortunately until recently this was only mentioned in 1 line in a 900+ line document so most people hadn't noticed this. It has only recently been added to the kerneldoc of pm_runtime_use_autosuspend(). 
Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://lore.kernel.org/r/20230908101223.2656901-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 76b9c685560b..9e4976bdb5e0 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -1003,6 +1003,7 @@ void cs35l56_hda_remove(struct device *dev) { struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); + pm_runtime_dont_use_autosuspend(cs35l56->base.dev); pm_runtime_get_sync(cs35l56->base.dev); pm_runtime_disable(cs35l56->base.dev); From 60edec9beffebd01a49c005221230f3a61fe6587 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Sep 2023 09:59:44 +0200 Subject: [PATCH 094/333] ALSA: docs: Fix a typo of midi2_ump_probe option for snd-usb-audio A simple typo fix: midi2_probe => midi2_ump_probe. Fixes: febdfa0e9c8a ("ALSA: docs: Update MIDI 2.0 documentation for UMP 1.1 enhancement") Link: https://lore.kernel.org/r/20230912075944.14032-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- Documentation/sound/designs/midi-2.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/sound/designs/midi-2.0.rst b/Documentation/sound/designs/midi-2.0.rst index 45987f256b97..086487ca7ab1 100644 --- a/Documentation/sound/designs/midi-2.0.rst +++ b/Documentation/sound/designs/midi-2.0.rst @@ -74,8 +74,8 @@ topology based on those information. When the device is older and doesn't respond to the new UMP inquiries, the driver falls back and builds the topology based on Group Terminal Block (GTB) information from the USB descriptor. Some device might be screwed up by the -unexpected UMP command; in such a case, pass `midi2_probe=0` option to -snd-usb-audio driver for skipping the UMP v1.1 inquiries. 
+unexpected UMP command; in such a case, pass `midi2_ump_probe=0` +option to snd-usb-audio driver for skipping the UMP v1.1 inquiries. When the MIDI 2.0 device is probed, the kernel creates a rawmidi device for each UMP Endpoint of the device. Its device name is From 22eefaeab03fe968ab7786fb3d5c5abd203a8bab Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Sep 2023 10:51:44 +0200 Subject: [PATCH 095/333] ALSA: seq: Avoid delivery of events for disabled UMP groups ALSA sequencer core still delivers events to the disabled UMP Group, leaving this handling to the device. But it's rather risky and it's easy to imagine that such an unexpected event may screw up the device firmware. This patch avoids the superfluous event deliveries by setting the group_filter of the UMP client as default, and evaluating the group_filter properly at delivery from non-UMP clients. The group_filter is updated upon the dynamic UMP Function Block updates, so that it follows the change of the disabled UMP Groups, too. 
Fixes: d2b706077792 ("ALSA: seq: Add UMP group filter") Link: https://lore.kernel.org/r/20230912085144.32534-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_client.c | 22 ++++++++++++++++++++++ sound/core/seq/seq_ump_convert.c | 2 ++ 2 files changed, 24 insertions(+) diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index f26a1812dfa7..a60e3f069a80 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -416,6 +416,25 @@ static void setup_client_midi_version(struct seq_ump_client *client) snd_seq_kernel_client_put(cptr); } +/* set up client's group_filter bitmap */ +static void setup_client_group_filter(struct seq_ump_client *client) +{ + struct snd_seq_client *cptr; + unsigned int filter; + int p; + + cptr = snd_seq_kernel_client_get(client->seq_client); + if (!cptr) + return; + filter = ~(1U << 0); /* always allow groupless messages */ + for (p = 0; p < SNDRV_UMP_MAX_GROUPS; p++) { + if (client->groups[p].active) + filter &= ~(1U << (p + 1)); + } + cptr->group_filter = filter; + snd_seq_kernel_client_put(cptr); +} + /* UMP group change notification */ static void handle_group_notify(struct work_struct *work) { @@ -424,6 +443,7 @@ static void handle_group_notify(struct work_struct *work) update_group_attrs(client); update_port_infos(client); + setup_client_group_filter(client); } /* UMP FB change notification */ @@ -492,6 +512,8 @@ static int snd_seq_ump_probe(struct device *_dev) goto error; } + setup_client_group_filter(client); + err = create_ump_endpoint_port(client); if (err < 0) goto error; diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index 7cc84e137999..b141024830ec 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -1197,6 +1197,8 @@ int snd_seq_deliver_to_ump(struct snd_seq_client *source, struct snd_seq_event *event, int atomic, int hop) { + if (dest->group_filter & (1U << dest_port->ump_group)) + 
return 0; /* group filtered - skip the event */ if (event->type == SNDRV_SEQ_EVENT_SYSEX) return cvt_sysex_to_ump(dest, dest_port, event, atomic, hop); else if (snd_seq_client_is_midi2(dest)) From 40d84e198b0ae64df71ac0e70675b16900b90bde Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 6 Sep 2023 12:18:41 +0800 Subject: [PATCH 096/333] PM: hibernate: Rename function parameter from snapshot_test to exclusive Several functions rely on snapshot_test to decide whether to open the resume device exclusively. However there is no strict connection between the snapshot_test and the open mode. Rename the 'snapshot_test' input parameter to 'exclusive' to better reflect the use case. No functional change is expected. Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- kernel/power/power.h | 4 ++-- kernel/power/swap.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/power/power.h b/kernel/power/power.h index 46eb14dc50c3..a98f95e309a3 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -168,11 +168,11 @@ extern int swsusp_swap_in_use(void); #define SF_HW_SIG 8 /* kernel/power/hibernate.c */ -int swsusp_check(bool snapshot_test); +int swsusp_check(bool exclusive); extern void swsusp_free(void); extern int swsusp_read(unsigned int *flags_p); extern int swsusp_write(unsigned int flags); -void swsusp_close(bool snapshot_test); +void swsusp_close(bool exclusive); #ifdef CONFIG_SUSPEND extern int swsusp_unmark(void); #endif diff --git a/kernel/power/swap.c b/kernel/power/swap.c index f6ebcd00c410..74edbce2320b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -1513,12 +1513,13 @@ end: static void *swsusp_holder; /** - * swsusp_check - Check for swsusp signature in the resume device + * swsusp_check - Check for swsusp signature in the resume device + * @exclusive: Open the resume device exclusively. */ -int swsusp_check(bool snapshot_test) +int swsusp_check(bool exclusive) { - void *holder = snapshot_test ? 
&swsusp_holder : NULL; + void *holder = exclusive ? &swsusp_holder : NULL; int error; hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, BLK_OPEN_READ, @@ -1563,17 +1564,18 @@ put: } /** - * swsusp_close - close swap device. + * swsusp_close - close swap device. + * @exclusive: Close the resume device which is exclusively opened. */ -void swsusp_close(bool snapshot_test) +void swsusp_close(bool exclusive) { if (IS_ERR(hib_resume_bdev)) { pr_debug("Image device not initialised\n"); return; } - blkdev_put(hib_resume_bdev, snapshot_test ? &swsusp_holder : NULL); + blkdev_put(hib_resume_bdev, exclusive ? &swsusp_holder : NULL); } /** From 148b6f4cc3920e563094540fe1a12d00d3bbccae Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 6 Sep 2023 12:18:52 +0800 Subject: [PATCH 097/333] PM: hibernate: Fix the exclusive get block device in test_resume mode Commit 5904de0d735b ("PM: hibernate: Do not get block device exclusively in test_resume mode") fixes a hibernation issue under test_resume mode. That commit is supposed to open the block device in non-exclusive mode when in test_resume. However the code does the opposite, which is against its description. In summary, the swap device is only opened exclusively by swsusp_check() with its corresponding *close(), and must be in non test_resume mode. This is to avoid the race condition that different processes scribble the device at the same time. All the other cases should use non-exclusive mode. Fix it by really disabling exclusive mode under test_resume. Fixes: 5904de0d735b ("PM: hibernate: Do not get block device exclusively in test_resume mode") Closes: https://lore.kernel.org/lkml/000000000000761f5f0603324129@google.com/ Reported-by: Pengfei Xu Signed-off-by: Chen Yu Tested-by: Chenzhou Feng Signed-off-by: Rafael J. 
Wysocki --- kernel/power/hibernate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 2b4a946a6ff5..8d35b9f9aaa3 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -786,9 +786,9 @@ int hibernate(void) unlock_device_hotplug(); if (snapshot_test) { pm_pr_dbg("Checking hibernation image\n"); - error = swsusp_check(snapshot_test); + error = swsusp_check(false); if (!error) - error = load_image_and_restore(snapshot_test); + error = load_image_and_restore(false); } thaw_processes(); @@ -945,14 +945,14 @@ static int software_resume(void) pm_pr_dbg("Looking for hibernation image.\n"); mutex_lock(&system_transition_mutex); - error = swsusp_check(false); + error = swsusp_check(true); if (error) goto Unlock; /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { error = -EBUSY; - swsusp_close(false); + swsusp_close(true); goto Unlock; } @@ -973,7 +973,7 @@ static int software_resume(void) goto Close_Finish; } - error = load_image_and_restore(false); + error = load_image_and_restore(true); thaw_processes(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); @@ -987,7 +987,7 @@ static int software_resume(void) pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: - swsusp_close(false); + swsusp_close(true); goto Finish; } From fb6254df09bba303db2a1002085f6c0b90a456ed Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 12 Sep 2023 15:31:49 +0800 Subject: [PATCH 098/333] ALSA: hda/realtek - Fixed two speaker platform If system has two speakers and one connect to 0x14 pin, use this function will disable it. 
Fixes: e43252db7e20 ("ALSA: hda/realtek - ALC287 I2S speaker platform support") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/e3f2aac3fe6a47079d728a6443358cc2@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b7e78bfcffd8..887c1b163865 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7073,8 +7073,10 @@ static void alc287_fixup_bind_dacs(struct hda_codec *codec, snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); spec->gen.preferred_dacs = preferred_pairs; spec->gen.auto_mute_via_amp = 1; - snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, - 0x0); /* Make sure 0x14 was disable */ + if (spec->gen.autocfg.speaker_pins[0] != 0x14) { + snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, + 0x0); /* Make sure 0x14 was disable */ + } } From 1263cc0f414d212129c0f1289b49b7df77f92084 Mon Sep 17 00:00:00 2001 From: August Wikerfors Date: Mon, 11 Sep 2023 23:34:09 +0200 Subject: [PATCH 099/333] ASoC: amd: yc: Fix non-functional mic on Lenovo 82QF and 82UG Like the Lenovo 82TL and 82V2, the Lenovo 82QF (Yoga 7 14ARB7) and 82UG (Legion S7 16ARHA7) both need a quirk entry for the internal microphone to function. Commit c008323fe361 ("ASoC: amd: yc: Fix a non-functional mic on Lenovo 82SJ") restricted the quirk that previously matched "82" to "82V2", breaking microphone functionality on these devices. Fix this by adding specific quirks for these models, as was done for the Lenovo 82TL. 
Fixes: c008323fe361 ("ASoC: amd: yc: Fix a non-functional mic on Lenovo 82SJ") Closes: https://github.com/tomsom/yoga-linux/issues/51 Link: https://bugzilla.kernel.org/show_bug.cgi?id=208555#c780 Cc: stable@vger.kernel.org Signed-off-by: August Wikerfors Link: https://lore.kernel.org/r/20230911213409.6106-1-git@augustwikerfors.se Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 59aa2e9d3a79..94e9eb8e73f2 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -213,6 +213,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21J6"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -220,6 +227,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82TL"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82UG"), + } + }, { .driver_data = &acp6x_card, .matches = { From 7c95ec3b59479bb24093918bbfc801c9f31826f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 8 Sep 2023 08:25:27 +0300 Subject: [PATCH 100/333] drm/i915: Only check eDP HPD when AUX CH is shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently Acer Chromebook C740 (BDW-ULT) doesn't have the eDP HPD line properly connected, and thus fails the new HPD check during eDP probe. The result is that we lose the eDP output. I suspect all such machines would be Chromebooks or other Linux exclusive systems as the Windows driver likely wouldn't work either. 
I did check a few other BDW machines here and those do have eDP HPD connected, one of them even is a different Chromebook (Samus). To account for these funky machines let's skip the HPD check when it looks like the eDP port is the only one using that specific AUX channel. In case of multiple ports sharing the same AUX CH (eg. on Asrock B250M-HDV) we still do the check and thus should correctly ignore the eDP port in favor of the other DP port (usually a DP->VGA converter). v2: Don't oops during list iteration Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9264 Fixes: cfe5bdfb27fa ("drm/i915: Check HPD live state during eDP probe") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230908052527.685-1-ville.syrjala@linux.intel.com Reviewed-by: Luca Coelho (cherry picked from commit 70052100fabec5d8c1b09c9959817a2f4517e6b5) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_bios.h | 1 + drivers/gpu/drm/i915/display/intel_dp.c | 7 ++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 858c959f7bab..f735b035436c 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -3540,6 +3540,27 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata) return map_aux_ch(devdata->i915, devdata->child.aux_channel); } +bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata) +{ + struct drm_i915_private *i915; + u8 aux_channel; + int count = 0; + + if (!devdata || !devdata->child.aux_channel) + return false; + + i915 = devdata->i915; + aux_channel = devdata->child.aux_channel; + + list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + if (intel_bios_encoder_supports_dp(devdata) && + aux_channel == 
devdata->child.aux_channel) + count++; + } + + return count > 1; +} + int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata) { if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost) diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 9680e3e92bb5..49e24b7cf675 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -273,6 +273,7 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata); +bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_boost_level(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 12bd2f322e62..e0e4cb529284 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5512,8 +5512,13 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, /* * VBT and straps are liars. Also check HPD as that seems * to be the most reliable piece of information available. + * + * ... expect on devices that forgot to hook HPD up for eDP + * (eg. Acer Chromebook C710), so we'll check it only if multiple + * ports are attempting to use the same AUX CH, according to VBT. 
*/ - if (!intel_digital_port_connected(encoder)) { + if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) && + !intel_digital_port_connected(encoder)) { /* * If this fails, presume the DPCD answer came * from some other port using the same AUX CH. From 091c2848b0f7643eeb44abc1e7ba8f9ef5eb366f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 12 Sep 2023 14:01:13 +0300 Subject: [PATCH 101/333] ALSA: core: Use dev_name of card_dev as debugfs directory name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to use a temporary string for the debugfs directory name as we can use the device name of the card. This change also fixes the following compiler warning/error (W=1): sound/core/init.c: In function ‘snd_card_init’: sound/core/init.c:367:28: error: ‘%d’ directive writing between 1 and 10 bytes into a region of size 4 [-Werror=format-overflow=] 367 | sprintf(name, "card%d", idx); | ^~ sound/core/init.c:367:23: note: directive argument in the range [0, 2147483646] 367 | sprintf(name, "card%d", idx); | ^~~~~~~~ sound/core/init.c:367:9: note: ‘sprintf’ output between 6 and 15 bytes into a destination of size 8 367 | sprintf(name, "card%d", idx); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The idx is guaranteed to be less than SNDRV_CARDS (max 256 or 8) by the code in snd_card_init(), however the compiler does not see that. 
The warnings got brought to light by a recent patch upstream: commit 6d4ab2e97dcf ("extrawarn: enable format and stringop overflow warnings in W=1") Suggested-by: Arnd Bergmann Suggested-by: Takashi Iwai Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230912110113.3166-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/core/init.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sound/core/init.c b/sound/core/init.c index d61bde1225f2..22c0d217b860 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -278,9 +278,6 @@ static int snd_card_init(struct snd_card *card, struct device *parent, size_t extra_size) { int err; -#ifdef CONFIG_SND_DEBUG - char name[8]; -#endif if (extra_size > 0) card->private_data = (char *)card + sizeof(struct snd_card); @@ -364,8 +361,8 @@ static int snd_card_init(struct snd_card *card, struct device *parent, } #ifdef CONFIG_SND_DEBUG - sprintf(name, "card%d", idx); - card->debugfs_root = debugfs_create_dir(name, sound_debugfs_root); + card->debugfs_root = debugfs_create_dir(dev_name(&card->card_dev), + sound_debugfs_root); #endif return 0; From fa6a0c0c1dd53b3949ca56bf7213648dfd6a62ee Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:40 +0200 Subject: [PATCH 102/333] ASoC: rt5640: Revert "Fix sleep in atomic context" Commit 70a6404ff610 ("ASoC: rt5640: Fix sleep in atomic context") not only switched from request_irq() to request_threaded_irq(), to fix the sleep in atomic context issue, but it also added devm management of the IRQ by actually switching to devm_request_threaded_irq() (without any explanation in the commit message for this change). This is wrong since the IRQ was already explicitly managed by the driver. On unbind the ASoC core will call rt5640_set_jack(NULL) which in turn will call rt5640_disable_jack_detect() which frees the IRQ already. So now we have a double free. 
Besides the unexplained switch to devm being wrong, the actual fix for the sleep in atomic context issue also is not the best solution. The only thing which rt5640_irq() does is cancel + (re-)queue the jack_work delayed_work. This can be done in a single non sleeping call by replacing queue_delayed_work() with mod_delayed_work(), which does not sleep. Using mod_delayed_work() is a much better fix than adding a thread which does nothing other than queuing a work-item. This patch is a straight revert of the troublesome changes, the switch to mod_delayed_work() is done in a separate follow-up patch. Fixes: 70a6404ff610 ("ASoC: rt5640: Fix sleep in atomic context") Cc: Sameer Pujar Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 15e1a62b9e57..05ff8066171b 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2565,10 +2565,9 @@ static void rt5640_enable_jack_detect(struct snd_soc_component *component, if (jack_data && jack_data->use_platform_clock) rt5640->use_platform_clock = jack_data->use_platform_clock; - ret = devm_request_threaded_irq(component->dev, rt5640->irq, - NULL, rt5640_irq, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - "rt5640", rt5640); + ret = request_irq(rt5640->irq, rt5640_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "rt5640", rt5640); if (ret) { dev_warn(component->dev, "Failed to request IRQ %d: %d\n", rt5640->irq, ret); rt5640_disable_jack_detect(component); @@ -2621,9 +2620,8 @@ static void rt5640_enable_hda_jack_detect( rt5640->jack = jack; - ret = devm_request_threaded_irq(component->dev, rt5640->irq, - NULL, rt5640_irq, IRQF_TRIGGER_RISING | IRQF_ONESHOT, - "rt5640", rt5640); + ret = request_irq(rt5640->irq, rt5640_irq, + 
IRQF_TRIGGER_RISING | IRQF_ONESHOT, "rt5640", rt5640); if (ret) { dev_warn(component->dev, "Failed to request IRQ %d: %d\n", rt5640->irq, ret); rt5640->irq = -ENXIO; From df7d595f6bd9dc96cc275cc4b0f313fcfa423c58 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:41 +0200 Subject: [PATCH 103/333] ASoC: rt5640: Fix sleep in atomic context Following prints are observed while testing audio on Jetson AGX Orin which has onboard RT5640 audio codec: BUG: sleeping function called from invalid context at kernel/workqueue.c:3027 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/0 preempt_count: 10001, expected: 0 RCU nest depth: 0, expected: 0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/irq/handle.c:159 __handle_irq_event_percpu+0x1e0/0x270 ---[ end trace ad1c64905aac14a6 ]- The IRQ handler rt5640_irq() runs in interrupt context and can sleep during cancel_delayed_work_sync(). The only thing which rt5640_irq() does is cancel + (re-)queue the jack_work delayed_work. This can be done in a single non sleeping call by replacing queue_delayed_work() with mod_delayed_work(), avoiding the sleep in atomic context. 
Fixes: 051dade34695 ("ASoC: rt5640: Fix the wrong state of JD1 and JD2") Reported-by: Sameer Pujar Closes: https://lore.kernel.org/r/1688015537-31682-4-git-send-email-spujar@nvidia.com Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-3-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 05ff8066171b..5c34c045d396 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2403,13 +2403,11 @@ static irqreturn_t rt5640_irq(int irq, void *data) struct rt5640_priv *rt5640 = data; int delay = 0; - if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) { - cancel_delayed_work_sync(&rt5640->jack_work); + if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) delay = 100; - } if (rt5640->jack) - queue_delayed_work(system_long_wq, &rt5640->jack_work, delay); + mod_delayed_work(system_long_wq, &rt5640->jack_work, delay); return IRQ_HANDLED; } From 786120ebb649b166021f0212250e8627e53d068a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:42 +0200 Subject: [PATCH 104/333] ASoC: rt5640: Do not disable/enable IRQ twice on suspend/resume When jack-detect was originally added disabling the IRQ during suspend was done by the sound/soc/intel/boards/bytcr_rt5640.c driver calling snd_soc_component_set_jack(NULL) on suspend, which calls rt5640_disable_jack_detect(), which calls free_irq() which also disables it. Commit 5fabcc90e79b ("ASoC: rt5640: Fix Jack work after system suspend") added disable_irq() / enable_irq() calls on suspend/resume for machine drivers which do not call snd_soc_component_set_jack(NULL) on suspend. 
The new disable_irq() / enable_irq() are made conditional by "if (rt5640->irq)" statements, but this is true for the machine drivers which do call snd_soc_component_set_jack(NULL) on suspend too, causing a disable_irq() call there on the already free-ed IRQ. Change the "if (rt5640->irq)" condition to "if (rt5640->jack)" to fix this, rt5640->jack is only set if the jack-detect IRQ handler is still active when rt5640_suspend() runs. And adjust rt5640_enable_hda_jack_detect()'s request_irq() error handling to set rt5640->jack to NULL to match (note that the old setting of irq to -ENXIO still resulted in disable_irq(-ENXIO) calls on suspend). Fixes: 5fabcc90e79b ("ASoC: rt5640: Fix Jack work after system suspend") Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-4-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 5c34c045d396..1bc281d42ca8 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2622,7 +2622,7 @@ static void rt5640_enable_hda_jack_detect( IRQF_TRIGGER_RISING | IRQF_ONESHOT, "rt5640", rt5640); if (ret) { dev_warn(component->dev, "Failed to request IRQ %d: %d\n", rt5640->irq, ret); - rt5640->irq = -ENXIO; + rt5640->jack = NULL; return; } @@ -2797,7 +2797,7 @@ static int rt5640_suspend(struct snd_soc_component *component) { struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component); - if (rt5640->irq) { + if (rt5640->jack) { /* disable jack interrupts during system suspend */ disable_irq(rt5640->irq); } @@ -2825,10 +2825,9 @@ static int rt5640_resume(struct snd_soc_component *component) regcache_cache_only(rt5640->regmap, false); regcache_sync(rt5640->regmap); - if (rt5640->irq) + if (rt5640->jack) { enable_irq(rt5640->irq); - if (rt5640->jack) { if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) { 
snd_soc_component_update_bits(component, RT5640_DUMMY2, 0x1100, 0x1100); From b5e85e535551bf82242aa5896e14a136ed3c156d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:43 +0200 Subject: [PATCH 105/333] ASoC: rt5640: Enable the IRQ on resume after configuring jack-detect The jack-detect IRQ should be enabled *after* the jack-detect related configuration registers have been programmed. Move the enable_irq() call for this to after the register setup. Fixes: 5fabcc90e79b ("ASoC: rt5640: Fix Jack work after system suspend") Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-5-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 1bc281d42ca8..03c866c04c7a 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2826,8 +2826,6 @@ static int rt5640_resume(struct snd_soc_component *component) regcache_sync(rt5640->regmap); if (rt5640->jack) { - enable_irq(rt5640->irq); - if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) { snd_soc_component_update_bits(component, RT5640_DUMMY2, 0x1100, 0x1100); @@ -2854,6 +2852,7 @@ static int rt5640_resume(struct snd_soc_component *component) } } + enable_irq(rt5640->irq); queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); } From 8c8bf3df6b7c0ed1c4dd373b23eb0ce13a63f452 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:44 +0200 Subject: [PATCH 106/333] ASoC: rt5640: Fix IRQ not being free-ed for HDA jack detect mode Set "rt5640->irq_requested = true" after a successful request_irq() in rt5640_enable_hda_jack_detect(), so that rt5640_disable_jack_detect() properly frees the IRQ. This fixes the IRQ not being freed on rmmod / driver unbind. 
Fixes: 2b9c8d2b3c89 ("ASoC: rt5640: Add the HDA header support") Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-6-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 03c866c04c7a..a4a11407ab10 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2625,6 +2625,7 @@ static void rt5640_enable_hda_jack_detect( rt5640->jack = NULL; return; } + rt5640->irq_requested = true; /* sync initial jack state */ queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); From 8fc7cc507d61fc655172836c74fb7fcc8b7a978b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:45 +0200 Subject: [PATCH 107/333] ASoC: rt5640: Only cancel jack-detect work on suspend if active If jack-detection is not used; or has already been disabled then there is no need to call rt5640_cancel_work(). Move the rt5640_cancel_work() inside the "if (rt5640->jack) {}" block, grouping it together with the disabling of the IRQ which queues the work in the first place. This also makes suspend() symmetrical with resume() which re-queues the work in an "if (rt5640->jack) {}" block. 
Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-7-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index a4a11407ab10..e8cdc166bdaa 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2801,9 +2801,9 @@ static int rt5640_suspend(struct snd_soc_component *component) if (rt5640->jack) { /* disable jack interrupts during system suspend */ disable_irq(rt5640->irq); + rt5640_cancel_work(rt5640); } - rt5640_cancel_work(rt5640); snd_soc_component_force_bias_level(component, SND_SOC_BIAS_OFF); rt5640_reset(component); regcache_cache_only(rt5640->regmap, true); From 88956eabfdea7d01d550535af120d4ef265b1d02 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 12 Sep 2023 11:25:00 +1000 Subject: [PATCH 108/333] NFSD: fix possible oops when nfsd/pool_stats is closed. If /proc/fs/nfsd/pool_stats is open when the last nfsd thread exits, then when the file is closed a NULL pointer is dereferenced. This is because nfsd_pool_stats_release() assumes that the pointer to the svc_serv cannot become NULL while a reference is held. This used to be the case but a recent patch split nfsd_last_thread() out from nfsd_put(), and clearing the pointer is done in nfsd_last_thread(). This is easily reproduced by running rpc.nfsd 8 ; ( rpc.nfsd 0;true) < /proc/fs/nfsd/pool_stats Fortunately nfsd_pool_stats_release() has easy access to the svc_serv pointer, and so can call svc_put() on it directly. 
Fixes: 9f28a971ee9f ("nfsd: separate nfsd_last_thread() from nfsd_put()") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1582af33e204..c7af1095f6b5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1082,11 +1082,12 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { + struct seq_file *seq = file->private_data; + struct svc_serv *serv = seq->private; int ret = seq_release(inode, file); - struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); - nfsd_put(net); + svc_put(serv); mutex_unlock(&nfsd_mutex); return ret; } From 98a15816636044f25be4644db2a3e09fad68aaf7 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 5 Sep 2023 10:09:22 +0100 Subject: [PATCH 109/333] Revert "comedi: add HAS_IOPORT dependencies" This reverts commit b5c75b68b7ded84d4c82118974ce3975a4dcaa74. The commit makes it impossible to select configuration options that depend on COMEDI_8254, COMEDI_DAS08, COMEDI_NI_LABPC, or COMEDI_AMPLC_DIO200 options due to changing 'select' directives to 'depends on' directives and there being no other way to select those codependent configuration options. 
Fixes: b5c75b68b7de ("comedi: add HAS_IOPORT dependencies") Cc: Niklas Schnelle Cc: Arnd Bergmann Cc: # v6.5+ Acked-by: Arnd Bergmann Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230905090922.3314-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/Kconfig | 103 ++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 68 deletions(-) diff --git a/drivers/comedi/Kconfig b/drivers/comedi/Kconfig index 7a8d402f05be..9af280735cba 100644 --- a/drivers/comedi/Kconfig +++ b/drivers/comedi/Kconfig @@ -67,7 +67,6 @@ config COMEDI_TEST config COMEDI_PARPORT tristate "Parallel port support" - depends on HAS_IOPORT help Enable support for the standard parallel port. A cheap and easy way to get a few more digital I/O lines. Steal @@ -80,7 +79,6 @@ config COMEDI_PARPORT config COMEDI_SSV_DNP tristate "SSV Embedded Systems DIL/Net-PC support" depends on X86_32 || COMPILE_TEST - depends on HAS_IOPORT help Enable support for SSV Embedded Systems DIL/Net-PC @@ -91,7 +89,6 @@ endif # COMEDI_MISC_DRIVERS menuconfig COMEDI_ISA_DRIVERS bool "Comedi ISA and PC/104 drivers" - depends on ISA help Enable comedi ISA and PC/104 drivers to be built @@ -103,8 +100,7 @@ if COMEDI_ISA_DRIVERS config COMEDI_PCL711 tristate "Advantech PCL-711/711b and ADlink ACL-8112 ISA card support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Advantech PCL-711 and 711b, ADlink ACL-8112 @@ -165,9 +161,8 @@ config COMEDI_PCL730 config COMEDI_PCL812 tristate "Advantech PCL-812/813 and ADlink ACL-8112/8113/8113/8216" - depends on HAS_IOPORT select COMEDI_ISADMA if ISA_DMA_API - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Advantech PCL-812/PG, PCL-813/B, ADLink ACL-8112DG/HG/PG, ACL-8113, ACL-8216, ICP DAS A-821PGH/PGL/PGL-NDA, @@ -178,9 +173,8 @@ config COMEDI_PCL812 config COMEDI_PCL816 tristate "Advantech PCL-814 and PCL-816 ISA card support" - depends on HAS_IOPORT select COMEDI_ISADMA 
if ISA_DMA_API - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Advantech PCL-814 and PCL-816 ISA cards @@ -189,9 +183,8 @@ config COMEDI_PCL816 config COMEDI_PCL818 tristate "Advantech PCL-718 and PCL-818 ISA card support" - depends on HAS_IOPORT select COMEDI_ISADMA if ISA_DMA_API - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Advantech PCL-818 ISA cards PCL-818L, PCL-818H, PCL-818HD, PCL-818HG, PCL-818 and PCL-718 @@ -210,7 +203,7 @@ config COMEDI_PCM3724 config COMEDI_AMPLC_DIO200_ISA tristate "Amplicon PC212E/PC214E/PC215E/PC218E/PC272E" - depends on COMEDI_AMPLC_DIO200 + select COMEDI_AMPLC_DIO200 help Enable support for Amplicon PC212E, PC214E, PC215E, PC218E and PC272E ISA DIO boards @@ -262,8 +255,7 @@ config COMEDI_DAC02 config COMEDI_DAS16M1 tristate "MeasurementComputing CIO-DAS16/M1DAS-16 ISA card support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 help Enable support for Measurement Computing CIO-DAS16/M1 ISA cards. 
@@ -273,7 +265,7 @@ config COMEDI_DAS16M1 config COMEDI_DAS08_ISA tristate "DAS-08 compatible ISA and PC/104 card support" - depends on COMEDI_DAS08 + select COMEDI_DAS08 help Enable support for Keithley Metrabyte/ComputerBoards DAS08 and compatible ISA and PC/104 cards: @@ -286,9 +278,8 @@ config COMEDI_DAS08_ISA config COMEDI_DAS16 tristate "DAS-16 compatible ISA and PC/104 card support" - depends on HAS_IOPORT select COMEDI_ISADMA if ISA_DMA_API - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 help Enable support for Keithley Metrabyte/ComputerBoards DAS16 @@ -305,8 +296,7 @@ config COMEDI_DAS16 config COMEDI_DAS800 tristate "DAS800 and compatible ISA card support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Keithley Metrabyte DAS800 and compatible ISA cards Keithley Metrabyte DAS-800, DAS-801, DAS-802 @@ -318,9 +308,8 @@ config COMEDI_DAS800 config COMEDI_DAS1800 tristate "DAS1800 and compatible ISA card support" - depends on HAS_IOPORT select COMEDI_ISADMA if ISA_DMA_API - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for DAS1800 and compatible ISA cards Keithley Metrabyte DAS-1701ST, DAS-1701ST-DA, DAS-1701/AO, @@ -334,8 +323,7 @@ config COMEDI_DAS1800 config COMEDI_DAS6402 tristate "DAS6402 and compatible ISA card support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for DAS6402 and compatible ISA cards Computerboards, Keithley Metrabyte DAS6402 and compatibles @@ -414,8 +402,7 @@ config COMEDI_FL512 config COMEDI_AIO_AIO12_8 tristate "I/O Products PC/104 AIO12-8 Analog I/O Board support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 help Enable support for I/O Products PC/104 AIO12-8 Analog I/O Board @@ -469,9 +456,8 @@ config COMEDI_ADQ12B config COMEDI_NI_AT_A2150 tristate "NI AT-A2150 ISA card support" - depends on HAS_IOPORT select COMEDI_ISADMA if ISA_DMA_API - depends on COMEDI_8254 
+ select COMEDI_8254 help Enable support for National Instruments AT-A2150 cards @@ -480,8 +466,7 @@ config COMEDI_NI_AT_A2150 config COMEDI_NI_AT_AO tristate "NI AT-AO-6/10 EISA card support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for National Instruments AT-AO-6/10 cards @@ -512,7 +497,7 @@ config COMEDI_NI_ATMIO16D config COMEDI_NI_LABPC_ISA tristate "NI Lab-PC and compatibles ISA support" - depends on COMEDI_NI_LABPC + select COMEDI_NI_LABPC help Enable support for National Instruments Lab-PC and compatibles Lab-PC-1200, Lab-PC-1200AI, Lab-PC+. @@ -576,7 +561,7 @@ endif # COMEDI_ISA_DRIVERS menuconfig COMEDI_PCI_DRIVERS tristate "Comedi PCI drivers" - depends on PCI && HAS_IOPORT + depends on PCI help Enable support for comedi PCI drivers. @@ -725,8 +710,7 @@ config COMEDI_ADL_PCI8164 config COMEDI_ADL_PCI9111 tristate "ADLink PCI-9111HR support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for ADlink PCI9111 cards @@ -736,7 +720,7 @@ config COMEDI_ADL_PCI9111 config COMEDI_ADL_PCI9118 tristate "ADLink PCI-9118DG, PCI-9118HG, PCI-9118HR support" depends on HAS_DMA - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for ADlink PCI-9118DG, PCI-9118HG, PCI-9118HR cards @@ -745,8 +729,7 @@ config COMEDI_ADL_PCI9118 config COMEDI_ADV_PCI1710 tristate "Advantech PCI-171x and PCI-1731 support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Advantech PCI-1710, PCI-1710HG, PCI-1711, PCI-1713 and PCI-1731 @@ -790,8 +773,7 @@ config COMEDI_ADV_PCI1760 config COMEDI_ADV_PCI_DIO tristate "Advantech PCI DIO card support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 help Enable support for Advantech PCI DIO cards @@ -804,7 +786,7 @@ config COMEDI_ADV_PCI_DIO config COMEDI_AMPLC_DIO200_PCI tristate "Amplicon PCI215/PCI272/PCIe215/PCIe236/PCIe296 DIO support" - depends on 
COMEDI_AMPLC_DIO200 + select COMEDI_AMPLC_DIO200 help Enable support for Amplicon PCI215, PCI272, PCIe215, PCIe236 and PCIe296 DIO boards. @@ -832,8 +814,7 @@ config COMEDI_AMPLC_PC263_PCI config COMEDI_AMPLC_PCI224 tristate "Amplicon PCI224 and PCI234 support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Amplicon PCI224 and PCI234 AO boards @@ -842,8 +823,7 @@ config COMEDI_AMPLC_PCI224 config COMEDI_AMPLC_PCI230 tristate "Amplicon PCI230 and PCI260 support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 help Enable support for Amplicon PCI230 and PCI260 Multifunction I/O @@ -862,7 +842,7 @@ config COMEDI_CONTEC_PCI_DIO config COMEDI_DAS08_PCI tristate "DAS-08 PCI support" - depends on COMEDI_DAS08 + select COMEDI_DAS08 help Enable support for PCI DAS-08 cards. @@ -949,8 +929,7 @@ config COMEDI_CB_PCIDAS64 config COMEDI_CB_PCIDAS tristate "MeasurementComputing PCI-DAS support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 help Enable support for ComputerBoards/MeasurementComputing PCI-DAS with @@ -974,8 +953,7 @@ config COMEDI_CB_PCIDDA config COMEDI_CB_PCIMDAS tristate "MeasurementComputing PCIM-DAS1602/16, PCIe-DAS1602/16 support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 help Enable support for ComputerBoards/MeasurementComputing PCI Migration @@ -995,8 +973,7 @@ config COMEDI_CB_PCIMDDA config COMEDI_ME4000 tristate "Meilhaus ME-4000 support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Meilhaus PCI data acquisition cards ME-4650, ME-4670i, ME-4680, ME-4680i and ME-4680is @@ -1054,7 +1031,7 @@ config COMEDI_NI_670X config COMEDI_NI_LABPC_PCI tristate "NI Lab-PC PCI-1200 support" - depends on COMEDI_NI_LABPC + select COMEDI_NI_LABPC help Enable support for National Instruments Lab-PC PCI-1200. 
@@ -1076,7 +1053,6 @@ config COMEDI_NI_PCIDIO config COMEDI_NI_PCIMIO tristate "NI PCI-MIO-E series and M series support" depends on HAS_DMA - depends on HAS_IOPORT select COMEDI_NI_TIOCMD select COMEDI_8255 help @@ -1098,8 +1074,7 @@ config COMEDI_NI_PCIMIO config COMEDI_RTD520 tristate "Real Time Devices PCI4520/DM7520 support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for Real Time Devices PCI4520/DM7520 @@ -1139,8 +1114,7 @@ if COMEDI_PCMCIA_DRIVERS config COMEDI_CB_DAS16_CS tristate "CB DAS16 series PCMCIA support" - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 help Enable support for the ComputerBoards/MeasurementComputing PCMCIA cards DAS16/16, PCM-DAS16D/12 and PCM-DAS16s/16 @@ -1150,7 +1124,7 @@ config COMEDI_CB_DAS16_CS config COMEDI_DAS08_CS tristate "CB DAS08 PCMCIA support" - depends on COMEDI_DAS08 + select COMEDI_DAS08 help Enable support for the ComputerBoards/MeasurementComputing DAS-08 PCMCIA card @@ -1160,7 +1134,6 @@ config COMEDI_DAS08_CS config COMEDI_NI_DAQ_700_CS tristate "NI DAQCard-700 PCMCIA support" - depends on HAS_IOPORT help Enable support for the National Instruments PCMCIA DAQCard-700 DIO @@ -1169,7 +1142,6 @@ config COMEDI_NI_DAQ_700_CS config COMEDI_NI_DAQ_DIO24_CS tristate "NI DAQ-Card DIO-24 PCMCIA support" - depends on HAS_IOPORT select COMEDI_8255 help Enable support for the National Instruments PCMCIA DAQ-Card DIO-24 @@ -1179,7 +1151,7 @@ config COMEDI_NI_DAQ_DIO24_CS config COMEDI_NI_LABPC_CS tristate "NI DAQCard-1200 PCMCIA support" - depends on COMEDI_NI_LABPC + select COMEDI_NI_LABPC help Enable support for the National Instruments PCMCIA DAQCard-1200 @@ -1188,7 +1160,6 @@ config COMEDI_NI_LABPC_CS config COMEDI_NI_MIO_CS tristate "NI DAQCard E series PCMCIA support" - depends on HAS_IOPORT select COMEDI_NI_TIO select COMEDI_8255 help @@ -1201,7 +1172,6 @@ config COMEDI_NI_MIO_CS config COMEDI_QUATECH_DAQP_CS tristate "Quatech DAQP PCMCIA data capture 
card support" - depends on HAS_IOPORT help Enable support for the Quatech DAQP PCMCIA data capture cards DAQP-208 and DAQP-308 @@ -1278,14 +1248,12 @@ endif # COMEDI_USB_DRIVERS config COMEDI_8254 tristate - depends on HAS_IOPORT config COMEDI_8255 tristate config COMEDI_8255_SA tristate "Standalone 8255 support" - depends on HAS_IOPORT select COMEDI_8255 help Enable support for 8255 digital I/O as a standalone driver. @@ -1317,7 +1285,7 @@ config COMEDI_KCOMEDILIB called kcomedilib. config COMEDI_AMPLC_DIO200 - depends on COMEDI_8254 + select COMEDI_8254 tristate config COMEDI_AMPLC_PC236 @@ -1326,7 +1294,7 @@ config COMEDI_AMPLC_PC236 config COMEDI_DAS08 tristate - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 config COMEDI_ISADMA @@ -1334,8 +1302,7 @@ config COMEDI_ISADMA config COMEDI_NI_LABPC tristate - depends on HAS_IOPORT - depends on COMEDI_8254 + select COMEDI_8254 select COMEDI_8255 config COMEDI_NI_LABPC_ISADMA From fd6f7ad2fd4d53fa14f4fd190f9b05d043973892 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Aug 2023 17:58:24 +0300 Subject: [PATCH 110/333] driver core: return an error when dev_set_name() hasn't happened The commit d21fdd07cea4 ("driver core: Return proper error code when dev_set_name() fails") rewrote the logic of handling the dev_set_name() error codes, but missed the point that initially set error value to -EINVAL might be rewritten and hence the error path can't be triggered at some circumstances. To fix this, make sure that error variable is set to -EINVAL when other conditionals are false. 
Reported-by: syzbot+bdfb03b1ec8b342c12cb@syzkaller.appspotmail.com Fixes: d21fdd07cea4 ("driver core: Return proper error code when dev_set_name() fails") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230828145824.3895288-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index b7d7f410c256..4d8b315c48a1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3537,6 +3537,8 @@ int device_add(struct device *dev) /* subsystems can specify simple device enumeration */ else if (dev->bus && dev->bus->dev_name) error = dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); + else + error = -EINVAL; if (error) goto name_error; From 4eb94a7793074f799b1f558471019e9a21fa9546 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Sep 2023 22:59:28 -0700 Subject: [PATCH 111/333] selftests/bpf: ensure all CI arches set CONFIG_BPF_KPROBE_OVERRIDE=y Turns out CONFIG_BPF_KPROBE_OVERRIDE=y is only enabled in x86-64 CI, but is not set on aarch64, causing CI failures ([0]). Move CONFIG_BPF_KPROBE_OVERRIDE=y to arch-agnostic CI config. 
[0] https://github.com/kernel-patches/bpf/actions/runs/6122324047/job/16618390535 Fixes: 7182e56411b9 ("selftests/bpf: Add kprobe_multi override test") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230912055928.1704269-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 1 + tools/testing/selftests/bpf/config.x86_64 | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 1c7584e8dd9e..e41eb33b2704 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -4,6 +4,7 @@ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y +CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index b650b2e617b8..2e70a6048278 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -20,7 +20,6 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BONDING=y CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y CONFIG_BPFILTER=y From 7e021da80f48582171029714f8a487347f29dddb Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 12 Sep 2023 10:10:39 +0900 Subject: [PATCH 112/333] selftests: tracing: Fix to unmount tracefs for recovering environment Fix to unmount the tracefs if the ftracetest mounted it for recovering system environment. If the tracefs is already mounted, this does nothing. 
Suggested-by: Mark Brown Link: https://lore.kernel.org/all/29fce076-746c-4650-8358-b4e0fa215cf7@sirena.org.uk/ Fixes: cbd965bde74c ("ftrace/selftests: Return the skip code when tracing directory not configured in kernel") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Reviewed-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 7df8baa0f98f..c778d4dcc17e 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -31,6 +31,9 @@ err_ret=1 # kselftest skip code is 4 err_skip=4 +# umount required +UMOUNT_DIR="" + # cgroup RT scheduling prevents chrt commands from succeeding, which # induces failures in test wakeup tests. Disable for the duration of # the tests. @@ -45,6 +48,9 @@ setup() { cleanup() { echo $sched_rt_runtime_orig > $sched_rt_runtime + if [ -n "${UMOUNT_DIR}" ]; then + umount ${UMOUNT_DIR} ||: + fi } errexit() { # message @@ -161,11 +167,13 @@ if [ -z "$TRACING_DIR" ]; then mount -t tracefs nodev /sys/kernel/tracing || errexit "Failed to mount /sys/kernel/tracing" TRACING_DIR="/sys/kernel/tracing" + UMOUNT_DIR=${TRACING_DIR} # If debugfs exists, then so does /sys/kernel/debug elif [ -d "/sys/kernel/debug" ]; then mount -t debugfs nodev /sys/kernel/debug || errexit "Failed to mount /sys/kernel/debug" TRACING_DIR="/sys/kernel/debug/tracing" + UMOUNT_DIR=${TRACING_DIR} else err_ret=$err_skip errexit "debugfs and tracefs are not configured in this kernel" From 25e73b7e3f72a25aa30cbb2eecb49036e0acf066 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Aug 2023 12:55:46 +0200 Subject: [PATCH 113/333] x86/ibt: Suppress spurious ENDBR It was reported that under certain circumstances GCC emits ENDBR instructions for _THIS_IP_ usage. 
Specifically, when it appears at the start of a basic block -- but not elsewhere. Since _THIS_IP_ is never used for control flow, these ENDBR instructions are completely superfluous. Override the _THIS_IP_ definition for x86_64 to avoid this. Fewer ENDBR instructions are better. Fixes: 156ff4a544ae ("x86/ibt: Base IBT bits") Reported-by: David Kaplan Reviewed-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230802110323.016197440@infradead.org --- arch/x86/include/asm/linkage.h | 8 ++++++++ include/linux/instruction_pointer.h | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 97a3de7892d3..5ff49fd67732 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -8,6 +8,14 @@ #undef notrace #define notrace __attribute__((no_instrument_function)) +#ifdef CONFIG_64BIT +/* + * The generic version tends to create spurious ENDBR instructions under + * certain conditions.
+ */ +#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; }) +#endif + #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h index cda1f706eaeb..aa0b3ffea935 100644 --- a/include/linux/instruction_pointer.h +++ b/include/linux/instruction_pointer.h @@ -2,7 +2,12 @@ #ifndef _LINUX_INSTRUCTION_POINTER_H #define _LINUX_INSTRUCTION_POINTER_H +#include + #define _RET_IP_ (unsigned long)__builtin_return_address(0) + +#ifndef _THIS_IP_ #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif #endif /* _LINUX_INSTRUCTION_POINTER_H */ From 7575e5a35267983dcbeb1e0d3a49d21ae3cf0b82 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Aug 2023 12:55:47 +0200 Subject: [PATCH 114/333] x86/ibt: Avoid duplicate ENDBR in __put_user_nocheck*() Commit cb855971d717 ("x86/putuser: Provide room for padding") changed __put_user_nocheck_*() into proper functions but failed to note that SYM_FUNC_START() already provides ENDBR, rendering the explicit ENDBR superfluous. 
Fixes: cb855971d717 ("x86/putuser: Provide room for padding") Reported-by: David Kaplan Reviewed-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230802110323.086971726@infradead.org --- arch/x86/lib/putuser.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 1451e0c4ae22..235bbda6fc82 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -56,7 +56,6 @@ SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) SYM_FUNC_START(__put_user_nocheck_1) - ENDBR ASM_STAC 2: movb %al,(%_ASM_CX) xor %ecx,%ecx @@ -76,7 +75,6 @@ SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) SYM_FUNC_START(__put_user_nocheck_2) - ENDBR ASM_STAC 4: movw %ax,(%_ASM_CX) xor %ecx,%ecx @@ -96,7 +94,6 @@ SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) SYM_FUNC_START(__put_user_nocheck_4) - ENDBR ASM_STAC 6: movl %eax,(%_ASM_CX) xor %ecx,%ecx @@ -119,7 +116,6 @@ SYM_FUNC_END(__put_user_8) EXPORT_SYMBOL(__put_user_8) SYM_FUNC_START(__put_user_nocheck_8) - ENDBR ASM_STAC 9: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 From dad651b2a44eb6b201738f810254279dca29d30d Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Tue, 12 Sep 2023 17:52:49 +0200 Subject: [PATCH 115/333] nvme-pci: do not set the NUMA node of device if it has none If a device has no NUMA node information associated with it, the driver puts the device in node first_memory_node (say node 0). Not having a NUMA node and being associated with node 0 are completely different things and it makes little sense to mix the two. 
Signed-off-by: Pratyush Yadav Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index baf69af7ea78..f5ba2d7102ea 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2916,9 +2916,6 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, struct nvme_dev *dev; int ret = -ENOMEM; - if (node == NUMA_NO_NODE) - set_dev_node(&pdev->dev, first_memory_node); - dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) return ERR_PTR(-ENOMEM); From 18789be8e0d9fbb78b2290dcf93f500726ed19f0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 12 Sep 2023 14:38:41 +0100 Subject: [PATCH 116/333] ASoC: cs35l56: Disable low-power hibernation mode Do not allow the CS35L56 to be put into its lowest power "hibernation" mode. This only affects I2C because "hibernation" is already disabled on SPI and SoundWire. Recent firmwares need a different wake-up sequence. Until that sequence has been specified, the chip "hibernation" mode must be disabled otherwise it can intermittently fail to wake. THIS WILL NOT APPLY CLEANLY TO 6.5 AND EARLIER: We will send a separate backport patch to stable. 
Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230912133841.3480466-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-i2c.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56-i2c.c b/sound/soc/codecs/cs35l56-i2c.c index 9f4f2f4f23f5..d10e0e2380e8 100644 --- a/sound/soc/codecs/cs35l56-i2c.c +++ b/sound/soc/codecs/cs35l56-i2c.c @@ -27,7 +27,6 @@ static int cs35l56_i2c_probe(struct i2c_client *client) return -ENOMEM; cs35l56->base.dev = dev; - cs35l56->base.can_hibernate = true; i2c_set_clientdata(client, cs35l56); cs35l56->base.regmap = devm_regmap_init_i2c(client, regmap_config); From 05d0f8f55ad60854cb706798da94276a33590445 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 12 Sep 2023 14:08:36 -0500 Subject: [PATCH 117/333] smb3: move server check earlier when setting channel sequence number Smatch warning pointed out by Dan Carpenter: fs/smb/client/smb2pdu.c:105 smb2_hdr_assemble() warn: variable dereferenced before check 'server' (see line 95) Fixes: 09ee7a3bf866 ("[SMB3] send channel sequence number in SMB3 requests after reconnects") Reported-by: Dan Carpenter Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 092b0087c9dc..3403188e3100 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -92,17 +92,22 @@ smb2_hdr_assemble(struct smb2_hdr *shdr, __le16 smb2_cmd, shdr->ProtocolId = SMB2_PROTO_NUMBER; shdr->StructureSize = cpu_to_le16(64); shdr->Command = smb2_cmd; - if (server->dialect >= SMB30_PROT_ID) { - /* After reconnect SMB3 must set ChannelSequence on subsequent reqs */ - smb3_hdr = (struct smb3_hdr_req *)shdr; - /* if primary channel is not set yet, use default channel for chan sequence num */ - if (SERVER_IS_CHAN(server)) - smb3_hdr->ChannelSequence = - 
cpu_to_le16(server->primary_server->channel_sequence_num); - else - smb3_hdr->ChannelSequence = cpu_to_le16(server->channel_sequence_num); - } + if (server) { + /* After reconnect SMB3 must set ChannelSequence on subsequent reqs */ + if (server->dialect >= SMB30_PROT_ID) { + smb3_hdr = (struct smb3_hdr_req *)shdr; + /* + * if primary channel is not set yet, use default + * channel for chan sequence num + */ + if (SERVER_IS_CHAN(server)) + smb3_hdr->ChannelSequence = + cpu_to_le16(server->primary_server->channel_sequence_num); + else + smb3_hdr->ChannelSequence = + cpu_to_le16(server->channel_sequence_num); + } spin_lock(&server->req_lock); /* Request up to 10 credits but don't go over the limit. */ if (server->credits >= server->max_credits) From a8f12572860ad8ba659d96eee9cf09e181f6ebcc Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 8 Sep 2023 18:33:35 +0200 Subject: [PATCH 118/333] bpf: Fix a erroneous check after snprintf() snprintf() does not return negative error code on error, it returns the number of characters which *would* be generated for the given input. Fix the error handling check. 
Fixes: 57539b1c0ac2 ("bpf: Enable annotating trusted nested pointers") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/393bdebc87b22563c08ace094defa7160eb7a6c0.1694190795.git.christophe.jaillet@wanadoo.fr Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 1095bbe29859..8090d7fb11ef 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8501,7 +8501,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, tname = btf_name_by_offset(btf, walk_type->name_off); ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix); - if (ret < 0) + if (ret >= sizeof(safe_tname)) return false; safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info)); From d128860dbb29cafc3c65ca2d22082745a32829dd Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 12 Sep 2023 14:06:31 +0200 Subject: [PATCH 119/333] selftests/bpf: fix unpriv_disabled check in test_verifier Commit 1d56ade032a49 changed the function get_unpriv_disabled() to return its results as a bool instead of updating a global variable, but test_verifier was not updated to keep in line with these changes. Thus unpriv_disabled is always false in test_verifier and unprivileged tests are not properly skipped on systems with unprivileged bpf disabled. 
Fixes: 1d56ade032a49 ("selftests/bpf: Unprivileged tests for test_loader.c") Signed-off-by: Artem Savkov Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230912120631.213139-1-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 31f1c935cd07..98107e0452d3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1880,7 +1880,7 @@ int main(int argc, char **argv) } } - get_unpriv_disabled(); + unpriv_disabled = get_unpriv_disabled(); if (unpriv && unpriv_disabled) { printf("Cannot run as unprivileged user with sysctl %s.\n", UNPRIV_SYSCTL); From 214bfd267f4929722b374b43fda456c21cd6f016 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Sep 2023 23:08:12 -0700 Subject: [PATCH 120/333] bpf, cgroup: fix multiple kernel-doc warnings Fix missing or extra function parameter kernel-doc warnings in cgroup.c: kernel/bpf/cgroup.c:1359: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_skb' kernel/bpf/cgroup.c:1359: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_skb' kernel/bpf/cgroup.c:1439: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sk' kernel/bpf/cgroup.c:1439: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sk' kernel/bpf/cgroup.c:1467: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sock_addr' kernel/bpf/cgroup.c:1467: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sock_addr' kernel/bpf/cgroup.c:1512: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sock_ops' kernel/bpf/cgroup.c:1512: warning: Function parameter or member 'atype' not 
described in '__cgroup_bpf_run_filter_sock_ops' kernel/bpf/cgroup.c:1685: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sysctl' kernel/bpf/cgroup.c:1685: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sysctl' kernel/bpf/cgroup.c:795: warning: Excess function parameter 'type' description in '__cgroup_bpf_replace' kernel/bpf/cgroup.c:795: warning: Function parameter or member 'new_prog' not described in '__cgroup_bpf_replace' Signed-off-by: Randy Dunlap Cc: Martin KaFai Lau Cc: bpf@vger.kernel.org Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Andrii Nakryiko Link: https://lore.kernel.org/r/20230912060812.1715-1-rdunlap@infradead.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/cgroup.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 5b2741aa0d9b..03b3d4492980 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -785,7 +785,8 @@ found: * to descendants * @cgrp: The cgroup which descendants to traverse * @link: A link for which to replace BPF program - * @type: Type of attach operation + * @new_prog: &struct bpf_prog for the target BPF program with its refcnt + * incremented * * Must be called with cgroup_mutex held. */ @@ -1334,7 +1335,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering * @sk: The socket sending or receiving traffic * @skb: The skb that is being sent or received - * @type: The type of program to be executed + * @atype: The type of program to be executed * * If no socket is passed, or the socket is not of type INET or INET6, * this function does nothing and returns 0. 
@@ -1424,7 +1425,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); /** * __cgroup_bpf_run_filter_sk() - Run a program on a sock * @sk: sock structure to manipulate - * @type: The type of program to be executed + * @atype: The type of program to be executed * * socket is passed is expected to be of type INET or INET6. * @@ -1449,7 +1450,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * provided by user sockaddr * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user - * @type: The type of program to be executed + * @atype: The type of program to be executed * @t_ctx: Pointer to attach type specific context * @flags: Pointer to u32 which contains higher bits of BPF program * return value (OR'ed together). @@ -1496,7 +1497,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains * sk with connection information (IP addresses, etc.) May not contain * cgroup info if it is a req sock. - * @type: The type of program to be executed + * @atype: The type of program to be executed * * socket passed is expected to be of type INET or INET6. * @@ -1670,7 +1671,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { * @ppos: value-result argument: value is position at which read from or write * to sysctl is happening, result is new position if program overrode it, * initial value otherwise - * @type: type of program to be executed + * @atype: type of program to be executed * * Program is run when sysctl is being accessed, either read or written, and * can allow or deny such access. From 1bfb2b618d52e59a4ef1896b46c4698ad2be66b7 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Wed, 6 Sep 2023 17:58:17 +0800 Subject: [PATCH 121/333] riscv: kexec: Align the kexeced kernel entry The current riscv boot protocol requires 2MB alignment for RV64 and 4MB alignment for RV32. 
In KEXEC_FILE path, the elf_find_pbase() function should align the kexeced kernel entry according to the requirement, otherwise the kexeced kernel would silently BUG at the setup_vm(). Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic") Signed-off-by: Song Shuai Link: https://lore.kernel.org/r/20230906095817.364390-1-songshuaishuai@tinylab.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index f4099059ed8f..e60fbd8660c4 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -98,7 +98,13 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, kbuf.image = image; kbuf.buf_min = lowest_paddr; kbuf.buf_max = ULONG_MAX; - kbuf.buf_align = PAGE_SIZE; + + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 + * + */ + kbuf.buf_align = PMD_SIZE; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); kbuf.top_down = false; From 8eb8fe67e2c84324398f5983c41b4f831d0705b3 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 12 Sep 2023 15:24:10 +0800 Subject: [PATCH 122/333] riscv: errata: fix T-Head dcache.cva encoding The dcache.cva encoding shown in the comments are wrong, it's for dcache.cval1 (which is restricted to L1) instead. Fix this in the comment and in the hardcoded instruction. 
Signed-off-by: Icenowy Zheng Tested-by: Sergey Matyukevich Reviewed-by: Heiko Stuebner Reviewed-by: Guo Ren Tested-by: Drew Fustini Link: https://lore.kernel.org/r/20230912072410.2481-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/errata_list.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index e2ecd01bfac7..b55b434f0059 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -105,7 +105,7 @@ asm volatile(ALTERNATIVE( \ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01001 rs1 000 00000 0001011 * dcache.cva rs1 (clean, virtual address) - * 0000001 00100 rs1 000 00000 0001011 + * 0000001 00101 rs1 000 00000 0001011 * * dcache.cipa rs1 (clean then invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | @@ -118,7 +118,7 @@ asm volatile(ALTERNATIVE( \ * 0000000 11001 00000 000 00000 0001011 */ #define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0245000b" +#define THEAD_clean_A0 ".long 0x0255000b" #define THEAD_flush_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" From ccf1dab96be4caed7c5235b1cfdb606ac161b996 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Mon, 11 Sep 2023 16:23:58 +0200 Subject: [PATCH 123/333] selinux: fix handling of empty opts in selinux_fs_context_submount() selinux_set_mnt_opts() relies on the fact that the mount options pointer is always NULL when all options are unset (specifically in its !selinux_initialized() branch). However, the new selinux_fs_context_submount() hook breaks this rule by allocating a new structure even if no options are set. That causes any submount created before a SELinux policy is loaded to be rejected in selinux_set_mnt_opts(). 
Fix this by making selinux_fs_context_submount() leave fc->security set to NULL when there are no options to be copied from the reference superblock. Cc: Reported-by: Adam Williamson Link: https://bugzilla.redhat.com/show_bug.cgi?id=2236345 Fixes: d80a8f1b58c2 ("vfs, security: Fix automount superblock LSM init problem, preventing NFS sb sharing") Signed-off-by: Ondrej Mosnacek Reviewed-by: Jeff Layton Signed-off-by: Paul Moore --- security/selinux/hooks.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 10350534de6d..2aa0e219d721 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2775,14 +2775,20 @@ static int selinux_umount(struct vfsmount *mnt, int flags) static int selinux_fs_context_submount(struct fs_context *fc, struct super_block *reference) { - const struct superblock_security_struct *sbsec; + const struct superblock_security_struct *sbsec = selinux_superblock(reference); struct selinux_mnt_opts *opts; + /* + * Ensure that fc->security remains NULL when no options are set + * as expected by selinux_set_mnt_opts(). + */ + if (!(sbsec->flags & (FSCONTEXT_MNT|CONTEXT_MNT|DEFCONTEXT_MNT))) + return 0; + opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) return -ENOMEM; - sbsec = selinux_superblock(reference); if (sbsec->flags & FSCONTEXT_MNT) opts->fscontext_sid = sbsec->sid; if (sbsec->flags & CONTEXT_MNT) From edcfe22985d09ee8e2346c9217f5a52ab150099f Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Mon, 11 Sep 2023 14:49:06 -0400 Subject: [PATCH 124/333] drm/amdkfd: Insert missing TLB flush on GFX10 and later Heavy-weight TLB flush is required after unmap on all GPUs for correctness and security. 
Signed-off-by: Harish Kasiviswanathan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3d9ce44d88da..fa24e1852493 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1466,8 +1466,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } From 139e08188babf7a4c5f0df54b605105852fc347a Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 12 Sep 2023 11:06:56 -0700 Subject: [PATCH 125/333] Documentation: embargoed-hardware-issues.rst: Add myself for RISC-V I'm not sure exactly how RISC-V fits into the story here, but I'm happy to volunteer as a sort of catch-all for vendors who aren't otherwise represented. Signed-off-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230912180657.31841-1-palmer@rivosinc.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index cb686238f21d..ac7c52f130c9 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -251,6 +251,7 @@ an involved disclosed party. 
The current ambassadors list: IBM Z Christian Borntraeger Intel Tony Luck Qualcomm Trilok Soni + RISC-V Palmer Dabbelt Samsung Javier González Microsoft James Morris From 0342518b0c15481dd4359b499301711b2f9a796c Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 12 Sep 2023 14:27:39 +0100 Subject: [PATCH 126/333] ALSA: hda: cs35l56: Disable low-power hibernation mode Do not allow the CS35L56 to be put into its lowest power "hibernation" mode. This only affects I2C because "hibernation" is already disabled on SPI. Recent firmwares need a different wake-up sequence. Until that sequence has been specified, the chip "hibernation" mode must be disabled otherwise it can intermittently fail to wake. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230912132739.3478441-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda_i2c.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c index 83e4acdd89ac..757a4d193e0f 100644 --- a/sound/pci/hda/cs35l56_hda_i2c.c +++ b/sound/pci/hda/cs35l56_hda_i2c.c @@ -21,7 +21,6 @@ static int cs35l56_hda_i2c_probe(struct i2c_client *clt) return -ENOMEM; cs35l56->base.dev = &clt->dev; - cs35l56->base.can_hibernate = true; cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c); if (IS_ERR(cs35l56->base.regmap)) { ret = PTR_ERR(cs35l56->base.regmap); From 485ddd519fbd89a9d9ac4b02be489e03cbbeebba Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 12 Sep 2023 19:26:17 +0300 Subject: [PATCH 127/333] ALSA: hda: intel-sdw-acpi: Use u8 type for link index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use consistently u8 for sdw link index. The id is limited to 4, u8 is adequate in size to store it. 
This change will also fix the following compiler warning/error (W=1): sound/hda/intel-sdw-acpi.c: In function ‘sdw_intel_acpi_scan’: sound/hda/intel-sdw-acpi.c:34:35: error: ‘-subproperties’ directive output may be truncated writing 14 bytes into a region of size between 7 and 17 [-Werror=format-truncation=] 34 | "mipi-sdw-link-%d-subproperties", i); | ^~~~~~~~~~~~~~ In function ‘is_link_enabled’, inlined from ‘sdw_intel_scan_controller’ at sound/hda/intel-sdw-acpi.c:106:8, inlined from ‘sdw_intel_acpi_scan’ at sound/hda/intel-sdw-acpi.c:180:9: sound/hda/intel-sdw-acpi.c:33:9: note: ‘snprintf’ output between 30 and 40 bytes into a destination of size 32 33 | snprintf(name, sizeof(name), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | "mipi-sdw-link-%d-subproperties", i); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The warnings got brought to light by a recent patch upstream: commit 6d4ab2e97dcf ("extrawarn: enable format and stringop overflow warnings in W=1") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230912162617.29178-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 5cb92f7ccbca..b57d72ea4503 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -23,7 +23,7 @@ static int ctrl_link_mask; module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444); MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)"); -static bool is_link_enabled(struct fwnode_handle *fw_node, int i) +static bool is_link_enabled(struct fwnode_handle *fw_node, u8 idx) { struct fwnode_handle *link; char name[32]; @@ -31,7 +31,7 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, int i) /* Find master handle */ snprintf(name, sizeof(name), - "mipi-sdw-link-%d-subproperties", i); + 
"mipi-sdw-link-%hhu-subproperties", idx); link = fwnode_get_named_child_node(fw_node, name); if (!link) @@ -51,8 +51,8 @@ static int sdw_intel_scan_controller(struct sdw_intel_acpi_info *info) { struct acpi_device *adev = acpi_fetch_acpi_dev(info->handle); - int ret, i; - u8 count; + u8 count, i; + int ret; if (!adev) return -EINVAL; From 07058dceb038a4b0dd49af07118b6b2a685bb4a6 Mon Sep 17 00:00:00 2001 From: Knyazev Arseniy Date: Wed, 13 Sep 2023 10:33:43 +0500 Subject: [PATCH 128/333] ALSA: hda/realtek: Splitting the UX3402 into two separate models UX3402VA and UX3402ZA models require different hex values, so combining them into one model is incorrect. Fixes: 491a4ccd8a02 ("ALSA: hda/realtek: Add quirk for ASUS Zenbook using CS35L41") Signed-off-by: Knyazev Arseniy Link: https://lore.kernel.org/r/20230913053343.119798-1-poseaydone@ya.ru Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 887c1b163865..883a7e865bc5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9814,7 +9814,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d1f, "ASUS ROG Strix G17 2023 (G713PV)", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), - SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), From 3a7d263aea9d505e6272a913d6cfece00b800b4d Mon Sep
17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 5 Sep 2023 21:42:52 +0200 Subject: [PATCH 129/333] w1: ds2482: Switch back to use struct i2c_driver's .probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new() call-back type"), all drivers being converted to .probe_new() and then commit 03c835f498b5 ("i2c: Switch .probe() to not take an id parameter") convert back to (the new) .probe() to be able to eventually drop .probe_new() from struct i2c_driver. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/lkml/20230612072807.839689-1-u.kleine-koenig@pengutronix.de/ Signed-off-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- drivers/w1/masters/ds2482.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/w1/masters/ds2482.c b/drivers/w1/masters/ds2482.c index c1de8a92e144..b2d76c1784bd 100644 --- a/drivers/w1/masters/ds2482.c +++ b/drivers/w1/masters/ds2482.c @@ -551,7 +551,7 @@ static struct i2c_driver ds2482_driver = { .driver = { .name = "ds2482", }, - .probe_new = ds2482_probe, + .probe = ds2482_probe, .remove = ds2482_remove, .id_table = ds2482_id, }; From 5eb1e6e459cfa025f79c43014f66ff62a55542f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 5 Sep 2023 21:42:53 +0200 Subject: [PATCH 130/333] i2c: Drop legacy callback .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all drivers are converted to the (new) .probe() callback, the temporary .probe_new() can go away. 
\o/ Link: https://lore.kernel.org/linux-i2c/20230626094548.559542-1-u.kleine-koenig@pengutronix.de Reviewed-by: Javier Martinez Canillas Reviewed-by: Jean Delvare Signed-off-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3430cc2b05a6..0dae9db27538 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -237,7 +237,6 @@ enum i2c_driver_flags { * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding - * @probe_new: Transitional callback for device binding - do not use * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol @@ -272,16 +271,8 @@ enum i2c_driver_flags { struct i2c_driver { unsigned int class; - union { /* Standard driver model interfaces */ - int (*probe)(struct i2c_client *client); - /* - * Legacy callback that was part of a conversion of .probe(). - * Today it has the same semantic as .probe(). Don't use for new - * code. - */ - int (*probe_new)(struct i2c_client *client); - }; + int (*probe)(struct i2c_client *client); void (*remove)(struct i2c_client *client); From 24dc13f94367edb314b13923818d98dd565edc44 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 15 Aug 2023 17:29:11 +0200 Subject: [PATCH 131/333] i2c: Make I2C_ATR invisible I2C Address Translator (ATR) support is not a stand-alone driver, but a library. All of its users select I2C_ATR. Hence there is no need for the user to enable this symbol manually, except when compile-testing. 
Signed-off-by: Geert Uytterhoeven Reviewed-by: Luca Ceresoli Reviewed-by: Tomi Valkeinen Signed-off-by: Wolfram Sang --- drivers/i2c/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index c6d1a345ea6d..9388823bb0bb 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -72,7 +72,7 @@ config I2C_MUX source "drivers/i2c/muxes/Kconfig" config I2C_ATR - tristate "I2C Address Translator (ATR) support" + tristate "I2C Address Translator (ATR) support" if COMPILE_TEST help Enable support for I2C Address Translator (ATR) chips. From b2cacc2e818717545e6d0cc453b72f98249398bf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 4 Sep 2023 14:00:36 +0200 Subject: [PATCH 132/333] i2c: I2C_MLXCPLD on ARM64 should depend on ACPI The "i2c_mlxcpld" platform device is only instantiated on X86 systems (through drivers/platform/x86/mlx-platform.c), or on ARM64 systems with ACPI (through drivers/platform/mellanox/nvsw-sn2201.c). Hence further restrict the dependency on ARM64 to ACPI, to prevent asking the user about this driver when configuring an ARM64 kernel without ACPI support. While at it, document in the Kconfig help text that the driver supports ARM64/ACPI based systems, too. Signed-off-by: Geert Uytterhoeven Acked-by: Vadim Pasternak Acked-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 169607e80331..6644eebedaf3 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1384,10 +1384,10 @@ config I2C_ICY config I2C_MLXCPLD tristate "Mellanox I2C driver" - depends on X86_64 || ARM64 || COMPILE_TEST + depends on X86_64 || (ARM64 && ACPI) || COMPILE_TEST help This exposes the Mellanox platform I2C busses to the linux I2C layer - for X86 based systems. + for X86 and ARM64/ACPI based systems. 
Controller is implemented as CPLD logic. This driver can also be built as a module. If so, the module will be From fee465150b458351b6d9b9f66084f3cc3022b88b Mon Sep 17 00:00:00 2001 From: Tommy Huang Date: Wed, 6 Sep 2023 08:49:10 +0800 Subject: [PATCH 133/333] i2c: aspeed: Reset the i2c controller when timeout occurs Reset the i2c controller when an i2c transfer timeout occurs. The remaining interrupts and device should be reset to avoid unpredictable controller behavior. Fixes: 2e57b7cebb98 ("i2c: aspeed: Add multi-master use case support") Cc: # v5.1+ Signed-off-by: Tommy Huang Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-aspeed.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 2e5acfeb76c8..5a416b39b818 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -698,13 +698,16 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap, if (time_left == 0) { /* - * If timed out and bus is still busy in a multi master - * environment, attempt recovery at here. + * In a multi-master setup, if a timeout occurs, attempt + * recovery. But if the bus is idle, we still need to reset the + * i2c controller to clear the remaining interrupts. 
*/ if (bus->multi_master && (readl(bus->base + ASPEED_I2C_CMD_REG) & ASPEED_I2CD_BUS_BUSY_STS)) aspeed_i2c_recover_bus(bus); + else + aspeed_i2c_reset(bus); /* * If timed out and the state is still pending, drop the pending From 637f33a4fe864ac8636e22766d67210e801fcd0d Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 31 Aug 2023 14:32:53 +0530 Subject: [PATCH 134/333] i2c: cadence: Fix the kernel-doc warnings This fixes the below warnings drivers/i2c/busses/i2c-cadence.c:221: warning: Function parameter or member 'rinfo' not described in 'cdns_i2c' Reviewed-by: Andi Shyti Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308171510.bKHBcZQW-lkp@intel.com/ Signed-off-by: Shubhrajyoti Datta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 9849f4502570..de3f58b60dce 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -182,6 +182,7 @@ enum cdns_i2c_slave_state { * @reset: Reset control for the device * @quirks: flag for broken hold bit usage in r1p10 * @ctrl_reg: Cached value of the control register. + * @rinfo: I2C GPIO recovery information * @ctrl_reg_diva_divb: value of fields DIV_A and DIV_B from CR register * @slave: Registered slave instance. * @dev_mode: I2C operating role(master/slave). From 26f7111abd8e15726c93bafe16a349f1db2f14e0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 13 Sep 2023 12:39:33 +0300 Subject: [PATCH 135/333] ALSA: usb-audio: mixer: Remove temporary string use in parse_clock_source_unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kctl->id.name can be directly passed to snd_usb_copy_string_desc() and if the string has been fetched the suffix can be appended with the append_ctl_name() call. The temporary name string becomes redundant and can be removed. 
This change will also fix the following compiler warning/error (W=1): sound/usb/mixer.c: In function ‘parse_audio_unit’: sound/usb/mixer.c:1972:29: error: ‘ Validity’ directive output may be truncated writing 9 bytes into a region of size between 1 and 44 [-Werror=format-truncation=] 1972 | "%s Validity", name); | ^~~~~~~~~ In function ‘parse_clock_source_unit’, inlined from ‘parse_audio_unit’ at sound/usb/mixer.c:2892:10: sound/usb/mixer.c:1971:17: note: ‘snprintf’ output between 10 and 53 bytes into a destination of size 44 1971 | snprintf(kctl->id.name, sizeof(kctl->id.name), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1972 | "%s Validity", name); | ~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The warnings got brought to light by a recent patch upstream: commit 6d4ab2e97dcf ("extrawarn: enable format and stringop overflow warnings in W=1") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230913093933.24564-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 9105ec623120..985b1aea9cdc 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1929,7 +1929,6 @@ static int parse_clock_source_unit(struct mixer_build *state, int unitid, struct uac_clock_source_descriptor *hdr = _ftr; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; int ret; if (state->mixer->protocol != UAC_VERSION_2) @@ -1966,10 +1965,9 @@ static int parse_clock_source_unit(struct mixer_build *state, int unitid, kctl->private_free = snd_usb_mixer_elem_free; ret = snd_usb_copy_string_desc(state->chip, hdr->iClockSource, - name, sizeof(name)); + kctl->id.name, sizeof(kctl->id.name)); if (ret > 0) - snprintf(kctl->id.name, sizeof(kctl->id.name), - "%s Validity", name); + append_ctl_name(kctl, " Validity"); else snprintf(kctl->id.name, sizeof(kctl->id.name), "Clock
Source %d Validity", hdr->bClockID); From cf0ba445f5e4dd74c1e9d7a83ca721ba69204a11 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Sep 2023 11:18:22 +0300 Subject: [PATCH 136/333] ASoC: codecs: aw88395: Fix some error codes These error paths should return -EINVAL instead of success. Fixes: 7f4ec77802aa ("ASoC: codecs: Add code for bin parsing compatible with aw88261") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/81476e78-05c2-4656-b754-f314c7ccdb81@moroto.mountain Signed-off-by: Mark Brown --- sound/soc/codecs/aw88395/aw88395_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/aw88395/aw88395_lib.c b/sound/soc/codecs/aw88395/aw88395_lib.c index 8ee1baa03269..87dd0ccade4c 100644 --- a/sound/soc/codecs/aw88395/aw88395_lib.c +++ b/sound/soc/codecs/aw88395/aw88395_lib.c @@ -452,11 +452,13 @@ static int aw_dev_parse_reg_bin_with_hdr(struct aw_device *aw_dev, if ((aw_bin->all_bin_parse_num != 1) || (aw_bin->header_info[0].bin_data_type != DATA_TYPE_REGISTER)) { dev_err(aw_dev->dev, "bin num or type error"); + ret = -EINVAL; goto parse_bin_failed; } if (aw_bin->header_info[0].valid_data_len % 4) { dev_err(aw_dev->dev, "bin data len get error!"); + ret = -EINVAL; goto parse_bin_failed; } From 450e749707bc1755f22b505d9cd942d4869dc535 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 7 Sep 2023 10:42:21 -0700 Subject: [PATCH 137/333] sched/fair: Fix SMT4 group_smt_balance handling For SMT4, any group with more than 2 tasks will be marked as group_smt_balance. Retain the behaviour of group_has_spare by marking the busiest group as the group which has the least number of idle_cpus. Also, handle rounding effect of adding (ncores_local + ncores_busy) when the local is fully idle and busy group imbalance is less than 2 tasks. Local group should try to pull at least 1 task in this case so imbalance should be set to 2 instead. 
Fixes: fee1759e4f04 ("sched/fair: Determine active load balance for SMT sched groups") Acked-by: Shrikanth Hegde Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: http://lkml.kernel.org/r/6cd1633036bb6b651af575c32c2a9608a106702c.camel@linux.intel.com --- kernel/sched/fair.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 33a2b6bba676..cb225921bbca 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9580,7 +9580,7 @@ static inline long sibling_imbalance(struct lb_env *env, imbalance /= ncores_local + ncores_busiest; /* Take advantage of resource in an empty sched group */ - if (imbalance == 0 && local->sum_nr_running == 0 && + if (imbalance <= 1 && local->sum_nr_running == 0 && busiest->sum_nr_running > 1) imbalance = 2; @@ -9768,6 +9768,15 @@ static bool update_sd_pick_busiest(struct lb_env *env, break; case group_smt_balance: + /* + * Check if we have spare CPUs on either SMT group to + * choose has spare or fully busy handling. + */ + if (sgs->idle_cpus != 0 || busiest->idle_cpus != 0) + goto has_spare; + + fallthrough; + case group_fully_busy: /* * Select the fully busy group with highest avg_load. In @@ -9807,6 +9816,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, else return true; } +has_spare: /* * Select not overloaded group with lowest number of idle cpus From 108af4b4bd3813610701379a58538e3339b162e4 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Mon, 14 Aug 2023 20:57:47 -0700 Subject: [PATCH 138/333] x86/sched: Restore the SD_ASYM_PACKING flag in the DIE domain Commit 8f2d6c41e5a6 ("x86/sched: Rewrite topology setup") dropped the SD_ASYM_PACKING flag in the DIE domain added in commit 044f0e27dec6 ("x86/sched: Add the SD_ASYM_PACKING flag to the die domain of hybrid processors"). Restore it on hybrid processors. 
The die-level domain does not depend on any build configuration and now x86_sched_itmt_flags() is always needed. Remove the build dependency on CONFIG_SCHED_[SMT|CLUSTER|MC]. Fixes: 8f2d6c41e5a6 ("x86/sched: Rewrite topology setup") Signed-off-by: Ricardo Neri Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Chen Yu Tested-by: Caleb Callaway Link: https://lkml.kernel.org/r/20230815035747.11529-1-ricardo.neri-calderon@linux.intel.com --- arch/x86/kernel/smpboot.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d40ed3a7dc23..266d05e22ac3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -579,7 +579,6 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) } -#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC) static inline int x86_sched_itmt_flags(void) { return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0; @@ -603,7 +602,14 @@ static int x86_cluster_flags(void) return cpu_cluster_flags() | x86_sched_itmt_flags(); } #endif -#endif + +static int x86_die_flags(void) +{ + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + return x86_sched_itmt_flags(); + + return 0; +} /* * Set if a package/die has multiple NUMA nodes inside. @@ -640,7 +646,7 @@ static void __init build_sched_topology(void) */ if (!x86_has_numa_in_package) { x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_cpu_mask, SD_INIT_NAME(DIE) + cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(DIE) }; } From 41dac81b56c82c51a6d00fda5f3af7691ffee2d7 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 16:00:10 +0100 Subject: [PATCH 139/333] ASoC: cs42l42: Ensure a reset pulse meets minimum pulse width. The CS42L42 can accept very short reset pulses of a few microseconds but there's no reason to force a very short pulse. 
Allow a wide range for the usleep_range() so it can be relaxed about the choice of timing source. Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://lore.kernel.org/r/20230913150012.604775-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index a0de0329406a..56d2857a4f01 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -2320,6 +2320,10 @@ int cs42l42_common_probe(struct cs42l42_private *cs42l42, if (cs42l42->reset_gpio) { dev_dbg(cs42l42->dev, "Found reset GPIO\n"); + + /* Ensure minimum reset pulse width */ + usleep_range(10, 500); + gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); } usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2); From a479b44ac0a0ac25cd48e5356200078924d78022 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 16:00:11 +0100 Subject: [PATCH 140/333] ASoC: cs42l42: Don't rely on GPIOD_OUT_LOW to set RESET initially low The ACPI setting for a GPIO default state has higher priority than the flag passed to devm_gpiod_get_optional() so ACPI can override the GPIOD_OUT_LOW. Explicitly set the GPIO low when hard resetting. Although GPIOD_OUT_LOW can't be relied on this doesn't seem like a reason to stop passing it to devm_gpiod_get_optional(). So we still pass it to state our intent, but can deal with it having no effect. 
Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://lore.kernel.org/r/20230913150012.604775-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 56d2857a4f01..dc93861ddfb0 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -2321,6 +2321,12 @@ int cs42l42_common_probe(struct cs42l42_private *cs42l42, if (cs42l42->reset_gpio) { dev_dbg(cs42l42->dev, "Found reset GPIO\n"); + /* + * ACPI can override the default GPIO state we requested + * so ensure that we start with RESET low. + */ + gpiod_set_value_cansleep(cs42l42->reset_gpio, 0); + /* Ensure minimum reset pulse width */ usleep_range(10, 500); From 2d066c6a78654c179f95c9beda1985d4c6befa4e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 16:00:12 +0100 Subject: [PATCH 141/333] ASoC: cs42l42: Avoid stale SoundWire ATTACH after hard reset In SoundWire mode leave hard RESET asserted when exiting probe, and wait for an UNATTACHED notification before deasserting RESET. If the boot state of the reset GPIO was deasserted it is possible that the SoundWire core had already enumerated the CS42L42 before cs42l42_sdw_probe() is called. When cs42l42_common_probe() hard resets the CS42L42 it triggers a race condition: 1) After cs42l42_sdw_probe() returns the thread that called it will call cs42l42_sdw_update_status() to report the last status recorded by the SoundWire core. 2) The SoundWire bus master will see a PING with the CS42L42 now reporting as unenumerated and will trigger the core SoundWire code to start enumerating CS42L42. These two threads are racing against each other. If (1) happens before (2) a stale ATTACHED notification will be reported to the cs42l42 driver when in fact the status of cs42l42 is now unattached. 
To avoid this race condition: - Leave RESET asserted on exit from cs42l42_sdw_probe(). This ensures that an UNATTACHED notification must be sent to the cs42l42 driver. If cs42l42 was already enumerated it will be seen to drop off the bus, causing an UNATTACH notification. If it was never enumerated the status is already UNATTACHED and this will be reported by thread (1). - When the UNATTACH notification is received, release RESET. This will cause CS42L42 to be enumerated and eventually report an ATTACHED notification. - The ATTACHED notification is now valid. Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://lore.kernel.org/r/20230913150012.604775-4-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42-sdw.c | 20 ++++++++++++++++++++ sound/soc/codecs/cs42l42.c | 11 ++++++++++- sound/soc/codecs/cs42l42.h | 1 + 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l42-sdw.c b/sound/soc/codecs/cs42l42-sdw.c index eeab07c850f9..974bae4abfad 100644 --- a/sound/soc/codecs/cs42l42-sdw.c +++ b/sound/soc/codecs/cs42l42-sdw.c @@ -344,6 +344,16 @@ static int cs42l42_sdw_update_status(struct sdw_slave *peripheral, switch (status) { case SDW_SLAVE_ATTACHED: dev_dbg(cs42l42->dev, "ATTACHED\n"); + + /* + * The SoundWire core can report stale ATTACH notifications + * if we hard-reset CS42L42 in probe() but it had already been + * enumerated. Reject the ATTACH if we haven't yet seen an + * UNATTACH report for the device being in reset. + */ + if (cs42l42->sdw_waiting_first_unattach) + break; + /* * Initialise codec, this only needs to be done once. 
* When resuming from suspend, resume callback will handle re-init of codec, @@ -354,6 +364,16 @@ static int cs42l42_sdw_update_status(struct sdw_slave *peripheral, break; case SDW_SLAVE_UNATTACHED: dev_dbg(cs42l42->dev, "UNATTACHED\n"); + + if (cs42l42->sdw_waiting_first_unattach) { + /* + * SoundWire core has seen that CS42L42 is not on + * the bus so release RESET and wait for ATTACH. + */ + cs42l42->sdw_waiting_first_unattach = false; + gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); + } + break; default: break; diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index dc93861ddfb0..2961340f15e2 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -2330,7 +2330,16 @@ int cs42l42_common_probe(struct cs42l42_private *cs42l42, /* Ensure minimum reset pulse width */ usleep_range(10, 500); - gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); + /* + * On SoundWire keep the chip in reset until we get an UNATTACH + * notification from the SoundWire core. This acts as a + * synchronization point to reject stale ATTACH notifications + * if the chip was already enumerated before we reset it. + */ + if (cs42l42->sdw_peripheral) + cs42l42->sdw_waiting_first_unattach = true; + else + gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); } usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2); diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 4bd7b85a5747..7785125b73ab 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -53,6 +53,7 @@ struct cs42l42_private { u8 stream_use; bool hp_adc_up_pending; bool suspended; + bool sdw_waiting_first_unattach; bool init_done; }; From e4e14095cc68a2efefba6f77d95efe1137e751d4 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 12 Sep 2023 23:28:25 +0900 Subject: [PATCH 142/333] ksmbd: remove unneeded mark_inode_dirty in set_info_sec() mark_inode_dirty will be called in notify_change(). This patch remove unneeded mark_inode_dirty in set_info_sec(). 
Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index e5e438bf5499..6c0305be895e 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1420,7 +1420,6 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, out: posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); - mark_inode_dirty(inode); return rc; } From 59d8d24f4610333560cf2e8fe3f44cafe30322eb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 12 Sep 2023 23:29:10 +0900 Subject: [PATCH 143/333] ksmbd: fix passing freed memory 'aux_payload_buf' The patch e2b76ab8b5c9: "ksmbd: add support for read compound" leads to the following Smatch static checker warning: fs/smb/server/smb2pdu.c:6329 smb2_read() warn: passing freed memory 'aux_payload_buf' It doesn't matter that we're passing a freed variable because nbytes is zero. This patch sets "aux_payload_buf = NULL" to silence smatch. Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 749660110878..544022dd6d20 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6312,7 +6312,7 @@ int smb2_read(struct ksmbd_work *work) aux_payload_buf, nbytes); kvfree(aux_payload_buf); - + aux_payload_buf = NULL; nbytes = 0; if (remain_bytes < 0) { err = (int)remain_bytes; From 7c6339322ce0c6128acbe36aacc1eeb986dd7bf1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:38 -0400 Subject: [PATCH 144/333] NFS: Fix O_DIRECT locking issues The dreq fields are protected by the dreq->lock.
Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write scheduling") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index ee88f0a6e7b8..e8a1645857dd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -553,7 +553,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) /* Bump the transmission count */ req->wb_nio++; if (!nfs_pageio_add_request(&desc, req)) { - spin_lock(&cinfo.inode->i_lock); + spin_lock(&dreq->lock); if (dreq->error < 0) { desc.pg_error = dreq->error; } else if (desc.pg_error != -EAGAIN) { @@ -563,7 +563,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->error = desc.pg_error; } else dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - spin_unlock(&cinfo.inode->i_lock); + spin_unlock(&dreq->lock); break; } nfs_release_request(req); @@ -871,9 +871,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, /* If the error is soft, defer remaining requests */ nfs_init_cinfo_from_dreq(&cinfo, dreq); - spin_lock(&cinfo.inode->i_lock); + spin_lock(&dreq->lock); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - spin_unlock(&cinfo.inode->i_lock); + spin_unlock(&dreq->lock); nfs_unlock_request(req); nfs_mark_request_commit(req, NULL, &cinfo, 0); desc.pg_error = 0; From 8982f7aff39fb526aba4441fff2525fcedd5e1a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:39 -0400 Subject: [PATCH 145/333] NFS: More O_DIRECT accounting fixes for error paths If we hit a fatal error when retransmitting, we do need to record the removal of the request from the count of written bytes. 
Fixes: 031d73ed768a ("NFS: Fix O_DIRECT accounting of number of bytes read/written") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e8a1645857dd..a53e50123499 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -93,12 +93,10 @@ nfs_direct_handle_truncated(struct nfs_direct_req *dreq, dreq->max_count = dreq_len; if (dreq->count > dreq_len) dreq->count = dreq_len; - - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) - dreq->error = hdr->error; - else /* Clear outstanding error if this is EOF */ - dreq->error = 0; } + + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error) + dreq->error = hdr->error; } static void @@ -120,6 +118,18 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq, dreq->count = dreq_len; } +static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, + struct nfs_page *req) +{ + loff_t offs = req_offset(req); + size_t req_start = (size_t)(offs - dreq->io_start); + + if (req_start < dreq->max_count) + dreq->max_count = req_start; + if (req_start < dreq->count) + dreq->count = req_start; +} + /** * nfs_swap_rw - NFS address space operation for swap I/O * @iocb: target I/O control block @@ -537,10 +547,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_direct_join_group(&reqs, dreq->inode); - dreq->count = 0; - dreq->max_count = 0; - list_for_each_entry(req, &reqs, wb_list) - dreq->max_count += req->wb_bytes; nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); get_dreq(dreq); @@ -574,10 +580,14 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); nfs_unlock_and_release_request(req); - if (desc.pg_error == -EAGAIN) + if (desc.pg_error == -EAGAIN) { nfs_mark_request_commit(req, NULL, &cinfo, 0); - else + } else { + spin_lock(&dreq->lock); + 
nfs_direct_truncate_request(dreq, req); + spin_unlock(&dreq->lock); nfs_release_request(req); + } } if (put_dreq(dreq)) @@ -597,8 +607,6 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) if (status < 0) { /* Errors in commit are fatal */ dreq->error = status; - dreq->max_count = 0; - dreq->count = 0; dreq->flags = NFS_ODIRECT_DONE; } else { status = dreq->error; @@ -609,7 +617,12 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - if (status >= 0 && !nfs_write_match_verf(verf, req)) { + if (status < 0) { + spin_lock(&dreq->lock); + nfs_direct_truncate_request(dreq, req); + spin_unlock(&dreq->lock); + nfs_release_request(req); + } else if (!nfs_write_match_verf(verf, req)) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; /* * Despite the reboot, the write was successful, @@ -617,7 +630,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) */ req->wb_nio = 0; nfs_mark_request_commit(req, NULL, &cinfo, 0); - } else /* Error or match */ + } else nfs_release_request(req); nfs_unlock_and_release_request(req); } @@ -670,6 +683,7 @@ static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) while (!list_empty(&reqs)) { req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); + nfs_direct_truncate_request(dreq, req); nfs_release_request(req); nfs_unlock_and_release_request(req); } @@ -719,7 +733,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } nfs_direct_count_bytes(dreq, hdr); - if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) { + if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) && + !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (!dreq->flags) dreq->flags = NFS_ODIRECT_DO_COMMIT; flags = dreq->flags; From b193a78ddb5ee7dba074d3f28dc050069ba083c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:40 -0400 Subject: [PATCH 146/333] NFS: 
Use the correct commit info in nfs_join_page_group() Ensure that nfs_clear_request_commit() updates the correct counters when it removes them from the commit list. Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 8 +++++--- fs/nfs/write.c | 23 ++++++++++++----------- include/linux/nfs_page.h | 4 +++- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index a53e50123499..3391c8b97da5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -498,7 +498,9 @@ static void nfs_direct_add_page_head(struct list_head *list, kref_get(&head->wb_kref); } -static void nfs_direct_join_group(struct list_head *list, struct inode *inode) +static void nfs_direct_join_group(struct list_head *list, + struct nfs_commit_info *cinfo, + struct inode *inode) { struct nfs_page *req, *subreq; @@ -520,7 +522,7 @@ static void nfs_direct_join_group(struct list_head *list, struct inode *inode) nfs_release_request(subreq); } } while ((subreq = subreq->wb_this_page) != req); - nfs_join_page_group(req, inode); + nfs_join_page_group(req, cinfo, inode); } } @@ -545,7 +547,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); - nfs_direct_join_group(&reqs, dreq->inode); + nfs_direct_join_group(&reqs, &cinfo, dreq->inode); nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); get_dreq(dreq); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f4cca8f00c0c..8c1ee1a1a28f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -59,7 +59,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_rw_ops nfs_rw_write_ops; static void nfs_inode_remove_request(struct nfs_page *req); -static void 
nfs_clear_request_commit(struct nfs_page *req); +static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, + struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); static struct nfs_page * @@ -502,8 +503,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, * the (former) group. All subrequests are removed from any write or commit * lists, unlinked from the group and destroyed. */ -void -nfs_join_page_group(struct nfs_page *head, struct inode *inode) +void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, + struct inode *inode) { struct nfs_page *subreq; struct nfs_page *destroy_list = NULL; @@ -533,7 +534,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode) * Commit list removal accounting is done after locks are dropped */ subreq = head; do { - nfs_clear_request_commit(subreq); + nfs_clear_request_commit(cinfo, subreq); subreq = subreq->wb_this_page; } while (subreq != head); @@ -566,8 +567,10 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { struct inode *inode = folio_file_mapping(folio)->host; struct nfs_page *head; + struct nfs_commit_info cinfo; int ret; + nfs_init_cinfo_from_inode(&cinfo, inode); /* * A reference is taken only on the head request which acts as a * reference to the whole page group - the group will not be destroyed @@ -584,7 +587,7 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) return ERR_PTR(ret); } - nfs_join_page_group(head, inode); + nfs_join_page_group(head, &cinfo, inode); return head; } @@ -955,18 +958,16 @@ static void nfs_folio_clear_commit(struct folio *folio) } /* Called holding the request lock on @req */ -static void -nfs_clear_request_commit(struct nfs_page *req) +static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, + struct nfs_page *req) { if (test_bit(PG_CLEAN, &req->wb_flags)) { struct nfs_open_context *ctx = nfs_req_openctx(req); 
struct inode *inode = d_inode(ctx->dentry); - struct nfs_commit_info cinfo; - nfs_init_cinfo_from_inode(&cinfo, inode); mutex_lock(&NFS_I(inode)->commit_mutex); - if (!pnfs_clear_request_commit(req, &cinfo)) { - nfs_request_remove_commit_list(req, &cinfo); + if (!pnfs_clear_request_commit(req, cinfo)) { + nfs_request_remove_commit_list(req, cinfo); } mutex_unlock(&NFS_I(inode)->commit_mutex); nfs_folio_clear_commit(nfs_page_to_folio(req)); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index aa9f4c6ebe26..1c315f854ea8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -157,7 +157,9 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); -extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); +extern void nfs_join_page_group(struct nfs_page *head, + struct nfs_commit_info *cinfo, + struct inode *inode); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); From b11243f720ee5f9376861099019c8542969b6318 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:41 -0400 Subject: [PATCH 147/333] NFS: More fixes for nfs_direct_write_reschedule_io() Ensure that all requests are put back onto the commit list so that they can be rescheduled. 
Fixes: 4daaeba93822 ("NFS: Fix nfs_direct_write_reschedule_io()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3391c8b97da5..f6c74f424691 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -780,18 +780,23 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error) static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; + struct nfs_page *req; + struct nfs_commit_info cinfo; trace_nfs_direct_write_reschedule_io(dreq); + nfs_init_cinfo_from_dreq(&cinfo, dreq); spin_lock(&dreq->lock); - if (dreq->error == 0) { + if (dreq->error == 0) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - /* fake unstable write to let common nfs resend pages */ - hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = hdr->args.offset + hdr->args.count - - hdr->io_start; - } + set_bit(NFS_IOHDR_REDO, &hdr->flags); spin_unlock(&dreq->lock); + while (!list_empty(&hdr->pages)) { + req = nfs_list_entry(hdr->pages.next); + nfs_list_remove_request(req); + nfs_unlock_request(req); + nfs_mark_request_commit(req, NULL, &cinfo, 0); + } } static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { From dd7d7ee3ba2a70d12d02defb478790cf57d5b87b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:43:58 -0400 Subject: [PATCH 148/333] NFS/pNFS: Report EINVAL errors from connect() to the server With IPv6, connect() can occasionally return EINVAL if a route is unavailable. If this happens during I/O to a data server, we want to report it using LAYOUTERROR as an inability to connect. 
Fixes: dd52128afdde ("NFSv4.1/pnfs Ensure flexfiles reports all connection related errors") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7deb3cd76abe..a1dc33864906 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1235,6 +1235,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -EPFNOSUPPORT: case -EPROTONOSUPPORT: case -EOPNOTSUPP: + case -EINVAL: case -ECONNREFUSED: case -ECONNRESET: case -EHOSTDOWN: From 611fa42dfa9d2f3918ac5f4dd5705dfad81b323d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:50:09 -0400 Subject: [PATCH 149/333] SUNRPC: Mark the cred for revalidation if the server rejects it If the server rejects the credential as being stale, or bad, then we should mark it for revalidation before retransmitting. Fixes: 7f5667a5f8c4 ("SUNRPC: Clean up rpc_verify_header()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d75290f1a31..5c37621aa09a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2751,6 +2751,7 @@ out_msg_denied: case rpc_autherr_rejectedverf: case rpcsec_gsserr_credproblem: case rpcsec_gsserr_ctxproblem: + rpcauth_invalcred(task); if (!task->tk_cred_retry) break; task->tk_cred_retry--; From e86fcf0820d914389b46658a5a7e8969c3af2d53 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Sep 2023 21:03:28 -0400 Subject: [PATCH 150/333] Revert "SUNRPC: Fail faster on bad verifier" This reverts commit 0701214cd6e66585a999b132eb72ae0489beb724. The premise of this commit was incorrect. There are exactly 2 cases where rpcauth_checkverf() will return an error: 1) If there was an XDR decode problem (i.e. garbage data). 
2) If gss_validate() had a problem verifying the RPCSEC_GSS MIC. In the second case, there are again 2 subcases: a) The GSS context expires, in which case gss_validate() will force a new context negotiation on retry by invalidating the cred. b) The sequence number check failed because an RPC call timed out, and the client retransmitted the request using a new sequence number, as required by RFC2203. In neither subcase is this a fatal error. Reported-by: Russell Cattelan Fixes: 0701214cd6e6 ("SUNRPC: Fail faster on bad verifier") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5c37621aa09a..edbcfdd84e1f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2725,7 +2725,7 @@ out_unparsable: out_verifier: trace_rpc_bad_verifier(task); - goto out_err; + goto out_garbage; out_msg_denied: error = -EACCES; From 806a3bc421a115fbb287c1efce63a48c54ee804b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 30 Aug 2023 15:29:34 -0400 Subject: [PATCH 151/333] NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. 
nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposes to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which is then used to pass max_connect value to use into the rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt() and using rpc client's value, the caller passes in max_connect value which has previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 6 +++++- include/linux/nfs_fs_sb.h | 1 + net/sunrpc/clnt.c | 11 +++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 27fb25567ce7..11e3a285594c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -417,6 +417,8 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) .net = old->cl_net, .servername = old->cl_hostname, }; + int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ?
+ clp->cl_max_connect : old->cl_max_connect; if (clp->cl_proto != old->cl_proto) return; @@ -430,7 +432,7 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) xprt_args.addrlen = clp_salen; rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args, - rpc_clnt_test_and_add_xprt, NULL); + rpc_clnt_test_and_add_xprt, &max_connect); } /** @@ -1010,6 +1012,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); __set_bit(NFS_CS_DS, &cl_init.init_flags); + __set_bit(NFS_CS_PNFS, &cl_init.init_flags); + cl_init.max_connect = NFS_MAX_TRANSPORTS; /* * Set an authflavor equual to the MDS value. Use the MDS nfs_client * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 20eeba8b009d..cd628c4b011e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -48,6 +48,7 @@ struct nfs_client { #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ +#define NFS_CS_PNFS 9 /* - Server used for pnfs */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index edbcfdd84e1f..37b0b212b934 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2908,19 +2908,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = { * @clnt: pointer to struct rpc_clnt * @xps: pointer to struct rpc_xprt_switch, * @xprt: pointer struct rpc_xprt - * @dummy: unused + * @in_max_connect: pointer to the max_connect value for the passed in xprt transport */ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, - void *dummy) + void *in_max_connect) { struct rpc_cb_add_xprt_calldata *data; struct rpc_task *task; + int max_connect = 
clnt->cl_max_connect; - if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) { + if (in_max_connect) + max_connect = *(int *)in_max_connect; + if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) { rcu_read_lock(); pr_warn("SUNRPC: reached max allowed number (%d) did not add " - "transport to server: %s\n", clnt->cl_max_connect, + "transport to server: %s\n", max_connect, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); rcu_read_unlock(); return -EINVAL; From 06ed09351b67eb1114ae106a87a0ee3ea9adb3db Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Aug 2023 18:52:08 +0100 Subject: [PATCH 152/333] btrfs: convert btrfs_read_merkle_tree_page() to use a folio Remove a number of hidden calls to compound_head() by using a folio throughout. Also follow core kernel coding style by adding the folio to the page cache immediately after allocation instead of doing the read first, then adding it to the page cache. This ordering makes subsequent readers block waiting for the first reader instead of duplicating the work only to throw it away when they find out they lost the race. 
Reviewed-by: Boris Burkov Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: David Sterba --- fs/btrfs/verity.c | 64 +++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index c5ff16f9e9fa..744f4f4d4c68 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -715,7 +715,7 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - struct page *page; + struct folio *folio; u64 off = (u64)index << PAGE_SHIFT; loff_t merkle_pos = merkle_file_pos(inode); int ret; @@ -726,29 +726,36 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, return ERR_PTR(-EFBIG); index += merkle_pos >> PAGE_SHIFT; again: - page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); - if (page) { - if (PageUptodate(page)) - return page; + folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); + if (!IS_ERR(folio)) { + if (folio_test_uptodate(folio)) + goto out; - lock_page(page); - /* - * We only insert uptodate pages, so !Uptodate has to be - * an error - */ - if (!PageUptodate(page)) { - unlock_page(page); - put_page(page); + folio_lock(folio); + /* If it's not uptodate after we have the lock, we got a read error. */ + if (!folio_test_uptodate(folio)) { + folio_unlock(folio); + folio_put(folio); return ERR_PTR(-EIO); } - unlock_page(page); - return page; + folio_unlock(folio); + goto out; } - page = __page_cache_alloc(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); - if (!page) + folio = filemap_alloc_folio(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS), + 0); + if (!folio) return ERR_PTR(-ENOMEM); + ret = filemap_add_folio(inode->i_mapping, folio, index, GFP_NOFS); + if (ret) { + folio_put(folio); + /* Did someone else insert a folio here? */ + if (ret == -EEXIST) + goto again; + return ERR_PTR(ret); + } + /* * Merkle item keys are indexed from byte 0 in the merkle tree. 
* They have the form: @@ -756,28 +763,19 @@ again: * [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ] */ ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off, - page_address(page), PAGE_SIZE, page); + folio_address(folio), PAGE_SIZE, &folio->page); if (ret < 0) { - put_page(page); + folio_put(folio); return ERR_PTR(ret); } if (ret < PAGE_SIZE) - memzero_page(page, ret, PAGE_SIZE - ret); + folio_zero_segment(folio, ret, PAGE_SIZE); - SetPageUptodate(page); - ret = add_to_page_cache_lru(page, inode->i_mapping, index, GFP_NOFS); + folio_mark_uptodate(folio); + folio_unlock(folio); - if (!ret) { - /* Inserted and ready for fsverity */ - unlock_page(page); - } else { - put_page(page); - /* Did someone race us into inserting this page? */ - if (ret == -EEXIST) - goto again; - page = ERR_PTR(ret); - } - return page; +out: + return folio_file_page(folio, index); } /* From 9af86694fd5d387992699ec99007ed374966ce9a Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Wed, 6 Sep 2023 17:59:03 +0200 Subject: [PATCH 153/333] btrfs: file_remove_privs needs an exclusive lock in direct io write This was noticed by Miklos that file_remove_privs might call into notify_change(), which requires to hold an exclusive lock. The problem exists in FUSE and btrfs. We can fix it without any additional helpers from VFS, in case the privileges would need to be dropped, change the lock type to be exclusive and redo the loop. 
Fixes: e9adabb9712e ("btrfs: use shared lock for direct writes within EOF") CC: Miklos Szeredi CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Christoph Hellwig Signed-off-by: Bernd Schubert Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6edad7b9a5d3..e8726a83b649 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1466,8 +1466,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; - /* If the write DIO is within EOF, use a shared lock */ - if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) + /* + * If the write DIO is within EOF, use a shared lock and also only if + * security bits will likely not be dropped by file_remove_privs() called + * from btrfs_write_check(). Either will need to be rechecked after the + * lock was acquired. + */ + if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode)) ilock_flags |= BTRFS_ILOCK_SHARED; relock: @@ -1475,6 +1480,13 @@ relock: if (err < 0) return err; + /* Shared lock cannot be used with security bits set. */ + if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) { + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + ilock_flags &= ~BTRFS_ILOCK_SHARED; + goto relock; + } + err = generic_write_checks(iocb, from); if (err <= 0) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); From b595d25996329427b2c09d4b90395a165fb3ef8e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 8 Sep 2023 15:31:39 -0400 Subject: [PATCH 154/333] btrfs: don't clear uptodate on write errors We have been consistently seeing hangs with generic/648 in our subpage GitHub CI setup. 
This is a classic deadlock: we are calling btrfs_read_folio() on a folio, which requires holding the folio lock on the folio, and then finding an ordered extent that overlaps that range and calling btrfs_start_ordered_extent(), which then tries to write out the dirty page, which requires taking the folio lock and then we deadlock. The hang happens because we're writing to range [1271750656, 1271767040), page index [77621, 77622], and page 77621 is !Uptodate. It is also Dirty, so we call btrfs_read_folio() for 77621, which does btrfs_lock_and_flush_ordered_range() for that range, and we find an ordered extent which is [1271644160, 1271746560), page index [77615, 77621]. The page indexes overlap, but the actual bytes don't overlap. We're holding the page lock for 77621, then call btrfs_lock_and_flush_ordered_range() which tries to flush the dirty page, and tries to lock 77621 again and then we deadlock. The byte ranges do not overlap, but with subpage support if we clear uptodate on any portion of the page we mark the entire thing as not uptodate. We have been clearing page uptodate on write errors, but no other file system does this, and it is in fact incorrect. This doesn't hurt us in the !subpage case because we can't end up with overlapped ranges that don't also overlap on the page. Fix this by not clearing uptodate when we have a write error. The only thing we should be doing in this case is setting the mapping error and carrying on. This makes it so we would no longer call btrfs_read_folio() on the page as it's uptodate and eliminates the deadlock. With this patch we're now able to make it through a full fstests run on our subpage blocksize VMs. Note for stable backports: this probably goes beyond 6.1 but the code has been cleaned up and clearing the uptodate bit must be verified on each version independently.
CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 9 +-------- fs/btrfs/inode.c | 4 ---- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ac3fca5a5e41..6954ae763b86 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -484,10 +484,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio) bvec->bv_offset, bvec->bv_len); btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error); - if (error) { - btrfs_page_clear_uptodate(fs_info, page, start, len); + if (error) mapping_set_error(page->mapping, error); - } btrfs_page_clear_writeback(fs_info, page, start, len); } @@ -1456,8 +1454,6 @@ done: if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page, - page_start, PAGE_SIZE); mapping_set_error(page->mapping, ret); } unlock_page(page); @@ -1624,8 +1620,6 @@ static void extent_buffer_write_end_io(struct btrfs_bio *bbio) struct page *page = bvec->bv_page; u32 len = bvec->bv_len; - if (!uptodate) - btrfs_page_clear_uptodate(fs_info, page, start, len); btrfs_page_clear_writeback(fs_info, page, start, len); bio_offset += len; } @@ -2201,7 +2195,6 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page, if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, cur, cur_len, !ret); - btrfs_page_clear_uptodate(fs_info, page, cur, cur_len); mapping_set_error(page->mapping, ret); } btrfs_page_unlock_writer(fs_info, page, cur, cur_len); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e211e88c6545..616fdcf40467 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1085,9 +1085,6 @@ static void submit_uncompressed_range(struct btrfs_inode *inode, btrfs_mark_ordered_io_finished(inode, locked_page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(inode->root->fs_info, - 
locked_page, page_start, - PAGE_SIZE); mapping_set_error(locked_page->mapping, ret); unlock_page(locked_page); } @@ -2791,7 +2788,6 @@ out_page: mapping_set_error(page->mapping, ret); btrfs_mark_ordered_io_finished(inode, page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE); clear_page_dirty_for_io(page); } btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE); From 69343ce91435f222052015c5af86b550391bac85 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 17:05:23 +0100 Subject: [PATCH 155/333] firmware: cirrus: cs_dsp: Only log list of algorithms in debug build Change the logging of each algorithm from info level to debug level. On the original devices supported by this code there were typically only one or two algorithms in a firmware and one or two DSPs so this logging only used a small number of log lines. However, for the latest devices there could be 30-40 algorithms in a firmware and 8 DSPs being loaded in parallel, so using 300+ lines of log for information that isn't particularly important to have logged. 
Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230913160523.3701189-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/firmware/cirrus/cs_dsp.c | 34 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index 49b70c70dc69..79d4254d1f9b 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -1863,15 +1863,15 @@ static int cs_dsp_adsp2_setup_algs(struct cs_dsp *dsp) return PTR_ERR(adsp2_alg); for (i = 0; i < n_algs; i++) { - cs_dsp_info(dsp, - "%d: ID %x v%d.%d.%d XM@%x YM@%x ZM@%x\n", - i, be32_to_cpu(adsp2_alg[i].alg.id), - (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff0000) >> 16, - (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff00) >> 8, - be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff, - be32_to_cpu(adsp2_alg[i].xm), - be32_to_cpu(adsp2_alg[i].ym), - be32_to_cpu(adsp2_alg[i].zm)); + cs_dsp_dbg(dsp, + "%d: ID %x v%d.%d.%d XM@%x YM@%x ZM@%x\n", + i, be32_to_cpu(adsp2_alg[i].alg.id), + (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff0000) >> 16, + (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff00) >> 8, + be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff, + be32_to_cpu(adsp2_alg[i].xm), + be32_to_cpu(adsp2_alg[i].ym), + be32_to_cpu(adsp2_alg[i].zm)); alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_XM, adsp2_alg[i].alg.id, @@ -1996,14 +1996,14 @@ static int cs_dsp_halo_setup_algs(struct cs_dsp *dsp) return PTR_ERR(halo_alg); for (i = 0; i < n_algs; i++) { - cs_dsp_info(dsp, - "%d: ID %x v%d.%d.%d XM@%x YM@%x\n", - i, be32_to_cpu(halo_alg[i].alg.id), - (be32_to_cpu(halo_alg[i].alg.ver) & 0xff0000) >> 16, - (be32_to_cpu(halo_alg[i].alg.ver) & 0xff00) >> 8, - be32_to_cpu(halo_alg[i].alg.ver) & 0xff, - be32_to_cpu(halo_alg[i].xm_base), - be32_to_cpu(halo_alg[i].ym_base)); + cs_dsp_dbg(dsp, + "%d: ID %x v%d.%d.%d XM@%x YM@%x\n", + i, be32_to_cpu(halo_alg[i].alg.id), + (be32_to_cpu(halo_alg[i].alg.ver) & 0xff0000) >> 16, + 
(be32_to_cpu(halo_alg[i].alg.ver) & 0xff00) >> 8, + be32_to_cpu(halo_alg[i].alg.ver) & 0xff, + be32_to_cpu(halo_alg[i].xm_base), + be32_to_cpu(halo_alg[i].ym_base)); ret = cs_dsp_halo_create_regions(dsp, halo_alg[i].alg.id, halo_alg[i].alg.ver, From 781118bc2fc1026c8285f83ea7ecab07071a09c4 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 17:02:50 +0100 Subject: [PATCH 156/333] ASoC: wm_adsp: Fix missing locking in wm_adsp_[read|write]_ctl() wm_adsp_read_ctl() and wm_adsp_write_ctl() must hold the cs_dsp pwr_lock mutex when calling cs_dsp_coeff_read_ctrl() and cs_dsp_coeff_write_ctrl(). Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230913160250.3700346-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 6fc34f41b175..d1b9238d391e 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -687,7 +687,10 @@ int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, struct wm_coeff_ctl *ctl; int ret; + mutex_lock(&dsp->cs_dsp.pwr_lock); ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len); + mutex_unlock(&dsp->cs_dsp.pwr_lock); + if (ret < 0) return ret; @@ -703,8 +706,14 @@ EXPORT_SYMBOL_GPL(wm_adsp_write_ctl); int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len) { - return cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg), - 0, buf, len); + int ret; + + mutex_lock(&dsp->cs_dsp.pwr_lock); + ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg), + 0, buf, len); + mutex_unlock(&dsp->cs_dsp.pwr_lock); + + return ret; } EXPORT_SYMBOL_GPL(wm_adsp_read_ctl); From 8a19edd4fa6f5b22d5a35bb7c8bb3e7c571a74d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Sep 2023 13:47:11 +0200 Subject: [PATCH 157/333] selftests/bpf: Fix 
kprobe_multi_test/attach_override test We need to deny the attach_override test for arm64, denying the whole kprobe_multi_test suite. Also making attach_override static. Fixes: 7182e56411b9 ("selftests/bpf: Add kprobe_multi override test") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230913114711.499829-1-jolsa@kernel.org --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 10 ++-------- .../selftests/bpf/prog_tests/kprobe_multi_test.c | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 7f768d335698..3babaf3eee5c 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,14 +1,8 @@ bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 fexit_sleep # The test never returns. The remaining tests cannot start. 
-kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/skel_api # libbpf: failed to load BPF skeleton 'kprobe_multi': -3 +kprobe_multi_bench_attach # needs CONFIG_FPROBE +kprobe_multi_test # needs CONFIG_FPROBE module_attach # prog 'kprobe_multi': failed to auto-attach: -95 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index e05477b210a5..4041cfa670eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -454,7 +454,7 @@ cleanup: } } -void test_attach_override(void) +static void test_attach_override(void) { struct kprobe_multi_override *skel = NULL; struct bpf_link *link = NULL; From 4908d5af16676b9d2901830551c2af911e452524 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 12 Sep 2023 10:56:07 +0200 Subject: [PATCH 158/333] netfilter: conntrack: fix extension size table The size table is incorrect due to copypaste error, this reserves more size than needed. TSTAMP reserved 32 instead of 16 bytes. TIMEOUT reserved 16 instead of 8 bytes. 
Fixes: 5f31edc0676b ("netfilter: conntrack: move extension sizes into core") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_extend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 0b513f7bf9f3..dd62cc12e775 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -40,10 +40,10 @@ static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = { [NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache), #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP - [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_acct), + [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_tstamp), #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT - [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_tstamp), + [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_timeout), #endif #ifdef CONFIG_NF_CONNTRACK_LABELS [NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels), From 7fb818f248cff996180b7cdcdcb86b6b4f6e44e2 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 13 Sep 2023 15:51:36 +0200 Subject: [PATCH 159/333] netfilter: nf_tables: Fix entries val in rule reset audit log The value in idx and the number of rules handled in that particular __nf_tables_dump_rules() call is not identical. The former is a cursor to pick up from if multiple netlink messages are needed, so its value is ever increasing. Fixing this is not just a matter of subtracting s_idx from it, though: When resetting rules in multiple chains, __nf_tables_dump_rules() is called for each and cb->args[0] is not adjusted in between. Introduce a dedicated counter to record the number of rules reset in this call in a less confusing way. While being at it, prevent the direct return upon buffer exhaustion: Any rules previously dumped into that skb would evade audit logging otherwise. 
Fixes: 9b5ba5c9c5109 ("netfilter: nf_tables: Unbreak audit log reset") Signed-off-by: Phil Sutter Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c1e485aee763..d819b4d42962 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3451,6 +3451,8 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; unsigned int s_idx = cb->args[0]; + unsigned int entries = 0; + int ret = 0; u64 handle; prule = NULL; @@ -3473,9 +3475,11 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule, handle, reset) < 0) - return 1; - + table, chain, rule, handle, reset) < 0) { + ret = 1; + break; + } + entries++; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: prule = rule; @@ -3483,10 +3487,10 @@ cont_skip: (*idx)++; } - if (reset && *idx) - audit_log_rule_reset(table, cb->seq, *idx); + if (reset && entries) + audit_log_rule_reset(table, cb->seq, entries); - return 0; + return ret; } static int nf_tables_dump_rules(struct sk_buff *skb, From e8dbde59ca3fe925d0105bfb380e8429928b16dd Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 13 Sep 2023 15:51:37 +0200 Subject: [PATCH 160/333] selftests: netfilter: Test nf_tables audit logging Compare NETFILTER_CFG type audit logs emitted from kernel upon ruleset modifications against expected output. 
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/.gitignore | 1 + tools/testing/selftests/netfilter/Makefile | 4 +- .../selftests/netfilter/audit_logread.c | 165 ++++++++++++++++++ tools/testing/selftests/netfilter/config | 1 + .../testing/selftests/netfilter/nft_audit.sh | 108 ++++++++++++ 5 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/netfilter/audit_logread.c create mode 100755 tools/testing/selftests/netfilter/nft_audit.sh diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 4cb887b57413..4b2928e1c19d 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only nf-queue connect_close +audit_logread diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3686bfa6c58d..321db8850da0 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,13 +6,13 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh rpath.sh + conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh HOSTPKG_CONFIG := pkg-config CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close +TEST_GEN_FILES = nf-queue connect_close audit_logread include ../lib.mk diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c new file mode 100644 index 000000000000..a0a880fc2d9d --- /dev/null +++ b/tools/testing/selftests/netfilter/audit_logread.c @@ -0,0 +1,165 @@ +// 
SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int fd; + +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +struct audit_message { + struct nlmsghdr nlh; + union { + struct audit_status s; + char data[MAX_AUDIT_MESSAGE_LENGTH]; + } u; +}; + +int audit_recv(int fd, struct audit_message *rep) +{ + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + int ret; + + do { + ret = recvfrom(fd, rep, sizeof(*rep), 0, + (struct sockaddr *)&addr, &addrlen); + } while (ret < 0 && errno == EINTR); + + if (ret < 0 || + addrlen != sizeof(addr) || + addr.nl_pid != 0 || + rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */ + return -1; + + return ret; +} + +int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val) +{ + static int seq = 0; + struct audit_message msg = { + .nlh = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_seq = ++seq, + }, + .u.s = { + .mask = key, + .enabled = key == AUDIT_STATUS_ENABLED ? val : 0, + .pid = key == AUDIT_STATUS_PID ? 
val : 0, + } + }; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int ret; + + do { + ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0, + (struct sockaddr *)&addr, sizeof(addr)); + } while (ret < 0 && errno == EINTR); + + if (ret != (int)msg.nlh.nlmsg_len) + return -1; + return 0; +} + +int audit_set(int fd, uint32_t key, uint32_t val) +{ + struct audit_message rep = { 0 }; + int ret; + + ret = audit_send(fd, AUDIT_SET, key, val); + if (ret) + return ret; + + ret = audit_recv(fd, &rep); + if (ret < 0) + return ret; + return 0; +} + +int readlog(int fd) +{ + struct audit_message rep = { 0 }; + int ret = audit_recv(fd, &rep); + const char *sep = ""; + char *k, *v; + + if (ret < 0) + return ret; + + if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG) + return 0; + + /* skip the initial "audit(...): " part */ + strtok(rep.u.data, " "); + + while ((k = strtok(NULL, "="))) { + v = strtok(NULL, " "); + + /* these vary and/or are uninteresting, ignore */ + if (!strcmp(k, "pid") || + !strcmp(k, "comm") || + !strcmp(k, "subj")) + continue; + + /* strip the varying sequence number */ + if (!strcmp(k, "table")) + *strchrnul(v, ':') = '\0'; + + printf("%s%s=%s", sep, k, v); + sep = " "; + } + if (*sep) { + printf("\n"); + fflush(stdout); + } + return 0; +} + +void cleanup(int sig) +{ + audit_set(fd, AUDIT_STATUS_ENABLED, 0); + close(fd); + if (sig) + exit(0); +} + +int main(int argc, char **argv) +{ + struct sigaction act = { + .sa_handler = cleanup, + }; + + fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT); + if (fd < 0) { + perror("Can't open netlink socket"); + return -1; + } + + if (sigaction(SIGTERM, &act, NULL) < 0 || + sigaction(SIGINT, &act, NULL) < 0) { + perror("Can't set signal handler"); + close(fd); + return -1; + } + + audit_set(fd, AUDIT_STATUS_ENABLED, 1); + audit_set(fd, AUDIT_STATUS_PID, getpid()); + + while (1) + readlog(fd); +} diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 
4faf2ce021d9..7c42b1b2c69b 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m CONFIG_NFT_MASQ=m CONFIG_NFT_FLOW_OFFLOAD=m CONFIG_NF_CT_NETLINK=m +CONFIG_AUDIT=y diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh new file mode 100755 index 000000000000..83c271b1c735 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that audit logs generated for nft commands are as expected. + +SKIP_RC=4 +RC=0 + +nft --version >/dev/null 2>&1 || { + echo "SKIP: missing nft tool" + exit $SKIP_RC +} + +logfile=$(mktemp) +echo "logging into $logfile" +./audit_logread >"$logfile" & +logread_pid=$! +trap 'kill $logread_pid; rm -f $logfile' EXIT +exec 3<"$logfile" + +do_test() { # (cmd, log) + echo -n "testing for cmd: $1 ... " + cat <&3 >/dev/null + $1 >/dev/null || exit 1 + sleep 0.1 + res=$(diff -a -u <(echo "$2") - <&3) + [ $? 
-eq 0 ] && { echo "OK"; return; } + echo "FAIL" + echo "$res" + ((RC++)) +} + +nft flush ruleset + +for table in t1 t2; do + do_test "nft add table $table" \ + "table=$table family=2 entries=1 op=nft_register_table" + + do_test "nft add chain $table c1" \ + "table=$table family=2 entries=1 op=nft_register_chain" + + do_test "nft add chain $table c2; add chain $table c3" \ + "table=$table family=2 entries=2 op=nft_register_chain" + + cmd="add rule $table c1 counter" + + do_test "nft $cmd" \ + "table=$table family=2 entries=1 op=nft_register_rule" + + do_test "nft $cmd; $cmd" \ + "table=$table family=2 entries=2 op=nft_register_rule" + + cmd="" + sep="" + for chain in c2 c3; do + for i in {1..3}; do + cmd+="$sep add rule $table $chain counter" + sep=";" + done + done + do_test "nft $cmd" \ + "table=$table family=2 entries=6 op=nft_register_rule" +done + +do_test 'nft reset rules t1 c2' \ +'table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules table t1' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule' + +for ((i = 0; i < 500; i++)); do + echo "add rule t2 c3 counter accept comment \"rule $i\"" +done | do_test 'nft -f -' \ +'table=t2 family=2 entries=500 op=nft_register_rule' + +do_test 'nft reset rules t2 c3' \ +'table=t2 family=2 entries=189 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=126 op=nft_reset_rule' + +do_test 'nft reset rules t2' \ +'table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=186 op=nft_reset_rule +table=t2 
family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=129 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=180 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=135 op=nft_reset_rule' + +exit $RC From 71996bb835aed58c7ec4967be1d05190a27339ec Mon Sep 17 00:00:00 2001 From: Michal Grzedzicki Date: Wed, 13 Sep 2023 08:56:10 -0700 Subject: [PATCH 161/333] scsi: pm80xx: Use phy-specific SAS address when sending PHY_START command Some cards have more than one SAS address. Using an incorrect address causes communication issues with some devices like expanders. Closes: https://lore.kernel.org/linux-kernel/A57AEA84-5CA0-403E-8053-106033C73C70@fb.com/ Signed-off-by: Michal Grzedzicki Link: https://lore.kernel.org/r/20230913155611.3183612-1-mge@meta.com Acked-by: Jack Wang Signed-off-by: Martin K. 
Petersen --- drivers/scsi/pm8001/pm8001_hwi.c | 2 +- drivers/scsi/pm8001/pm80xx_hwi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 33053db5a713..90069c7b1642 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -4180,7 +4180,7 @@ pm8001_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id) payload.sas_identify.dev_type = SAS_END_DEVICE; payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL; memcpy(payload.sas_identify.sas_addr, - pm8001_ha->sas_addr, SAS_ADDR_SIZE); + &pm8001_ha->phy[phy_id].dev_sas_addr, SAS_ADDR_SIZE); payload.sas_identify.phy_id = phy_id; return pm8001_mpi_build_cmd(pm8001_ha, 0, opcode, &payload, diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index f6857632dc7c..1b2c40b1381c 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -4671,7 +4671,7 @@ pm80xx_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id) payload.sas_identify.dev_type = SAS_END_DEVICE; payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL; memcpy(payload.sas_identify.sas_addr, - &pm8001_ha->sas_addr, SAS_ADDR_SIZE); + &pm8001_ha->phy[phy_id].dev_sas_addr, SAS_ADDR_SIZE); payload.sas_identify.phy_id = phy_id; return pm8001_mpi_build_cmd(pm8001_ha, 0, opcode, &payload, From c13e7331745852d0dd7c35eabbe181cbd5b01172 Mon Sep 17 00:00:00 2001 From: Michal Grzedzicki Date: Mon, 11 Sep 2023 10:03:40 -0700 Subject: [PATCH 162/333] scsi: pm80xx: Avoid leaking tags when processing OPC_INB_SET_CONTROLLER_CONFIG command Tags allocated for OPC_INB_SET_CONTROLLER_CONFIG command need to be freed when we receive the response. Signed-off-by: Michal Grzedzicki Link: https://lore.kernel.org/r/20230911170340.699533-2-mge@meta.com Acked-by: Jack Wang Signed-off-by: Martin K. 
Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 1b2c40b1381c..3afd9443c425 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3671,10 +3671,12 @@ static int mpi_set_controller_config_resp(struct pm8001_hba_info *pm8001_ha, (struct set_ctrl_cfg_resp *)(piomb + 4); u32 status = le32_to_cpu(pPayload->status); u32 err_qlfr_pgcd = le32_to_cpu(pPayload->err_qlfr_pgcd); + u32 tag = le32_to_cpu(pPayload->tag); pm8001_dbg(pm8001_ha, MSG, "SET CONTROLLER RESP: status 0x%x qlfr_pgcd 0x%x\n", status, err_qlfr_pgcd); + pm8001_tag_free(pm8001_ha, tag); return 0; } From c91774818b041ed290df29fb1dc0725be9b12e83 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Sep 2023 08:27:36 +0900 Subject: [PATCH 163/333] scsi: pm8001: Setup IRQs on resume The function pm8001_pci_resume() only calls pm8001_request_irq() without calling pm8001_setup_irq(). This causes the IRQ allocation to fail, which leads all drives being removed from the system. Fix this issue by integrating the code for pm8001_setup_irq() directly inside pm8001_request_irq() so that MSI-X setup is performed both during normal initialization and resume operations. Fixes: dbf9bfe61571 ("[SCSI] pm8001: add SAS/SATA HBA driver") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230911232745.325149-2-dlemoal@kernel.org Acked-by: Jack Wang Signed-off-by: Martin K. 
Petersen --- drivers/scsi/pm8001/pm8001_init.c | 51 +++++++++++-------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 5e5ce1e74c3b..443a3176c6c0 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -273,7 +273,6 @@ static irqreturn_t pm8001_interrupt_handler_intx(int irq, void *dev_id) return ret; } -static u32 pm8001_setup_irq(struct pm8001_hba_info *pm8001_ha); static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha); /** @@ -294,13 +293,6 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha, pm8001_dbg(pm8001_ha, INIT, "pm8001_alloc: PHY:%x\n", pm8001_ha->chip->n_phy); - /* Setup Interrupt */ - rc = pm8001_setup_irq(pm8001_ha); - if (rc) { - pm8001_dbg(pm8001_ha, FAIL, - "pm8001_setup_irq failed [ret: %d]\n", rc); - goto err_out; - } /* Request Interrupt */ rc = pm8001_request_irq(pm8001_ha); if (rc) @@ -1031,47 +1023,38 @@ static u32 pm8001_request_msix(struct pm8001_hba_info *pm8001_ha) } #endif -static u32 pm8001_setup_irq(struct pm8001_hba_info *pm8001_ha) -{ - struct pci_dev *pdev; - - pdev = pm8001_ha->pdev; - -#ifdef PM8001_USE_MSIX - if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) - return pm8001_setup_msix(pm8001_ha); - pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); -#endif - return 0; -} - /** * pm8001_request_irq - register interrupt * @pm8001_ha: our ha struct. 
*/ static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha) { - struct pci_dev *pdev; + struct pci_dev *pdev = pm8001_ha->pdev; +#ifdef PM8001_USE_MSIX int rc; - pdev = pm8001_ha->pdev; + if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) { + rc = pm8001_setup_msix(pm8001_ha); + if (rc) { + pm8001_dbg(pm8001_ha, FAIL, + "pm8001_setup_irq failed [ret: %d]\n", rc); + return rc; + } -#ifdef PM8001_USE_MSIX - if (pdev->msix_cap && pci_msi_enabled()) - return pm8001_request_msix(pm8001_ha); - else { - pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); - goto intx; + if (pdev->msix_cap && pci_msi_enabled()) + return pm8001_request_msix(pm8001_ha); } + + pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); #endif -intx: /* initialize the INT-X interrupt */ pm8001_ha->irq_vector[0].irq_id = 0; pm8001_ha->irq_vector[0].drv_inst = pm8001_ha; - rc = request_irq(pdev->irq, pm8001_interrupt_handler_intx, IRQF_SHARED, - pm8001_ha->name, SHOST_TO_SAS_HA(pm8001_ha->shost)); - return rc; + + return request_irq(pdev->irq, pm8001_interrupt_handler_intx, + IRQF_SHARED, pm8001_ha->name, + SHOST_TO_SAS_HA(pm8001_ha->shost)); } /** From d14e3e553e05cb763964c991fe6acb0a6a1c6f9c Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Thu, 31 Aug 2023 20:34:59 +0200 Subject: [PATCH 164/333] scsi: target: core: Fix target_cmd_counter leak The target_cmd_counter struct allocated via target_alloc_cmd_counter() is never freed, resulting in leaks across various transport types, e.g.: unreferenced object 0xffff88801f920120 (size 96): comm "sh", pid 102, jiffies 4294892535 (age 713.412s) hex dump (first 32 bytes): 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 38 01 92 1f 80 88 ff ff ........8....... 
backtrace: [<00000000e58a6252>] kmalloc_trace+0x11/0x20 [<0000000043af4b2f>] target_alloc_cmd_counter+0x17/0x90 [target_core_mod] [<000000007da2dfa7>] target_setup_session+0x2d/0x140 [target_core_mod] [<0000000068feef86>] tcm_loop_tpg_nexus_store+0x19b/0x350 [tcm_loop] [<000000006a80e021>] configfs_write_iter+0xb1/0x120 [<00000000e9f4d860>] vfs_write+0x2e4/0x3c0 [<000000008143433b>] ksys_write+0x80/0xb0 [<00000000a7df29b2>] do_syscall_64+0x42/0x90 [<0000000053f45fb8>] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Free the structure alongside the corresponding iscsit_conn / se_sess parent. Signed-off-by: David Disseldorp Link: https://lore.kernel.org/r/20230831183459.6938-1-ddiss@suse.de Fixes: becd9be6069e ("scsi: target: Move sess cmd counter to new struct") Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 687adc9e086c..0686882bcbda 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -264,6 +264,7 @@ void target_free_cmd_counter(struct target_cmd_counter *cmd_cnt) percpu_ref_put(&cmd_cnt->refcnt); percpu_ref_exit(&cmd_cnt->refcnt); + kfree(cmd_cnt); } EXPORT_SYMBOL_GPL(target_free_cmd_counter); From 7dcc683db3639eadd11bf0d59a09088a43de5e22 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 6 Sep 2023 11:08:09 +0800 Subject: [PATCH 165/333] scsi: lpfc: Fix the NULL vs IS_ERR() bug for debugfs_create_file() Since debugfs_create_file() returns ERR_PTR and never NULL, use IS_ERR() to check the return value. 
Fixes: 2fcbc569b9f5 ("scsi: lpfc: Make debugfs ktime stats generic for NVME and SCSI") Fixes: 4c47efc140fa ("scsi: lpfc: Move SCSI and NVME Stats to hardware queue structures") Fixes: 6a828b0f6192 ("scsi: lpfc: Support non-uniform allocation of MSIX vectors to hardware queues") Fixes: 95bfc6d8ad86 ("scsi: lpfc: Make FW logging dynamically configurable") Fixes: 9f77870870d8 ("scsi: lpfc: Add debugfs support for cm framework buffers") Fixes: c490850a0947 ("scsi: lpfc: Adapt partitioned XRI lists to efficient sharing") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/r/20230906030809.2847970-1-ruanjinjie@huawei.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 7f9b221e7c34..ea9b42225e62 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -6073,7 +6073,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) phba->hba_debugfs_root, phba, &lpfc_debugfs_op_multixripools); - if (!phba->debug_multixri_pools) { + if (IS_ERR(phba->debug_multixri_pools)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "0527 Cannot create debugfs multixripools\n"); goto debug_failed; @@ -6085,7 +6085,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) debugfs_create_file(name, S_IFREG | 0644, phba->hba_debugfs_root, phba, &lpfc_cgn_buffer_op); - if (!phba->debug_cgn_buffer) { + if (IS_ERR(phba->debug_cgn_buffer)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "6527 Cannot create debugfs " "cgn_buffer\n"); @@ -6098,7 +6098,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) debugfs_create_file(name, S_IFREG | 0644, phba->hba_debugfs_root, phba, &lpfc_rx_monitor_op); - if (!phba->debug_rx_monitor) { + if (IS_ERR(phba->debug_rx_monitor)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "6528 Cannot create debugfs " "rx_monitor\n"); @@ -6111,7 +6111,7 @@ 
lpfc_debugfs_initialize(struct lpfc_vport *vport) debugfs_create_file(name, 0644, phba->hba_debugfs_root, phba, &lpfc_debugfs_ras_log); - if (!phba->debug_ras_log) { + if (IS_ERR(phba->debug_ras_log)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "6148 Cannot create debugfs" " ras_log\n"); @@ -6132,7 +6132,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) debugfs_create_file(name, S_IFREG | 0644, phba->hba_debugfs_root, phba, &lpfc_debugfs_op_lockstat); - if (!phba->debug_lockstat) { + if (IS_ERR(phba->debug_lockstat)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "4610 Can't create debugfs lockstat\n"); goto debug_failed; @@ -6358,7 +6358,7 @@ nvmeio_off: debugfs_create_file(name, 0644, vport->vport_debugfs_root, vport, &lpfc_debugfs_op_scsistat); - if (!vport->debug_scsistat) { + if (IS_ERR(vport->debug_scsistat)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "4611 Cannot create debugfs scsistat\n"); goto debug_failed; @@ -6369,7 +6369,7 @@ nvmeio_off: debugfs_create_file(name, 0644, vport->vport_debugfs_root, vport, &lpfc_debugfs_op_ioktime); - if (!vport->debug_ioktime) { + if (IS_ERR(vport->debug_ioktime)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "0815 Cannot create debugfs ioktime\n"); goto debug_failed; From 9c3034968ed0feeaf72e5b549b19c7767a1a04f2 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Fri, 8 Sep 2023 14:18:52 -0700 Subject: [PATCH 166/333] scsi: lpfc: Early return after marking final NLP_DROPPED flag in dev_loss_tmo When a dev_loss_tmo event occurs, an ndlp lock is taken before checking nlp_flag for NLP_DROPPED. There is an attempt to restore the ndlp lock when exiting the if statement, but the nlp_put kref could be the final decrement causing a use-after-free memory access on a released ndlp object. Instead of trying to reacquire the ndlp lock after checking nlp_flag, just return after calling nlp_put. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20230908211852.37576-1-justintee8345@gmail.com Reviewed-by: "Ewan D. 
Milne" Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 51afb60859eb..674dd07aae72 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -203,7 +203,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->nlp_flag |= NLP_DROPPED; spin_unlock_irqrestore(&ndlp->lock, iflags); lpfc_nlp_put(ndlp); - spin_lock_irqsave(&ndlp->lock, iflags); + return; } spin_unlock_irqrestore(&ndlp->lock, iflags); From dae40be7a1a72474e225795c0d6f43a4ac596a3f Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Fri, 8 Sep 2023 14:19:23 -0700 Subject: [PATCH 167/333] scsi: lpfc: Prevent use-after-free during rmmod with mapped NVMe rports During rmmod, when dev_loss_tmo callback is called, an ndlp kref count is decremented twice. Once for SCSI transport registration and second to remove the initial node allocation kref. If there is also an NVMe transport registration, another reference count decrement is expected in lpfc_nvme_unregister_port(). Race conditions between the NVMe transport remoteport_delete and dev_loss_tmo callbacks sometimes result in premature ndlp object release resulting in use-after-free issues. Fix by not dropping the ndlp object in dev_loss_tmo callback with an outstanding NVMe transport registration. Inversely, mark the final NLP_DROPPED flag in lpfc_nvme_unregister_port when rmmod flag is set. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20230908211923.37603-1-justintee8345@gmail.com Signed-off-by: Martin K.
Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 3 ++- drivers/scsi/lpfc/lpfc_nvme.c | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 674dd07aae72..5154eeaee0ec 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -199,7 +199,8 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) /* Only 1 thread can drop the initial node reference. If * another thread has set NLP_DROPPED, this thread is done. */ - if (!(ndlp->nlp_flag & NLP_DROPPED)) { + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD) && + !(ndlp->nlp_flag & NLP_DROPPED)) { ndlp->nlp_flag |= NLP_DROPPED; spin_unlock_irqrestore(&ndlp->lock, iflags); lpfc_nlp_put(ndlp); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 39acbcb7ec66..96e11a26c297 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -228,8 +228,7 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) spin_unlock_irq(&ndlp->lock); /* On a devloss timeout event, one more put is executed provided the - * NVME and SCSI rport unregister requests are complete. If the vport - * is unloading, this extra put is executed by lpfc_drop_node. + * NVME and SCSI rport unregister requests are complete. */ if (!(ndlp->fc4_xpt_flags & fc4_xpt_flags)) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); @@ -2567,11 +2566,7 @@ lpfc_nvme_rescan_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * nvme_transport perspective. Loss of an rport just means IO cannot * be sent and recovery is completely up to the initator. * For now, the driver just unbinds the DID and port_role so that - * no further IO can be issued. Changes are planned for later. - * - * Notes - the ndlp reference count is not decremented here since - * since there is no nvme_transport api for devloss. Node ref count - * is only adjusted in driver unload. 
+ * no further IO can be issued. */ void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) @@ -2646,6 +2641,21 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) "6167 NVME unregister failed %d " "port_state x%x\n", ret, remoteport->port_state); + + if (vport->load_flag & FC_UNLOADING) { + /* Only 1 thread can drop the initial node + * reference. Check if another thread has set + * NLP_DROPPED. + */ + spin_lock_irq(&ndlp->lock); + if (!(ndlp->nlp_flag & NLP_DROPPED)) { + ndlp->nlp_flag |= NLP_DROPPED; + spin_unlock_irq(&ndlp->lock); + lpfc_nlp_put(ndlp); + return; + } + spin_unlock_irq(&ndlp->lock); + } } } return; From 4b2d631236931550f2ab0abc9a666958853ae846 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Mon, 11 Sep 2023 22:32:56 +0800 Subject: [PATCH 168/333] memblock tests: Fix compilation errors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the following errors. Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") passes a nid parameter to reserve_bootmem_region(): $ make -C tools/testing/memblock/ ... memblock.c: In function ‘memmap_init_reserved_pages’: memblock.c:2111:25: error: too many arguments to function ‘reserve_bootmem_region’ 2111 | reserve_bootmem_region(start, end, nid); | ^~~~~~~~~~~~~~~~~~~~~~ ../../include/linux/mm.h:32:6: note: declared here 32 | void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); | ^~~~~~~~~~~~~~~~~~~~~~ memblock.c:2122:17: error: too many arguments to function ‘reserve_bootmem_region’ 2122 | reserve_bootmem_region(start, end, nid); | ^~~~~~~~~~~~~~~~~~~~~~ Commit dcdfdd40fa82 ("mm: Add support for unaccepted memory") calls accept_memory() in memblock.c: $ make -C tools/testing/memblock/ ...
cc -fsanitize=address -fsanitize=undefined main.o memblock.o \ lib/slab.o mmzone.o slab.o tests/alloc_nid_api.o \ tests/alloc_helpers_api.o tests/alloc_api.o tests/basic_api.o \ tests/common.o tests/alloc_exact_nid_api.o -o main /usr/bin/ld: memblock.o: in function `memblock_alloc_range_nid': memblock.c:(.text+0x7ae4): undefined reference to `accept_memory' Signed-off-by: Rong Tao Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Link: https://lore.kernel.org/r/tencent_6F19BC082167F15DF2A8D8BEFE8EF220F60A@qq.com Signed-off-by: Mike Rapoport (IBM) --- tools/include/linux/mm.h | 2 +- tools/testing/memblock/internal.h | 4 ++++ tools/testing/memblock/mmzone.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index a03d9bba5151..2bc94079d616 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -29,7 +29,7 @@ static inline void *phys_to_virt(unsigned long address) return __va(address); } -void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); +void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); static inline void totalram_pages_inc(void) { diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..f6c6e5474c3a 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -20,4 +20,8 @@ void memblock_free_pages(struct page *page, unsigned long pfn, { } +static inline void accept_memory(phys_addr_t start, phys_addr_t end) +{ +} + #endif diff --git a/tools/testing/memblock/mmzone.c b/tools/testing/memblock/mmzone.c index 7b0909e8b759..d3d58851864e 100644 --- a/tools/testing/memblock/mmzone.c +++ b/tools/testing/memblock/mmzone.c @@ -11,7 +11,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) return NULL; } -void reserve_bootmem_region(phys_addr_t start, phys_addr_t end) +void 
reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid) { } From 5e1bffbdb63baf89f3bf0b6bafb50903432a7434 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Thu, 14 Sep 2023 09:24:51 +0300 Subject: [PATCH 169/333] memblock tests: fix warning: "__ALIGN_KERNEL" redefined Building memblock tests produces the following warning: cc -I. -I../../include -Wall -O2 -fsanitize=address -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT -c -o main.o main.c In file included from ../../include/linux/pfn.h:5, from ./linux/memory_hotplug.h:6, from ./linux/init.h:7, from ./linux/memblock.h:11, from tests/common.h:8, from tests/basic_api.h:5, from main.c:2: ../../include/linux/mm.h:14: warning: "__ALIGN_KERNEL" redefined 14 | #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) | In file included from ../../include/linux/mm.h:6, from ../../include/linux/pfn.h:5, from ./linux/memory_hotplug.h:6, from ./linux/init.h:7, from ./linux/memblock.h:11, from tests/common.h:8, from tests/basic_api.h:5, from main.c:2: ../../include/uapi/linux/const.h:31: note: this is the location of the previous definition 31 | #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) | Remove definitions of __ALIGN_KERNEL and __ALIGN_KERNEL_MASK from tools/include/linux/mm.h to fix it. 
Signed-off-by: Mike Rapoport (IBM) --- tools/include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index 2bc94079d616..f3c82ab5b14c 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -11,8 +11,6 @@ #define PHYS_ADDR_MAX (~(phys_addr_t)0) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) -#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) From 55122e0130e51eb71f5ec62d10525db0468f28e8 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Thu, 14 Sep 2023 10:45:40 +0300 Subject: [PATCH 170/333] =?UTF-8?q?memblock=20tests:=20fix=20warning=20?= =?UTF-8?q?=E2=80=98struct=20seq=5Ffile=E2=80=99=20declared=20inside=20par?= =?UTF-8?q?ameter=20list?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building memblock tests produces the following warning: cc -I. -I../../include -Wall -O2 -fsanitize=address -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT -c -o main.o main.c In file included from tests/common.h:9, from tests/basic_api.h:5, from main.c:2: ./linux/memblock.h:601:50: warning: ‘struct seq_file’ declared inside parameter list will not be visible outside of this definition or declaration 601 | static inline void memtest_report_meminfo(struct seq_file *m) { } | ^~~~~~~~ Add declaration of 'struct seq_file' to tools/include/linux/seq_file.h to fix it. 
Signed-off-by: Mike Rapoport (IBM) --- tools/include/linux/seq_file.h | 2 ++ tools/testing/memblock/tests/basic_api.c | 2 +- tools/testing/memblock/tests/common.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h index 102fd9217f1f..f6bc226af0c1 100644 --- a/tools/include/linux/seq_file.h +++ b/tools/include/linux/seq_file.h @@ -1,4 +1,6 @@ #ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H #define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H +struct seq_file; + #endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */ diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 411647094cc3..57bf2688edfd 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include "basic_api.h" #include #include -#include "basic_api.h" #define EXPECTED_MEMBLOCK_REGIONS 128 #define FUNC_ADD "memblock_add" diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index 4f23302ee677..b5ec59aa62d7 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include From f6c8a312ef0175ea67a1ace29e1d1e5d470ea45a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 18 Aug 2023 13:04:33 +0300 Subject: [PATCH 171/333] media: pci: ivsc: Select build dependencies Select MEDIA_CONTROLLER, VIDEO_V4L2_SUBDEV_API and V4L2_ASYNC as the IVSC driver depends on all these. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308170227.ymiFlMbT-lkp@intel.com/ Fixes: 29006e196a56 ("media: pci: intel: ivsc: Add CSI submodule") Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/pci/intel/ivsc/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig index 1ef1c4e3750d..92c975e98cb1 100644 --- a/drivers/media/pci/intel/ivsc/Kconfig +++ b/drivers/media/pci/intel/ivsc/Kconfig @@ -4,6 +4,9 @@ config INTEL_VSC tristate "Intel Visual Sensing Controller" depends on INTEL_MEI && ACPI + select MEDIA_CONTROLLER + select VIDEO_V4L2_SUBDEV_API + select V4L2_ASYNC help This adds support for Intel Visual Sensing Controller (IVSC). From 86e16b87afac20779da1228d690a95c54d7e2ad0 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 18 Aug 2023 12:51:49 +0300 Subject: [PATCH 172/333] media: v4l: Use correct dependency for camera sensor drivers The Kconfig option that enables compiling camera sensor drivers is VIDEO_CAMERA_SENSOR rather than MEDIA_CAMERA_SUPPORT as it was previously. Fix this. Also select VIDEO_OV7670 for marvell platform drivers only if MEDIA_SUBDRV_AUTOSELECT and VIDEO_CAMERA_SENSOR are enabled. 
Reported-by: Randy Dunlap Fixes: 7d3c7d2a2914 ("media: i2c: Add a camera sensor top level menu") Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/platform/marvell/Kconfig | 4 ++-- drivers/media/usb/em28xx/Kconfig | 4 ++-- drivers/media/usb/go7007/Kconfig | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/marvell/Kconfig b/drivers/media/platform/marvell/Kconfig index ec1a16734a28..d6499ffe30e8 100644 --- a/drivers/media/platform/marvell/Kconfig +++ b/drivers/media/platform/marvell/Kconfig @@ -7,7 +7,7 @@ config VIDEO_CAFE_CCIC depends on V4L_PLATFORM_DRIVERS depends on PCI && I2C && VIDEO_DEV depends on COMMON_CLK - select VIDEO_OV7670 + select VIDEO_OV7670 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR select VIDEOBUF2_VMALLOC select VIDEOBUF2_DMA_CONTIG select VIDEOBUF2_DMA_SG @@ -22,7 +22,7 @@ config VIDEO_MMP_CAMERA depends on I2C && VIDEO_DEV depends on ARCH_MMP || COMPILE_TEST depends on COMMON_CLK - select VIDEO_OV7670 + select VIDEO_OV7670 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR select I2C_GPIO select VIDEOBUF2_VMALLOC select VIDEOBUF2_DMA_CONTIG diff --git a/drivers/media/usb/em28xx/Kconfig b/drivers/media/usb/em28xx/Kconfig index b3c472b8c5a9..cb61fd6cc6c6 100644 --- a/drivers/media/usb/em28xx/Kconfig +++ b/drivers/media/usb/em28xx/Kconfig @@ -12,8 +12,8 @@ config VIDEO_EM28XX_V4L2 select VIDEO_SAA711X if MEDIA_SUBDRV_AUTOSELECT select VIDEO_TVP5150 if MEDIA_SUBDRV_AUTOSELECT select VIDEO_MSP3400 if MEDIA_SUBDRV_AUTOSELECT - select VIDEO_MT9V011 if MEDIA_SUBDRV_AUTOSELECT && MEDIA_CAMERA_SUPPORT - select VIDEO_OV2640 if MEDIA_SUBDRV_AUTOSELECT && MEDIA_CAMERA_SUPPORT + select VIDEO_MT9V011 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR + select VIDEO_OV2640 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR help This is a video4linux driver for Empia 28xx based TV cards. 
diff --git a/drivers/media/usb/go7007/Kconfig b/drivers/media/usb/go7007/Kconfig index 4ff79940ad8d..b2a15d9fb1f3 100644 --- a/drivers/media/usb/go7007/Kconfig +++ b/drivers/media/usb/go7007/Kconfig @@ -12,8 +12,8 @@ config VIDEO_GO7007 select VIDEO_TW2804 if MEDIA_SUBDRV_AUTOSELECT select VIDEO_TW9903 if MEDIA_SUBDRV_AUTOSELECT select VIDEO_TW9906 if MEDIA_SUBDRV_AUTOSELECT - select VIDEO_OV7640 if MEDIA_SUBDRV_AUTOSELECT && MEDIA_CAMERA_SUPPORT select VIDEO_UDA1342 if MEDIA_SUBDRV_AUTOSELECT + select VIDEO_OV7640 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR help This is a video4linux driver for the WIS GO7007 MPEG encoder chip. From 41425941dfcf47cc6df8e500af6ff16a7be6539f Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 22 Aug 2023 11:10:34 +0300 Subject: [PATCH 173/333] media: via: Use correct dependency for camera sensor drivers The via camera controller driver selected ov7670 driver, however now that driver has dependencies and may no longer be selected unconditionally. Reported-by: Randy Dunlap Fixes: 7d3c7d2a2914 ("media: i2c: Add a camera sensor top level menu") Signed-off-by: Sakari Ailus Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Hans Verkuil --- drivers/media/platform/via/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/via/Kconfig b/drivers/media/platform/via/Kconfig index 8926eb0803b2..6e603c038248 100644 --- a/drivers/media/platform/via/Kconfig +++ b/drivers/media/platform/via/Kconfig @@ -7,7 +7,7 @@ config VIDEO_VIA_CAMERA depends on V4L_PLATFORM_DRIVERS depends on FB_VIA && VIDEO_DEV select VIDEOBUF2_DMA_SG - select VIDEO_OV7670 + select VIDEO_OV7670 if VIDEO_CAMERA_SENSOR help Driver support for the integrated camera controller in VIA Chrome9 chipsets. 
Currently only tested on OLPC xo-1.5 systems From 7908632f2927b65f7486ae6b67c24071666ba43f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 14 Sep 2023 07:19:02 -0300 Subject: [PATCH 174/333] Revert "drm/vkms: Fix race-condition between the hrtimer and the atomic commit" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a0e6a017ab56936c0405fe914a793b241ed25ee0. Unlocking a mutex in the context of a hrtimer callback is violating mutex locking rules, as mutex_unlock() from interrupt context is not permitted. Link: https://lore.kernel.org/dri-devel/ZQLAc%2FFwkv%2FGiVoK@phenom.ffwll.local/T/#t Acked-by: Daniel Vetter Signed-off-by: Maíra Canal Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20230914102024.1789154-1-mcanal@igalia.com --- drivers/gpu/drm/vkms/vkms_composer.c | 9 ++------- drivers/gpu/drm/vkms/vkms_crtc.c | 9 ++++----- drivers/gpu/drm/vkms/vkms_drv.h | 4 +--- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index d5d4f642d367..3c99fb8b54e2 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -408,15 +408,10 @@ void vkms_set_composer(struct vkms_output *out, bool enabled) if (enabled) drm_crtc_vblank_get(&out->crtc); - mutex_lock(&out->enabled_lock); + spin_lock_irq(&out->lock); old_enabled = out->composer_enabled; out->composer_enabled = enabled; - - /* the composition wasn't enabled, so unlock the lock to make sure the lock - * will be balanced even if we have a failed commit - */ - if (!out->composer_enabled) - mutex_unlock(&out->enabled_lock); + spin_unlock_irq(&out->lock); if (old_enabled) drm_crtc_vblank_put(&out->crtc); diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 3c5ebf106b66..61e500b8c9da 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ 
b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -16,7 +16,7 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer) struct drm_crtc *crtc = &output->crtc; struct vkms_crtc_state *state; u64 ret_overrun; - bool ret, fence_cookie, composer_enabled; + bool ret, fence_cookie; fence_cookie = dma_fence_begin_signalling(); @@ -25,15 +25,15 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer) if (ret_overrun != 1) pr_warn("%s: vblank timer overrun\n", __func__); + spin_lock(&output->lock); ret = drm_crtc_handle_vblank(crtc); if (!ret) DRM_ERROR("vkms failure on handling vblank"); state = output->composer_state; - composer_enabled = output->composer_enabled; - mutex_unlock(&output->enabled_lock); + spin_unlock(&output->lock); - if (state && composer_enabled) { + if (state && output->composer_enabled) { u64 frame = drm_crtc_accurate_vblank_count(crtc); /* update frame_start only if a queued vkms_composer_worker() @@ -295,7 +295,6 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, spin_lock_init(&vkms_out->lock); spin_lock_init(&vkms_out->composer_lock); - mutex_init(&vkms_out->enabled_lock); vkms_out->composer_workq = alloc_ordered_workqueue("vkms_composer", 0); if (!vkms_out->composer_workq) diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index c7ae6c2ba1df..8f5710debb1e 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -108,10 +108,8 @@ struct vkms_output { struct workqueue_struct *composer_workq; /* protects concurrent access to composer */ spinlock_t lock; - /* guarantees that if the composer is enabled, a job will be queued */ - struct mutex enabled_lock; - /* protected by @enabled_lock */ + /* protected by @lock */ bool composer_enabled; struct vkms_crtc_state *composer_state; From fac58baf8fcfcd7481e8f6d60206ce2a47c1476c Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Wed, 13 Sep 2023 18:26:56 +0800 Subject: [PATCH 175/333] ASoC: imx-rpmsg: Set 
ignore_pmdown_time for dai_link i.MX rpmsg sound cards work in codec slave mode. MCLK will be disabled by the CPU DAI driver in hw_free(). Some codecs require MCLK to be present during the power up/down sequence, so ignore_pmdown_time needs to be set to power down the codec immediately, before MCLK is turned off. Taking WM8962 as an example, if MCLK is disabled before DAPM powers down the playback stream, a FIFO error will arise in WM8962, which will have a bad impact on the next playback. Signed-off-by: Chancel Liu Acked-by: Shengjiu Wang Link: https://lore.kernel.org/r/20230913102656.2966757-1-chancel.liu@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-rpmsg.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/fsl/imx-rpmsg.c b/sound/soc/fsl/imx-rpmsg.c index 3c7b95db2eac..b578f9a32d7f 100644 --- a/sound/soc/fsl/imx-rpmsg.c +++ b/sound/soc/fsl/imx-rpmsg.c @@ -89,6 +89,14 @@ static int imx_rpmsg_probe(struct platform_device *pdev) SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBC_CFC; + /* + * i.MX rpmsg sound cards work on codec slave mode. MCLK will be + * disabled by CPU DAI driver in hw_free(). Some codec requires MCLK + * present at power up/down sequence. So need to set ignore_pmdown_time + * to power down codec immediately before MCLK is turned off. + */ + data->dai.ignore_pmdown_time = 1; + /* Optional codec node */ ret = of_parse_phandle_with_fixed_args(np, "audio-codec", 0, 0, &args); if (ret) { From 139a27854bf5ce93ff9805f9f7683b88c13074dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 7 Sep 2023 15:53:38 +0200 Subject: [PATCH 176/333] drm/tests: helpers: Avoid a driver uaf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using __drm_kunit_helper_alloc_drm_device() the driver may be dereferenced by device-managed resources up until the device is freed, which is typically later than the kunit-managed resource code frees it. Fix this by simply making the driver device-managed as well.
In short, the sequence leading to the UAF is as follows: INIT: Code allocates a struct device as a kunit-managed resource. Code allocates a drm driver as a kunit-managed resource. Code allocates a drm device as a device-managed resource. EXIT: Kunit resource cleanup frees the drm driver Kunit resource cleanup puts the struct device, which starts a device-managed resource cleanup device-managed cleanup calls drm_dev_put() drm_dev_put() dereferences the (now freed) drm driver -> Boom. Related KASAN message: [55272.551542] ================================================================== [55272.551551] BUG: KASAN: slab-use-after-free in drm_dev_put.part.0+0xd4/0xe0 [drm] [55272.551603] Read of size 8 at addr ffff888127502828 by task kunit_try_catch/10353 [55272.551612] CPU: 4 PID: 10353 Comm: kunit_try_catch Tainted: G U N 6.5.0-rc7+ #155 [55272.551620] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 0403 01/26/2021 [55272.551626] Call Trace: [55272.551629] [55272.551633] dump_stack_lvl+0x57/0x90 [55272.551639] print_report+0xcf/0x630 [55272.551645] ? _raw_spin_lock_irqsave+0x5f/0x70 [55272.551652] ? drm_dev_put.part.0+0xd4/0xe0 [drm] [55272.551694] kasan_report+0xd7/0x110 [55272.551699] ? drm_dev_put.part.0+0xd4/0xe0 [drm] [55272.551742] drm_dev_put.part.0+0xd4/0xe0 [drm] [55272.551783] devres_release_all+0x15d/0x1f0 [55272.551790] ? __pfx_devres_release_all+0x10/0x10 [55272.551797] device_unbind_cleanup+0x16/0x1a0 [55272.551802] device_release_driver_internal+0x3e5/0x540 [55272.551808] ? kobject_put+0x5d/0x4b0 [55272.551814] bus_remove_device+0x1f1/0x3f0 [55272.551819] device_del+0x342/0x910 [55272.551826] ? __pfx_device_del+0x10/0x10 [55272.551830] ? lock_release+0x339/0x5e0 [55272.551836] ? kunit_remove_resource+0x128/0x290 [kunit] [55272.551845] ? __pfx_lock_release+0x10/0x10 [55272.551851] platform_device_del.part.0+0x1f/0x1e0 [55272.551856] ? 
_raw_spin_unlock_irqrestore+0x30/0x60 [55272.551863] kunit_remove_resource+0x195/0x290 [kunit] [55272.551871] ? _raw_spin_unlock_irqrestore+0x30/0x60 [55272.551877] kunit_cleanup+0x78/0x120 [kunit] [55272.551885] ? __kthread_parkme+0xc1/0x1f0 [55272.551891] ? __pfx_kunit_try_run_case_cleanup+0x10/0x10 [kunit] [55272.551900] ? __pfx_kunit_generic_run_threadfn_adapter+0x10/0x10 [kunit] [55272.551909] kunit_generic_run_threadfn_adapter+0x4a/0x90 [kunit] [55272.551919] kthread+0x2e7/0x3c0 [55272.551924] ? __pfx_kthread+0x10/0x10 [55272.551929] ret_from_fork+0x2d/0x70 [55272.551935] ? __pfx_kthread+0x10/0x10 [55272.551940] ret_from_fork_asm+0x1b/0x30 [55272.551948] [55272.551953] Allocated by task 10351: [55272.551956] kasan_save_stack+0x1c/0x40 [55272.551962] kasan_set_track+0x21/0x30 [55272.551966] __kasan_kmalloc+0x8b/0x90 [55272.551970] __kmalloc+0x5e/0x160 [55272.551976] kunit_kmalloc_array+0x1c/0x50 [kunit] [55272.551984] drm_exec_test_init+0xfa/0x2c0 [drm_exec_test] [55272.551991] kunit_try_run_case+0xdd/0x250 [kunit] [55272.551999] kunit_generic_run_threadfn_adapter+0x4a/0x90 [kunit] [55272.552008] kthread+0x2e7/0x3c0 [55272.552012] ret_from_fork+0x2d/0x70 [55272.552017] ret_from_fork_asm+0x1b/0x30 [55272.552024] Freed by task 10353: [55272.552027] kasan_save_stack+0x1c/0x40 [55272.552032] kasan_set_track+0x21/0x30 [55272.552036] kasan_save_free_info+0x27/0x40 [55272.552041] __kasan_slab_free+0x106/0x180 [55272.552046] slab_free_freelist_hook+0xb3/0x160 [55272.552051] __kmem_cache_free+0xb2/0x290 [55272.552056] kunit_remove_resource+0x195/0x290 [kunit] [55272.552064] kunit_cleanup+0x78/0x120 [kunit] [55272.552072] kunit_generic_run_threadfn_adapter+0x4a/0x90 [kunit] [55272.552080] kthread+0x2e7/0x3c0 [55272.552085] ret_from_fork+0x2d/0x70 [55272.552089] ret_from_fork_asm+0x1b/0x30 [55272.552096] The buggy address belongs to the object at ffff888127502800 which belongs to the cache kmalloc-512 of size 512 [55272.552105] The buggy address is located 40 bytes 
inside of freed 512-byte region [ffff888127502800, ffff888127502a00) [55272.552115] The buggy address belongs to the physical page: [55272.552119] page:00000000af6c70ff refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x127500 [55272.552127] head:00000000af6c70ff order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [55272.552133] anon flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) [55272.552141] page_type: 0xffffffff() [55272.552145] raw: 0017ffffc0010200 ffff888100042c80 0000000000000000 dead000000000001 [55272.552152] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 [55272.552157] page dumped because: kasan: bad access detected [55272.552163] Memory state around the buggy address: [55272.552167] ffff888127502700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [55272.552173] ffff888127502780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [55272.552178] >ffff888127502800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [55272.552184] ^ [55272.552187] ffff888127502880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [55272.552193] ffff888127502900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [55272.552198] ================================================================== [55272.552203] Disabling lock debugging due to kernel taint v2: - Update commit message, add Fixes: tag and Cc stable. v3: - Further commit message updates (Maxime Ripard). 
Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v6.3+ Fixes: d98780310719 ("drm/tests: helpers: Allow to pass a custom drm_driver") Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Acked-by: Maxime Ripard Link: https://lore.kernel.org/r/20230907135339.7971-2-thomas.hellstrom@linux.intel.com Signed-off-by: Maxime Ripard --- include/drm/drm_kunit_helpers.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h index 514c8a7a32f0..ba483c87f0e7 100644 --- a/include/drm/drm_kunit_helpers.h +++ b/include/drm/drm_kunit_helpers.h @@ -3,6 +3,8 @@ #ifndef DRM_KUNIT_HELPERS_H_ #define DRM_KUNIT_HELPERS_H_ +#include + #include struct drm_device; @@ -51,7 +53,7 @@ __drm_kunit_helper_alloc_drm_device(struct kunit *test, { struct drm_driver *driver; - driver = kunit_kzalloc(test, sizeof(*driver), GFP_KERNEL); + driver = devm_kzalloc(dev, sizeof(*driver), GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, driver); driver->driver_features = features; From 72ca56664e483de991ae4afa623e54570f81ebde Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 14 Sep 2023 15:08:52 +0100 Subject: [PATCH 177/333] ALSA: hda: cs35l56: Don't 'return ret' if ret is always zero The final return in cs35l56_hda_posture_get() was returning the value of 'ret', but ret is always zero at this point. So this can be a simple 'return 0'. 
Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230914140852.7112-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 9e4976bdb5e0..bc75865b5de8 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -218,7 +218,7 @@ static int cs35l56_hda_posture_get(struct snd_kcontrol *kcontrol, ucontrol->value.integer.value[0] = pos; - return ret; + return 0; } static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol, From 6ba59c008f08e84b3c87be10f3391c9735e4f833 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 14 Sep 2023 16:25:04 +0300 Subject: [PATCH 178/333] ASoC: SOF: ipc4-topology: fix wrong sizeof argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit available_fmt is a pointer. Fixes: 4fdef47a44d6 ("ASoC: SOF: ipc4-topology: Add new tokens for input/output pin format count") Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230914132504.18463-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index f2a30cd31378..7cb63e6b24dc 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -231,7 +231,7 @@ static int sof_ipc4_get_audio_fmt(struct snd_soc_component *scomp, ret = sof_update_ipc_object(scomp, available_fmt, SOF_AUDIO_FMT_NUM_TOKENS, swidget->tuples, - swidget->num_tuples, sizeof(available_fmt), 1); + swidget->num_tuples, sizeof(*available_fmt), 1); if (ret) { dev_err(scomp->dev, "Failed to parse audio format token count\n"); return ret; From bb0216d4db9ecaa51af45d8504757becbe5c050d Mon Sep 17 00:00:00 2001 From: 
Peter Ujfalusi Date: Thu, 14 Sep 2023 15:47:25 +0300 Subject: [PATCH 179/333] ASoC: SOF: sof-audio: Fix DSP core put imbalance on widget setup failure In case the widget setup fails we should only decrement the core usage count if the sof_widget_free_unlocked() has not been called as part of the error handling. sof_widget_free_unlocked() calls snd_sof_dsp_core_put() and the additional core_put will cause imbalance in core usage count. Use the existing use_count_decremented to handle this issue. Signed-off-by: Peter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230914124725.17397-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-audio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index e7ef77012c35..e5405f854a91 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -212,7 +212,8 @@ widget_free: sof_widget_free_unlocked(sdev, swidget); use_count_decremented = true; core_put: - snd_sof_dsp_core_put(sdev, swidget->core); + if (!use_count_decremented) + snd_sof_dsp_core_put(sdev, swidget->core); pipe_widget_free: if (swidget->id != snd_soc_dapm_scheduler) sof_widget_free_unlocked(sdev, swidget->spipe->pipe_widget); From f6007dce0cd35d634d9be91ef3515a6385dcee16 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 9 Aug 2023 12:44:20 +0200 Subject: [PATCH 180/333] dm: fix a race condition in retrieve_deps There's a race condition in the multipath target when retrieve_deps races with multipath_message calling dm_get_device and dm_put_device. retrieve_deps walks the list of open devices without holding any lock but multipath may add or remove devices to the list while it is running. The end result may be memory corruption or use-after-free memory access. 
See this description of a UAF with multipath_message(): https://listman.redhat.com/archives/dm-devel/2022-October/052373.html Fix this bug by introducing a new rw semaphore "devices_lock". We grab devices_lock for read in retrieve_deps and we grab it for write in dm_get_device and dm_put_device. Reported-by: Luo Meng Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Tested-by: Li Lingfeng Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm-ioctl.c | 7 ++++++- drivers/md/dm-table.c | 32 ++++++++++++++++++++++++-------- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 0d93661f88d3..095b9b49aa82 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -214,6 +214,7 @@ struct dm_table { /* a list of devices used by this table */ struct list_head devices; + struct rw_semaphore devices_lock; /* events get handed up using this callback */ void (*event_fn)(void *data); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index f5ed729a8e0c..21ebb6c39394 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1630,6 +1630,8 @@ static void retrieve_deps(struct dm_table *table, struct dm_dev_internal *dd; struct dm_target_deps *deps; + down_read(&table->devices_lock); + deps = get_result_buffer(param, param_size, &len); /* @@ -1644,7 +1646,7 @@ static void retrieve_deps(struct dm_table *table, needed = struct_size(deps, dev, count); if (len < needed) { param->flags |= DM_BUFFER_FULL_FLAG; - return; + goto out; } /* @@ -1656,6 +1658,9 @@ static void retrieve_deps(struct dm_table *table, deps->dev[count++] = huge_encode_dev(dd->dm_dev->bdev->bd_dev); param->data_size = param->data_start + needed; + +out: + up_read(&table->devices_lock); } static int table_deps(struct file *filp, struct dm_ioctl *param, size_t param_size) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7d208b2b1a19..37b48f63ae6a 100644 --- a/drivers/md/dm-table.c +++ 
b/drivers/md/dm-table.c @@ -135,6 +135,7 @@ int dm_table_create(struct dm_table **result, blk_mode_t mode, return -ENOMEM; INIT_LIST_HEAD(&t->devices); + init_rwsem(&t->devices_lock); if (!num_targets) num_targets = KEYS_PER_NODE; @@ -359,16 +360,20 @@ int __ref dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode, if (dev == disk_devt(t->md->disk)) return -EINVAL; + down_write(&t->devices_lock); + dd = find_device(&t->devices, dev); if (!dd) { dd = kmalloc(sizeof(*dd), GFP_KERNEL); - if (!dd) - return -ENOMEM; + if (!dd) { + r = -ENOMEM; + goto unlock_ret_r; + } r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev); if (r) { kfree(dd); - return r; + goto unlock_ret_r; } refcount_set(&dd->count, 1); @@ -378,12 +383,17 @@ int __ref dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode, } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) { r = upgrade_mode(dd, mode, t->md); if (r) - return r; + goto unlock_ret_r; } refcount_inc(&dd->count); out: + up_write(&t->devices_lock); *result = dd->dm_dev; return 0; + +unlock_ret_r: + up_write(&t->devices_lock); + return r; } EXPORT_SYMBOL(dm_get_device); @@ -419,9 +429,12 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, void dm_put_device(struct dm_target *ti, struct dm_dev *d) { int found = 0; - struct list_head *devices = &ti->table->devices; + struct dm_table *t = ti->table; + struct list_head *devices = &t->devices; struct dm_dev_internal *dd; + down_write(&t->devices_lock); + list_for_each_entry(dd, devices, list) { if (dd->dm_dev == d) { found = 1; @@ -430,14 +443,17 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) } if (!found) { DMERR("%s: device %s not in table devices list", - dm_device_name(ti->table->md), d->name); - return; + dm_device_name(t->md), d->name); + goto unlock_ret; } if (refcount_dec_and_test(&dd->count)) { - dm_put_table_device(ti->table->md, d); + dm_put_table_device(t->md, d); list_del(&dd->list); kfree(dd); } + 
+unlock_ret: + up_write(&t->devices_lock); } EXPORT_SYMBOL(dm_put_device); From 1bb0763f1eb7dd015989fdc77dea17a349df2ea9 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Mon, 11 Sep 2023 10:51:38 +0800 Subject: [PATCH 181/333] jbd2: Fix memory leak in journal_init_common() There is a memory leak reported by kmemleak: unreferenced object 0xff11000105903b80 (size 64): comm "mount", pid 3382, jiffies 4295032021 (age 27.826s) hex dump (first 32 bytes): 04 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................ ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] __kmalloc_node+0x50/0x160 [] crypto_alloc_tfmmem.isra.0+0x38/0x110 [] crypto_create_tfm_node+0x85/0x2f0 [] crypto_alloc_tfm_node+0xfc/0x210 [] journal_init_common+0x727/0x1ad0 [] jbd2_journal_init_inode+0x2b5/0x500 [] ext4_load_and_init_journal+0x255/0x2440 [] ext4_fill_super+0x8823/0xa330 ... The root cause was traced to an error handling path in journal_init_common() when memory allocation failed in register_shrinker(). The checksum driver is used to reference the checksum algorithm via cryptoapi and the user should release the memory when the driver is no longer needed or the journal initialization fails. Fix it by calling crypto_free_shash() on the "err_cleanup" error handling path in journal_init_common().
Fixes: c30713084ba5 ("jbd2: move load_superblock() into journal_init_common()") Signed-off-by: Li Zetao Reviewed-by: Jan Kara Reviewed-by: Zhang Yi Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20230911025138.983101-1-lizetao1@huawei.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 15e33c26c6cd..3ffda557dbdf 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1604,6 +1604,8 @@ static journal_t *journal_init_common(struct block_device *bdev, err_cleanup: percpu_counter_destroy(&journal->j_checkpoint_jh_count); + if (journal->j_chksum_driver) + crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_wbuf); jbd2_journal_destroy_revoke(journal); journal_fail_superblock(journal); From 45e4ab320c9b5fa67b1fc3b6a9b381cfcc0c8488 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 13 Sep 2023 17:04:54 +0200 Subject: [PATCH 182/333] ext4: move setting of trimmed bit into ext4_try_to_trim_range() Currently we set the group's trimmed bit in ext4_trim_all_free() based on the return value of ext4_try_to_trim_range(). However, when we want to abort trimming because of a suspend attempt, we want to return success from ext4_try_to_trim_range() but not set the trimmed bit. Instead of implementing awkward propagation of this information, just move setting of the trimmed bit into ext4_try_to_trim_range() when the whole group is trimmed.
Cc: stable@kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230913150504.9054-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c91db9f57524..09091adfde64 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6906,6 +6906,16 @@ __acquires(bitlock) return ret; } +static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, + ext4_group_t grp) +{ + if (grp < ext4_get_groups_count(sb)) + return EXT4_CLUSTERS_PER_GROUP(sb) - 1; + return (ext4_blocks_count(EXT4_SB(sb)->s_es) - + ext4_group_first_block_no(sb, grp) - 1) >> + EXT4_CLUSTER_BITS(sb); +} + static int ext4_try_to_trim_range(struct super_block *sb, struct ext4_buddy *e4b, ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) @@ -6913,9 +6923,12 @@ __acquires(ext4_group_lock_ptr(sb, e4b->bd_group)) __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) { ext4_grpblk_t next, count, free_count; + bool set_trimmed = false; void *bitmap; bitmap = e4b->bd_bitmap; + if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) + set_trimmed = true; start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; @@ -6930,16 +6943,14 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) int ret = ext4_trim_extent(sb, start, next - start, e4b); if (ret && ret != -EOPNOTSUPP) - break; + return count; count += next - start; } free_count += next - start; start = next + 1; - if (fatal_signal_pending(current)) { - count = -ERESTARTSYS; - break; - } + if (fatal_signal_pending(current)) + return -ERESTARTSYS; if (need_resched()) { ext4_unlock_group(sb, e4b->bd_group); @@ -6951,6 +6962,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) break; } + if (set_trimmed) + EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info); + return count; } @@ -6961,7 +6975,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) * 
@start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count - * @set_trimmed: set the trimmed flag if at least one block is trimmed * * ext4_trim_all_free walks through group's block bitmap searching for free * extents. When the free extent is found, mark it as used in group buddy @@ -6971,7 +6984,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks, bool set_trimmed) + ext4_grpblk_t minblocks) { struct ext4_buddy e4b; int ret; @@ -6988,13 +7001,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_lock_group(sb, group); if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || - minblocks < EXT4_SB(sb)->s_last_trim_minblks) { + minblocks < EXT4_SB(sb)->s_last_trim_minblks) ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); - if (ret >= 0 && set_trimmed) - EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); - } else { + else ret = 0; - } ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); @@ -7027,7 +7037,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) ext4_fsblk_t first_data_blk = le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); - bool whole_group, eof = false; int ret = 0; start = range->start >> sb->s_blocksize_bits; @@ -7046,10 +7055,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) goto out; } - if (end >= max_blks - 1) { + if (end >= max_blks - 1) end = max_blks - 1; - eof = true; - } if (end <= first_data_blk) goto out; if (start < first_data_blk) @@ -7063,7 +7070,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) /* end now represents the last cluster to discard in this group */ end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; - whole_group = true; for (group = first_group; 
group <= last_group; group++) { grp = ext4_get_group_info(sb, group); @@ -7082,13 +7088,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) * change it for the last group, note that last_cluster is * already computed earlier by ext4_get_group_no_and_offset() */ - if (group == last_group) { + if (group == last_group) end = last_cluster; - whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1; - } if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen, whole_group); + end, minlen); if (cnt < 0) { ret = cnt; break; From 5229a658f6453362fbb9da6bf96872ef25a7097e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 13 Sep 2023 17:04:55 +0200 Subject: [PATCH 183/333] ext4: do not let fstrim block system suspend Len Brown has reported that system suspend sometimes fails due to the inability to freeze a task working in ext4_trim_fs() for one minute. Trimming a large filesystem on a disk that slowly processes discard requests can indeed take a long time. Since discard is just an advisory call, it is perfectly fine to interrupt it at any time and return the number of blocks discarded until that moment. Do that when we detect the task is being frozen.
Cc: stable@kernel.org Reported-by: Len Brown Suggested-by: Dave Chinner References: https://bugzilla.kernel.org/show_bug.cgi?id=216322 Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230913150504.9054-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 09091adfde64..1e599305d85f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -16,6 +16,7 @@ #include #include #include +#include #include /* @@ -6916,6 +6917,11 @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, EXT4_CLUSTER_BITS(sb); } +static bool ext4_trim_interrupted(void) +{ + return fatal_signal_pending(current) || freezing(current); +} + static int ext4_try_to_trim_range(struct super_block *sb, struct ext4_buddy *e4b, ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) @@ -6949,8 +6955,8 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) free_count += next - start; start = next + 1; - if (fatal_signal_pending(current)) - return -ERESTARTSYS; + if (ext4_trim_interrupted()) + return count; if (need_resched()) { ext4_unlock_group(sb, e4b->bd_group); @@ -7072,6 +7078,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; for (group = first_group; group <= last_group; group++) { + if (ext4_trim_interrupted()) + break; grp = ext4_get_group_info(sb, group); if (!grp) continue; From 7fda67e8c3ab6069f75888f67958a6d30454a9f6 Mon Sep 17 00:00:00 2001 From: Shida Zhang Date: Thu, 3 Aug 2023 14:09:38 +0800 Subject: [PATCH 184/333] ext4: fix rec_len verify error With the configuration PAGE_SIZE 64k and filesystem blocksize 64k, a problem occurred when more than 13 million files were directly created under a directory: EXT4-fs error (device xx): ext4_dx_csum_set:492: inode #xxxx: comm xxxxx: dir seems corrupt? Run e2fsck -D. 
EXT4-fs error (device xx): ext4_dx_csum_verify:463: inode #xxxx: comm xxxxx: dir seems corrupt? Run e2fsck -D. EXT4-fs error (device xx): dx_probe:856: inode #xxxx: block 8188: comm xxxxx: Directory index failed checksum When enough files are created, the fake_dirent->reclen will be 0xffff. It does not equal the blocksize 65536, i.e. 0x10000. But it is not the same condition when the blocksize equals 4k: when enough files are created, the fake_dirent->reclen will be 0x1000, which equals the blocksize 4k, i.e. 0x1000. The problem seems to be related to the limitation of the 16-bit field when the blocksize is set to 64k. To address this, helpers like ext4_rec_len_{from,to}_disk have already been introduced to complete the conversion between the encoded and the plain form of rec_len. So fix this one by using the helper, and all the others in this file too. Cc: stable@kernel.org Fixes: dbe89444042a ("ext4: Calculate and verify checksums for htree nodes") Suggested-by: Andreas Dilger Suggested-by: Darrick J. Wong Signed-off-by: Shida Zhang Reviewed-by: Andreas Dilger Reviewed-by: Darrick J.
Wong Link: https://lore.kernel.org/r/20230803060938.1929759-1-zhangshida@kylinos.cn Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index c0f0b4e2413b..c1ceccab05f5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -343,17 +343,17 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, struct buffer_head *bh) { struct ext4_dir_entry_tail *t; + int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); #ifdef PARANOID struct ext4_dir_entry *d, *top; d = (struct ext4_dir_entry *)bh->b_data; top = (struct ext4_dir_entry *)(bh->b_data + - (EXT4_BLOCK_SIZE(inode->i_sb) - - sizeof(struct ext4_dir_entry_tail))); - while (d < top && d->rec_len) + (blocksize - sizeof(struct ext4_dir_entry_tail))); + while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize)) d = (struct ext4_dir_entry *)(((void *)d) + - le16_to_cpu(d->rec_len)); + ext4_rec_len_from_disk(d->rec_len, blocksize)); if (d != top) return NULL; @@ -364,7 +364,8 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, #endif if (t->det_reserved_zero1 || - le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) || + (ext4_rec_len_from_disk(t->det_rec_len, blocksize) != + sizeof(struct ext4_dir_entry_tail)) || t->det_reserved_zero2 || t->det_reserved_ft != EXT4_FT_DIR_CSUM) return NULL; @@ -445,13 +446,14 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode, struct ext4_dir_entry *dp; struct dx_root_info *root; int count_offset; + int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); + unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize); - if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb)) + if (rlen == blocksize) count_offset = 8; - else if (le16_to_cpu(dirent->rec_len) == 12) { + else if (rlen == 12) { dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); - if (le16_to_cpu(dp->rec_len) != - 
EXT4_BLOCK_SIZE(inode->i_sb) - 12) + if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12) return NULL; root = (struct dx_root_info *)(((void *)dp + 12)); if (root->reserved_zero || @@ -1315,6 +1317,7 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh, unsigned int buflen = bh->b_size; char *base = bh->b_data; struct dx_hash_info h = *hinfo; + int blocksize = EXT4_BLOCK_SIZE(dir->i_sb); if (ext4_has_metadata_csum(dir->i_sb)) buflen -= sizeof(struct ext4_dir_entry_tail); @@ -1335,11 +1338,12 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh, map_tail--; map_tail->hash = h.hash; map_tail->offs = ((char *) de - base)>>2; - map_tail->size = le16_to_cpu(de->rec_len); + map_tail->size = ext4_rec_len_from_disk(de->rec_len, + blocksize); count++; cond_resched(); } - de = ext4_next_entry(de, dir->i_sb->s_blocksize); + de = ext4_next_entry(de, blocksize); } return count; } From c21a8027ad8a68c340d0d58bf1cc61dcb0bc4d2f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 14 Sep 2023 16:51:09 +0100 Subject: [PATCH 185/333] io_uring/net: fix iter retargeting for selected buf When using the selected buffer feature, io_uring delays data iter setup until later. If io_setup_async_msg() is called before that, it might see an incorrectly set up iterator. Pre-init nr_segs and judge from its state whether we are repointing.
Cc: stable@vger.kernel.org Reported-by: syzbot+a4c6e5ef999b68b26ed1@syzkaller.appspotmail.com Fixes: 0455d4ccec548 ("io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0000000000002770be06053c7757@google.com Signed-off-by: Jens Axboe --- io_uring/net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/io_uring/net.c b/io_uring/net.c index 3d07bf79c1e0..7a8e298af81b 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -183,6 +183,10 @@ static int io_setup_async_msg(struct io_kiocb *req, memcpy(async_msg, kmsg, sizeof(*kmsg)); if (async_msg->msg.msg_name) async_msg->msg.msg_name = &async_msg->addr; + + if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs) + return -EAGAIN; + /* if were using fast_iov, set it to the new one */ if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) { size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov; @@ -542,6 +546,7 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { iomsg->msg.msg_name = &iomsg->addr; + iomsg->msg.msg_iter.nr_segs = 0; #ifdef CONFIG_COMPAT if (req->ctx->compat) From c8870379a21fbd9ad14ca36204ccfbe9d25def43 Mon Sep 17 00:00:00 2001 From: Mariusz Tkaczyk Date: Thu, 14 Sep 2023 17:24:16 +0200 Subject: [PATCH 186/333] md: Put the right device in md_seq_next If there are multiple arrays in system and one mddevice is marked with MD_DELETED and md_seq_next() is called in the middle of removal then it _get()s proper device but it may _put() deleted one. As a result, active counter may never be zeroed for mddevice and it cannot be removed. Put the device which has been _get with previous md_seq_next() call. 
Cc: stable@vger.kernel.org Fixes: 12a6caf27324 ("md: only delete entries from all_mddevs when the disk is freed") Reported-by: AceLan Kao Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217798 Cc: Yu Kuai Signed-off-by: Mariusz Tkaczyk Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230914152416.10819-1-mariusz.tkaczyk@linux.intel.com --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 73758b754127..a104a025084d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8265,7 +8265,7 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) spin_unlock(&all_mddevs_lock); if (to_put) - mddev_put(mddev); + mddev_put(to_put); return next_mddev; } From c86e9ae5e3ad82969fe395414d1d9f173f8e9fd4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 10 Sep 2023 21:44:13 +0900 Subject: [PATCH 187/333] kbuild: fix kernel-devel RPM package and linux-headers Deb package Since commit fe66b5d2ae72 ("kbuild: refactor kernel-devel RPM package and linux-headers Deb package"), the kernel-devel RPM package and linux-headers Deb package are broken. I double-quoted the $(find ... -type d), which resulted in newlines being included in the argument to the outer find command. find: 'arch/arm64/include\narch/arm64/kvm/hyp/include': No such file or directory The outer find command is unneeded.
Fixes: fe66b5d2ae72 ("kbuild: refactor kernel-devel RPM package and linux-headers Deb package") Reported-by: Karolis M Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index af7fe9f5b1e4..8a7051fad087 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -20,7 +20,7 @@ mkdir -p "${destdir}" find "arch/${SRCARCH}" -maxdepth 1 -name 'Makefile*' find include scripts -type f -o -type l find "arch/${SRCARCH}" -name Kbuild.platforms -o -name Platform - find "$(find "arch/${SRCARCH}" -name include -o -name scripts -type d)" -type f + find "arch/${SRCARCH}" -name include -o -name scripts -type d ) | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${destdir}" { From 552c5013f2bc648611395ea80df6250aa4fe28f6 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Mon, 11 Sep 2023 10:01:29 +0200 Subject: [PATCH 188/333] kbuild: avoid long argument lists in make modules_install Running "make modules_install" may fail with make[2]: execvp: /bin/sh: Argument list too long if many modules are built and INSTALL_MOD_PATH is long. This is because scripts/Makefile.modinst creates all directories with one mkdir command. Use $(foreach ...) instead to prevent an excessive argument list. 
Fixes: 2dfec887c0fd ("kbuild: reduce the number of mkdir calls during modules_install") Signed-off-by: Michal Kubecek Signed-off-by: Masahiro Yamada --- scripts/Makefile.modinst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index c59cc57286ba..346f5ec50682 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -113,7 +113,7 @@ quiet_cmd_sign := endif # Create necessary directories -$(shell mkdir -p $(sort $(dir $(install-y)))) +$(foreach dir, $(sort $(dir $(install-y))), $(shell mkdir -p $(dir))) $(dst)/%.ko: $(extmod_prefix)%.ko FORCE $(call cmd,install) From fb2c10245f201278804a6f28e196e95436059d6d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 14 Sep 2023 21:42:20 +0200 Subject: [PATCH 189/333] thermal: core: Fix disabled trip point check in handle_thermal_trip() Commit bc840ea5f9a9 ("thermal: core: Do not handle trip points with invalid temperature") added a check for invalid temperature to the disabled trip point check in handle_thermal_trip(), but that check was added at a point when the trip structure has not been initialized yet. This may cause handle_thermal_trip() to skip a valid trip point in some cases, so fix it by moving the check to a suitable place, after __thermal_zone_get_trip() has been called to populate the trip structure. Fixes: bc840ea5f9a9 ("thermal: core: Do not handle trip points with invalid temperature") Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/thermal_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 8717a3343512..58533ea75cd9 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -348,12 +348,14 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip_id) struct thermal_trip trip; /* Ignore disabled trip points */ - if (test_bit(trip_id, &tz->trips_disabled) || - trip.temperature == THERMAL_TEMP_INVALID) + if (test_bit(trip_id, &tz->trips_disabled)) return; __thermal_zone_get_trip(tz, trip_id, &trip); + if (trip.temperature == THERMAL_TEMP_INVALID) + return; + if (tz->last_temperature != THERMAL_TEMP_INVALID) { if (tz->last_temperature < trip.temperature && tz->temperature >= trip.temperature) From 6cc834ba62998c65c42d0c63499bdd35067151ec Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 12 Sep 2023 14:38:58 -0700 Subject: [PATCH 190/333] nvme: avoid bogus CRTO values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices are reporting controller ready mode support, but return 0 for CRTO. These devices require a much higher time to ready than that, so they are failing to initialize after the driver started preferring that value over CAP.TO. The spec requires that CAP.TO match the appropriate CRTO value, or be set to 0xff if CRTO is larger than that. This means that CAP.TO can be used to validate if CRTO is reliable, and provides an appropriate fallback for setting the timeout value if not. Use whichever is larger.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217863 Reported-by: Cláudio Sampaio Reported-by: Felix Yan Tested-by: Felix Yan Based-on-a-patch-by: Felix Yan Cc: stable@vger.kernel.org Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 54 ++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 37b6fa746662..0685ed4f2dc4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2245,25 +2245,8 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) else ctrl->ctrl_config = NVME_CC_CSS_NVM; - if (ctrl->cap & NVME_CAP_CRMS_CRWMS) { - u32 crto; - - ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto); - if (ret) { - dev_err(ctrl->device, "Reading CRTO failed (%d)\n", - ret); - return ret; - } - - if (ctrl->cap & NVME_CAP_CRMS_CRIMS) { - ctrl->ctrl_config |= NVME_CC_CRIME; - timeout = NVME_CRTO_CRIMT(crto); - } else { - timeout = NVME_CRTO_CRWMT(crto); - } - } else { - timeout = NVME_CAP_TIMEOUT(ctrl->cap); - } + if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS) + ctrl->ctrl_config |= NVME_CC_CRIME; ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; @@ -2277,6 +2260,39 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) if (ret) return ret; + /* CAP value may change after initial CC write */ + ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); + if (ret) + return ret; + + timeout = NVME_CAP_TIMEOUT(ctrl->cap); + if (ctrl->cap & NVME_CAP_CRMS_CRWMS) { + u32 crto, ready_timeout; + + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto); + if (ret) { + dev_err(ctrl->device, "Reading CRTO failed (%d)\n", + ret); + return ret; + } + + /* + * CRTO should always be greater or equal to CAP.TO, but some + * devices are known to get this wrong. Use the larger of the + * two values. 
+ */ + if (ctrl->ctrl_config & NVME_CC_CRIME) + ready_timeout = NVME_CRTO_CRIMT(crto); + else + ready_timeout = NVME_CRTO_CRWMT(crto); + + if (ready_timeout < timeout) + dev_warn_once(ctrl->device, "bad crto:%x cap:%llx\n", + crto, ctrl->cap); + else + timeout = ready_timeout; + } + ctrl->ctrl_config |= NVME_CC_ENABLE; ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) From 357950361cbc6d54fb68ed878265c647384684ae Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 9 Sep 2023 13:08:31 +0100 Subject: [PATCH 191/333] btrfs: set last dir index to the current last index when opening dir When opening a directory for reading it, we set the last index where we stop iteration to the value in struct btrfs_inode::index_cnt. That value does not match the index of the most recently added directory entry but it's instead the index number that will be assigned the next directory entry. This means that if after the call to opendir(3) new directory entries are added, a readdir(3) call will return the first new directory entry. This is fine because POSIX says the following [1]: "If a file is removed from or added to the directory after the most recent call to opendir() or rewinddir(), whether a subsequent call to readdir() returns an entry for that file is unspecified." For example for the test script from commit 9b378f6ad48c ("btrfs: fix infinite directory reads"), where we have 2000 files in a directory, ext4 doesn't return any new directory entry after opendir(3), while xfs returns the first 13 new directory entries added after the opendir(3) call. If we move to a shorter example with an empty directory when opendir(3) is called, and 2 files added to the directory after the opendir(3) call, then readdir(3) on btrfs will return the first file, ext4 and xfs return the 2 files (but in a different order). 
A test program for this, reported by Ian Johnson, is the following: #include <dirent.h> #include <stdio.h> int main(void) { DIR *dir = opendir("test"); FILE *file; file = fopen("test/1", "w"); fwrite("1", 1, 1, file); fclose(file); file = fopen("test/2", "w"); fwrite("2", 1, 1, file); fclose(file); struct dirent *entry; while ((entry = readdir(dir))) { printf("%s\n", entry->d_name); } closedir(dir); return 0; } To make this less odd, change the behaviour to never return new entries that were added after the opendir(3) call. This is done by setting the last_index field of the struct btrfs_file_private attached to the directory's file handle with a value matching btrfs_inode::index_cnt minus 1, since that value always matches the index of the next new directory entry and not the index of the most recently added entry. [1] https://pubs.opengroup.org/onlinepubs/007904875/functions/readdir_r.html Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 616fdcf40467..dee4fce6ab72 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5780,7 +5780,8 @@ static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) } } - *index = dir->index_cnt; + /* index_cnt is the index number of next new entry, so decrement it.
*/ + *index = dir->index_cnt - 1; return 0; } From e60aa5da14d01fed8411202dbe4adf6c44bd2a57 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 9 Sep 2023 13:08:32 +0100 Subject: [PATCH 192/333] btrfs: refresh dir last index during a rewinddir(3) call When opening a directory we find what's the index of its last entry and then store it in the directory's file handle private data (struct btrfs_file_private::last_index), so that in the case new directory entries are added to a directory after an opendir(3) call we don't end up in an infinite loop (see commit 9b378f6ad48c ("btrfs: fix infinite directory reads")) when calling readdir(3). However once rewinddir(3) is called, POSIX states [1] that any new directory entries added after the previous opendir(3) call, must be returned by subsequent calls to readdir(3): "The rewinddir() function shall reset the position of the directory stream to which dirp refers to the beginning of the directory. It shall also cause the directory stream to refer to the current state of the corresponding directory, as a call to opendir() would have done." We currently don't refresh the last_index field of the struct btrfs_file_private associated to the directory, so after a rewinddir(3) we are not returning any new entries added after the opendir(3) call. Fix this by finding the current last index of the directory when llseek is called against the directory. 
This can be reproduced by the following C program provided by Ian Johnson: #include <dirent.h> #include <stdio.h> int main(void) { DIR *dir = opendir("test"); FILE *file; file = fopen("test/1", "w"); fwrite("1", 1, 1, file); fclose(file); file = fopen("test/2", "w"); fwrite("2", 1, 1, file); fclose(file); rewinddir(dir); struct dirent *entry; while ((entry = readdir(dir))) { printf("%s\n", entry->d_name); } closedir(dir); return 0; } Reported-by: Ian Johnson Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dee4fce6ab72..2961f1e1e778 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5818,6 +5818,19 @@ static int btrfs_opendir(struct inode *inode, struct file *file) return 0; } +static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct btrfs_file_private *private = file->private_data; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)), + &private->last_index); + if (ret) + return ret; + + return generic_file_llseek(file, offset, whence); +} + struct dir_entry { u64 ino; u64 offset; @@ -10891,7 +10904,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { }; static const struct file_operations btrfs_dir_file_operations = { - .llseek = generic_file_llseek, + .llseek = btrfs_dir_llseek, .read = generic_read_dir, .iterate_shared = btrfs_real_readdir, .open = btrfs_opendir, From 8e7f82deb0c0386a03b62e30082574347f8b57d5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 12 Sep 2023 11:45:39 +0100 Subject: [PATCH 193/333] btrfs: fix race between reading a directory and adding entries to it When opening a directory (opendir(3)) or rewinding it (rewinddir(3)), we are not holding the directory's
inode locked, and this can result in later attempting to add two entries to the directory with the same index number, resulting in a transaction abort, with -EEXIST (-17), when inserting the second delayed dir index. This results in a trace like the following: Sep 11 22:34:59 myhostname kernel: BTRFS error (device dm-3): err add delayed dir index item(name: cockroach-stderr.log) into the insertion tree of the delayed node(root id: 5, inode id: 4539217, errno: -17) Sep 11 22:34:59 myhostname kernel: ------------[ cut here ]------------ Sep 11 22:34:59 myhostname kernel: kernel BUG at fs/btrfs/delayed-inode.c:1504! Sep 11 22:34:59 myhostname kernel: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI Sep 11 22:34:59 myhostname kernel: CPU: 0 PID: 7159 Comm: cockroach Not tainted 6.4.15-200.fc38.x86_64 #1 Sep 11 22:34:59 myhostname kernel: Hardware name: ASUS ESC500 G3/P9D WS, BIOS 2402 06/27/2018 Sep 11 22:34:59 myhostname kernel: RIP: 0010:btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: Code: eb dd 48 (...) 
Sep 11 22:34:59 myhostname kernel: RSP: 0000:ffffa9980e0fbb28 EFLAGS: 00010282 Sep 11 22:34:59 myhostname kernel: RAX: 0000000000000000 RBX: ffff8b10b8f4a3c0 RCX: 0000000000000000 Sep 11 22:34:59 myhostname kernel: RDX: 0000000000000000 RSI: ffff8b177ec21540 RDI: ffff8b177ec21540 Sep 11 22:34:59 myhostname kernel: RBP: ffff8b110cf80888 R08: 0000000000000000 R09: ffffa9980e0fb938 Sep 11 22:34:59 myhostname kernel: R10: 0000000000000003 R11: ffffffff86146508 R12: 0000000000000014 Sep 11 22:34:59 myhostname kernel: R13: ffff8b1131ae5b40 R14: ffff8b10b8f4a418 R15: 00000000ffffffef Sep 11 22:34:59 myhostname kernel: FS: 00007fb14a7fe6c0(0000) GS:ffff8b177ec00000(0000) knlGS:0000000000000000 Sep 11 22:34:59 myhostname kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Sep 11 22:34:59 myhostname kernel: CR2: 000000c00143d000 CR3: 00000001b3b4e002 CR4: 00000000001706f0 Sep 11 22:34:59 myhostname kernel: Call Trace: Sep 11 22:34:59 myhostname kernel: Sep 11 22:34:59 myhostname kernel: ? die+0x36/0x90 Sep 11 22:34:59 myhostname kernel: ? do_trap+0xda/0x100 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? do_error_trap+0x6a/0x90 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? exc_invalid_op+0x50/0x70 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? asm_exc_invalid_op+0x1a/0x20 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: btrfs_insert_dir_item+0x200/0x280 Sep 11 22:34:59 myhostname kernel: btrfs_add_link+0xab/0x4f0 Sep 11 22:34:59 myhostname kernel: ? 
ktime_get_real_ts64+0x47/0xe0 Sep 11 22:34:59 myhostname kernel: btrfs_create_new_inode+0x7cd/0xa80 Sep 11 22:34:59 myhostname kernel: btrfs_symlink+0x190/0x4d0 Sep 11 22:34:59 myhostname kernel: ? schedule+0x5e/0xd0 Sep 11 22:34:59 myhostname kernel: ? __d_lookup+0x7e/0xc0 Sep 11 22:34:59 myhostname kernel: vfs_symlink+0x148/0x1e0 Sep 11 22:34:59 myhostname kernel: do_symlinkat+0x130/0x140 Sep 11 22:34:59 myhostname kernel: __x64_sys_symlinkat+0x3d/0x50 Sep 11 22:34:59 myhostname kernel: do_syscall_64+0x5d/0x90 Sep 11 22:34:59 myhostname kernel: ? syscall_exit_to_user_mode+0x2b/0x40 Sep 11 22:34:59 myhostname kernel: ? do_syscall_64+0x6c/0x90 Sep 11 22:34:59 myhostname kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc The race leading to the problem happens like this: 1) Directory inode X is loaded into memory, its ->index_cnt field is initialized to (u64)-1 (at btrfs_alloc_inode()); 2) Task A is adding a new file to directory X, holding its vfs inode lock, and calls btrfs_set_inode_index() to get an index number for the entry. Because the inode's index_cnt field is set to (u64)-1 it calls btrfs_inode_delayed_dir_index_count() which fails because no dir index entries were added yet to the delayed inode and then it calls btrfs_set_inode_index_count(). This function finds the last dir index key and then sets index_cnt to that index value + 1. It found that the last index key has an offset of 100. However before it assigns a value of 101 to index_cnt... 3) Task B calls opendir(3), ending up at btrfs_opendir(), where the VFS lock for inode X is not taken, so it calls btrfs_get_dir_last_index() and sees index_cnt still with a value of (u64)-1. Because of that it calls btrfs_inode_delayed_dir_index_count() which fails since no dir index entries were added to the delayed inode yet, and then it also calls btrfs_set_inode_index_count(). This also finds that the last index key has an offset of 100, and before it assigns the value 101 to the index_cnt field of inode X...
4) Task A assigns a value of 101 to index_cnt. And then the code flow goes to btrfs_set_inode_index() where it increments index_cnt from 101 to 102. Task A then creates a delayed dir index entry with a sequence number of 101 and adds it to the delayed inode; 5) Task B assigns 101 to the index_cnt field of inode X; 6) At some later point when someone tries to add a new entry to the directory, btrfs_set_inode_index() will return 101 again and shortly after an attempt to add another delayed dir index key with index number 101 will fail with -EEXIST resulting in a transaction abort. Fix this by locking the inode at btrfs_get_dir_last_index(), which is only used when opening a directory or attempting to lseek on it. Reported-by: ken Link: https://lore.kernel.org/linux-btrfs/CAE6xmH+Lp=Q=E61bU+v9eWX8gYfLvu6jLYxjxjFpo3zHVPR0EQ@mail.gmail.com/ Reported-by: syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2961f1e1e778..5e1b15e69ed6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5769,21 +5769,24 @@ out: static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) { - if (dir->index_cnt == (u64)-1) { - int ret; + int ret = 0; + btrfs_inode_lock(dir, 0); + if (dir->index_cnt == (u64)-1) { ret = btrfs_inode_delayed_dir_index_count(dir); if (ret) { ret = btrfs_set_inode_index_count(dir); if (ret) - return ret; + goto out; } } /* index_cnt is the index number of next new entry, so decrement it.
*/ *index = dir->index_cnt - 1; +out: + btrfs_inode_unlock(dir, 0); - return 0; + return ret; } /* From c42d116ccb72b6a33728e2b4b76ab175197ffb07 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 28 Aug 2023 10:57:18 +0200 Subject: [PATCH 194/333] media: ivsc: Depend on VIDEO_DEV CONFIG_VIDEO_DEV is required by other selected symbols. Depend on it. Link: https://lore.kernel.org/linux-media/20230828085718.3912335-1-sakari.ailus@linux.intel.com Reported-by: Randy Dunlap Fixes: 29006e196a56 ("media: pci: intel: ivsc: Add CSI submodule") Signed-off-by: Sakari Ailus Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/pci/intel/ivsc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig index 92c975e98cb1..212753450576 100644 --- a/drivers/media/pci/intel/ivsc/Kconfig +++ b/drivers/media/pci/intel/ivsc/Kconfig @@ -3,7 +3,7 @@ config INTEL_VSC tristate "Intel Visual Sensing Controller" - depends on INTEL_MEI && ACPI + depends on INTEL_MEI && ACPI && VIDEO_DEV select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API select V4L2_ASYNC From e784e78efba87571bcfaab09e8bd81a77c8feaa1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Aug 2023 22:57:36 +0200 Subject: [PATCH 195/333] media: i2c: max9286: Remove an incorrect fwnode_handle_put() call The commit in Fixes has removed an fwnode_handle_put() call in the error handling path of max9286_v4l2_register(). Remove the same call from max9286_v4l2_unregister(). 
Fixes: 1029939b3782 ("media: v4l: async: Simplify async sub-device fwnode matching") Signed-off-by: Christophe JAILLET Reviewed-by: Laurent Pinchart Reviewed-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/max9286.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c index 20e7c7cf5eeb..be84ff1e2b17 100644 --- a/drivers/media/i2c/max9286.c +++ b/drivers/media/i2c/max9286.c @@ -1110,7 +1110,6 @@ err_async: static void max9286_v4l2_unregister(struct max9286_priv *priv) { - fwnode_handle_put(priv->sd.fwnode); v4l2_ctrl_handler_free(&priv->ctrls); v4l2_async_unregister_subdev(&priv->sd); max9286_v4l2_notifier_unregister(priv); From 5cb218ffc54f1865edbe0c2a5ac4e906753817fb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 31 Aug 2023 16:57:45 +0300 Subject: [PATCH 196/333] media: i2c: imx219: Fix a typo referring to a wrong variable The imx219_init_cfg() function has stopped operating on the try format since commit 7e700847b1fe ("media: i2c: imx219: Switch from open to init_cfg"), but a comment in the function wasn't updated. Fix it. While at it, improve spelling in a second comment in the function. Fixes: 7e700847b1fe ("media: i2c: imx219: Switch from open to init_cfg") Signed-off-by: Laurent Pinchart Reviewed-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/imx219.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index a1136fdfbed2..6f88e002c8d8 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -691,12 +691,12 @@ static int imx219_init_cfg(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *format; struct v4l2_rect *crop; - /* Initialize try_fmt */ + /* Initialize the format. 
*/ format = v4l2_subdev_get_pad_format(sd, state, 0); imx219_update_pad_format(imx219, &supported_modes[0], format, MEDIA_BUS_FMT_SRGGB10_1X10); - /* Initialize crop rectangle. */ + /* Initialize the crop rectangle. */ crop = v4l2_subdev_get_pad_crop(sd, state, 0); crop->top = IMX219_PIXEL_ARRAY_TOP; crop->left = IMX219_PIXEL_ARRAY_LEFT; From bb2d01127f5d8e5034daa60a08e68f719ad71ec2 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 31 Aug 2023 16:57:46 +0300 Subject: [PATCH 197/333] media: i2c: imx219: Fix crop rectangle setting when changing format When moving the imx219 driver to the subdev active state, commit e8a5b1df000e ("media: i2c: imx219: Use subdev active state") used the pad crop rectangle stored in the subdev state to report the crop rectangle of the active mode. That crop rectangle was however not set in the state when setting the format, which resulted in reporting an incorrect crop rectangle to userspace. Fix it. Fixes: e8a5b1df000e ("media: i2c: imx219: Use subdev active state") Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/imx219.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index 6f88e002c8d8..f19c828b6943 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -750,6 +750,7 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd, const struct imx219_mode *mode; int exposure_max, exposure_def, hblank; struct v4l2_mbus_framefmt *format; + struct v4l2_rect *crop; mode = v4l2_find_nearest_size(supported_modes, ARRAY_SIZE(supported_modes), @@ -757,11 +758,16 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd, fmt->format.width, fmt->format.height); imx219_update_pad_format(imx219, mode, &fmt->format, fmt->format.code); + format = v4l2_subdev_get_pad_format(sd, sd_state, 0); + crop = v4l2_subdev_get_pad_crop(sd, sd_state, 0); if (imx219->mode == mode && format->code 
== fmt->format.code) return 0; + *format = fmt->format; + *crop = mode->crop; + if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE) { imx219->mode = mode; /* Update limits and set FPS to default */ @@ -788,8 +794,6 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd, hblank); } - *format = fmt->format; - return 0; } From faece4ad72b06308101d7f9cacaf8dd6df4fdc1f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 31 Aug 2023 16:57:47 +0300 Subject: [PATCH 198/333] media: i2c: imx219: Perform a full mode set unconditionally The .set_fmt() handler tries to avoid updating the sensor configuration when the mode hasn't changed. It does so by comparing both the mode and the media bus code. While the latter correctly uses the media bus code stored in the subdev state, the former compares the mode being set with the active mode, regardless of whether .set_fmt() is called for the ACTIVE or TRY format. This can lead to .set_fmt() returning early when operating on TRY formats. This could be fixed by replacing the mode comparison with width and height comparisons, using the frame size stored in the subdev state. However, the optimization that avoids updates to the sensor configuration is not very useful, and is not commonly found in sensor drivers. To improve consistency across sensor drivers, it is better, in addition to being easier, to simply drop it. Do so. 
Fixes: e8a5b1df000e ("media: i2c: imx219: Use subdev active state") Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/imx219.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index f19c828b6943..ec53abe2e84e 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -762,9 +762,6 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd, format = v4l2_subdev_get_pad_format(sd, sd_state, 0); crop = v4l2_subdev_get_pad_crop(sd, sd_state, 0); - if (imx219->mode == mode && format->code == fmt->format.code) - return 0; - *format = fmt->format; *crop = mode->crop; From 12d21fc2ba88e3bb41167afb5c6c0e961f2ab0c9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Aug 2023 22:34:51 +0200 Subject: [PATCH 199/333] media: i2c: rdacm21: Remove an incorrect fwnode_handle_put() call The commit in Fixes has removed an fwnode_handle_put() call in the error handling path of the probe. Remove the same call from the remove function. 
Fixes: 1029939b3782 ("media: v4l: async: Simplify async sub-device fwnode matching") Signed-off-by: Christophe JAILLET Reviewed-by: Jacopo Mondi Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/rdacm21.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/i2c/rdacm21.c b/drivers/media/i2c/rdacm21.c index a36a709243fd..3e22df36354f 100644 --- a/drivers/media/i2c/rdacm21.c +++ b/drivers/media/i2c/rdacm21.c @@ -608,7 +608,6 @@ static void rdacm21_remove(struct i2c_client *client) v4l2_async_unregister_subdev(&dev->sd); v4l2_ctrl_handler_free(&dev->ctrls); i2c_unregister_device(dev->isp); - fwnode_handle_put(dev->sd.fwnode); } static const struct of_device_id rdacm21_of_ids[] = { From 861ab817b5ebe5e34bfbf01943b86ded6bba97b3 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 11 Sep 2023 11:55:55 +0200 Subject: [PATCH 200/333] media: bt8xx: bttv_risc_packed(): remove field checks Do not turn on the vcr_hack based on the btv->field value. This was a change in the bttv vb2 conversion that caused green lines at the bottom of the picture in tvtime. It was originally added to the vb2 conversion based on faulty information that without this there would be glitches in the video. However, later tests suggest that this is a problem in the utilities used to test this since tvtime behaves fine. This patch reverts the bttv driver to the original pre-vb2 behavior w.r.t. vcr_hack. 
Fixes: b7ec3212a73a ("media: bttv: convert to vb2") Signed-off-by: Hans Verkuil --- drivers/media/pci/bt8xx/bttv-risc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/media/pci/bt8xx/bttv-risc.c b/drivers/media/pci/bt8xx/bttv-risc.c index 436baf6c8b08..241a696e374a 100644 --- a/drivers/media/pci/bt8xx/bttv-risc.c +++ b/drivers/media/pci/bt8xx/bttv-risc.c @@ -68,9 +68,7 @@ bttv_risc_packed(struct bttv *btv, struct btcx_riscmem *risc, sg = sglist; for (line = 0; line < store_lines; line++) { if ((line >= (store_lines - VCR_HACK_LINES)) && - (btv->opt_vcr_hack || - (V4L2_FIELD_HAS_BOTH(btv->field) || - btv->field == V4L2_FIELD_ALTERNATE))) + btv->opt_vcr_hack) continue; while (offset && offset >= sg_dma_len(sg)) { offset -= sg_dma_len(sg); From 41ebaa5e0eebea4c3bac96b72f9f8ae0d77c0bdb Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 20 Jul 2023 17:46:54 +0000 Subject: [PATCH 201/333] media: uvcvideo: Fix OOB read If the index provided by the user is bigger than the mask size, we might do an out of bound read. 
CC: stable@kernel.org Fixes: 40140eda661e ("media: uvcvideo: Implement mask for V4L2_CTRL_TYPE_MENU") Reported-by: Zubin Mithra Signed-off-by: Ricardo Ribalda Reviewed-by: Sergey Senozhatsky Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/usb/uvc/uvc_ctrl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 5e9d3da862dd..e59a463c2761 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -1402,6 +1402,9 @@ int uvc_query_v4l2_menu(struct uvc_video_chain *chain, query_menu->id = id; query_menu->index = index; + if (index >= BITS_PER_TYPE(mapping->menu_mask)) + return -EINVAL; + ret = mutex_lock_interruptible(&chain->ctrl_mutex); if (ret < 0) return -ERESTARTSYS; From 735de5caf79e06cc9fb96b1b4f4974674ae3e917 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 17 Aug 2023 12:41:32 +0200 Subject: [PATCH 202/333] media: vb2: frame_vector.c: replace WARN_ONCE with a comment The WARN_ONCE was issued also in cases that had nothing to do with VM_IO (e.g. if the start address was just a random value and uaccess fails with -EFAULT). There are no reports of WARN_ONCE being issued for actual VM_IO cases, so just drop it and instead add a note to the comment before the function. Signed-off-by: Hans Verkuil Reviewed-by: David Hildenbrand Reported-by: Yikebaer Aizezi --- drivers/media/common/videobuf2/frame_vector.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 0f430ddc1f67..fd87747be9b1 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -31,6 +31,10 @@ * different type underlying the specified range of virtual addresses. * When the function isn't able to map a single page, it returns error. 
* + * Note that get_vaddr_frames() cannot follow VM_IO mappings. It used + * to be able to do that, but that could (racily) return non-refcounted + * pfns. + * * This function takes care of grabbing mmap_lock as necessary. */ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, bool write, @@ -59,8 +63,6 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, bool write, if (likely(ret > 0)) return ret; - /* This used to (racily) return non-refcounted pfns. Let people know */ - WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); vec->nr_frames = 0; return ret ? ret : -EFAULT; } From 737dd811a3dbfd7edd4ad2ba5152e93d99074f83 Mon Sep 17 00:00:00 2001 From: Szuying Chen Date: Thu, 7 Sep 2023 16:17:10 +0800 Subject: [PATCH 203/333] ata: libahci: clear pending interrupt status When a CRC error occurs, the HBA asserts an interrupt to indicate an interface fatal error (PxIS.IFS). The ISR clears PxIE and PxIS, then does error recovery. If the adapter receives another SDB FIS with an error (PxIS.TFES) from the device before the start of the EH recovery process, the interrupt signaling the new SDB cannot be serviced as PxIE was cleared already. This in turn results in the HBA inability to issue any command during the error recovery process after setting PxCMD.ST to 1 because PxIS.TFES is still set. According to AHCI 1.3.1 specifications section 6.2.2, fatal errors notified by setting PxIS.HBFS, PxIS.HBDS, PxIS.IFS or PxIS.TFES will cause the HBA to enter the ERR:Fatal state. In this state, the HBA shall not issue any new commands. To avoid this situation, introduce the function ahci_port_clear_pending_irq() to clear pending interrupts before executing a COMRESET. This follows the AHCI 1.3.1 - section 6.2.2.2 specification. 
Signed-off-by: Szuying Chen Fixes: e0bfd149973d ("[PATCH] ahci: stop engine during hard reset") Cc: stable@vger.kernel.org Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libahci.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index e2bacedf28ef..f1263364fa97 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1256,6 +1256,26 @@ static ssize_t ahci_activity_show(struct ata_device *dev, char *buf) return sprintf(buf, "%d\n", emp->blink_policy); } +static void ahci_port_clear_pending_irq(struct ata_port *ap) +{ + struct ahci_host_priv *hpriv = ap->host->private_data; + void __iomem *port_mmio = ahci_port_base(ap); + u32 tmp; + + /* clear SError */ + tmp = readl(port_mmio + PORT_SCR_ERR); + dev_dbg(ap->host->dev, "PORT_SCR_ERR 0x%x\n", tmp); + writel(tmp, port_mmio + PORT_SCR_ERR); + + /* clear port IRQ */ + tmp = readl(port_mmio + PORT_IRQ_STAT); + dev_dbg(ap->host->dev, "PORT_IRQ_STAT 0x%x\n", tmp); + if (tmp) + writel(tmp, port_mmio + PORT_IRQ_STAT); + + writel(1 << ap->port_no, hpriv->mmio + HOST_IRQ_STAT); +} + static void ahci_port_init(struct device *dev, struct ata_port *ap, int port_no, void __iomem *mmio, void __iomem *port_mmio) @@ -1270,18 +1290,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, if (rc) dev_warn(dev, "%s (%d)\n", emsg, rc); - /* clear SError */ - tmp = readl(port_mmio + PORT_SCR_ERR); - dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp); - writel(tmp, port_mmio + PORT_SCR_ERR); - - /* clear port IRQ */ - tmp = readl(port_mmio + PORT_IRQ_STAT); - dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp); - if (tmp) - writel(tmp, port_mmio + PORT_IRQ_STAT); - - writel(1 << port_no, mmio + HOST_IRQ_STAT); + ahci_port_clear_pending_irq(ap); /* mark esata ports */ tmp = readl(port_mmio + PORT_CMD); @@ -1603,6 +1612,8 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, tf.status = ATA_BUSY; 
ata_tf_to_fis(&tf, 0, 0, d2h_fis); + ahci_port_clear_pending_irq(ap); + rc = sata_link_hardreset(link, timing, deadline, online, ahci_check_ready); From e3da4c401f2d088cf049769eb1e39c299867ee9d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 15 Sep 2023 11:33:12 +0900 Subject: [PATCH 204/333] ata: pata_parport: Fix code style issues Fix indentation and other code style issues in the comm.c file. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309150646.n3iBvbPj-lkp@intel.com/ Signed-off-by: Damien Le Moal --- drivers/ata/pata_parport/comm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/ata/pata_parport/comm.c b/drivers/ata/pata_parport/comm.c index 4839becbbd56..94b8d352102e 100644 --- a/drivers/ata/pata_parport/comm.c +++ b/drivers/ata/pata_parport/comm.c @@ -37,7 +37,7 @@ static int comm_read_regr(struct pi_adapter *pi, int cont, int regr) { int l, h, r; - r = regr + cont_map[cont]; + r = regr + cont_map[cont]; switch (pi->mode) { case 0: @@ -90,7 +90,6 @@ static void comm_connect(struct pi_adapter *pi) } static void comm_disconnect(struct pi_adapter *pi) - { w2(0); w2(0); w2(0); w2(4); w0(pi->saved_r0); @@ -172,12 +171,12 @@ static void comm_write_block(struct pi_adapter *pi, char *buf, int count) w4l(swab16(((u16 *)buf)[2 * k]) | swab16(((u16 *)buf)[2 * k + 1]) << 16); break; - } + } } static void comm_log_adapter(struct pi_adapter *pi) - -{ char *mode_string[5] = { "4-bit", "8-bit", "EPP-8", "EPP-16", "EPP-32" }; +{ + char *mode_string[5] = { "4-bit", "8-bit", "EPP-8", "EPP-16", "EPP-32" }; dev_info(&pi->dev, "DataStor Commuter at 0x%x, mode %d (%s), delay %d\n", From ebc3d4e44a7e05457825e03d0560153687265523 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 15 Sep 2023 01:10:40 -0500 Subject: [PATCH 205/333] smb3: correct places where ENOTSUPP is used instead of preferred EOPNOTSUPP checkpatch flagged a few places with: WARNING: ENOTSUPP is not a SUSV4 error code, prefer 
EOPNOTSUPP Also fixed minor typo Signed-off-by: Steve French --- fs/smb/client/inode.c | 2 +- fs/smb/client/smb2ops.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index de2dfbaae821..d7c302442c1e 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2680,7 +2680,7 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, } cifsFileInfo_put(cfile); - return -ENOTSUPP; + return -EOPNOTSUPP; } int cifs_truncate_page(struct address_space *mapping, loff_t from) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index d9eda2e958b4..9aeecee6b91b 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -297,7 +297,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)", credits->value, new_val); - return -ENOTSUPP; + return -EOPNOTSUPP; } spin_lock(&server->req_lock); @@ -1161,7 +1161,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, /* Use a fudge factor of 256 bytes in case we collide * with a different set_EAs command. 
*/ - if(CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - + if (CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - MAX_SMB2_CLOSE_RESPONSE_SIZE - 256 < used_len + ea_name_len + ea_value_len + 1) { rc = -ENOSPC; @@ -4591,7 +4591,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, if (shdr->Command != SMB2_READ) { cifs_server_dbg(VFS, "only big read responses are supported\n"); - return -ENOTSUPP; + return -EOPNOTSUPP; } if (server->ops->is_session_expired && From f037fc9905ffa6fa19b89bfbc86946798cede071 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:06 +0800 Subject: [PATCH 206/333] net: microchip: sparx5: Fix memory leak for vcap_api_rule_add_keyvalue_test() When injecting faults while probing kunit-example-test.ko, the field which is allocated by kzalloc in vcap_rule_add_key() of vcap_rule_add_key_bit/u32/u128() is not freed, which causes the memory leaks below. unreferenced object 0xffff0276c14b7240 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894220 (age 920.072s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<00000000ff8002d3>] vcap_api_rule_add_keyvalue_test+0x100/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c14b7280 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.068s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<00000000f5ac9dc7>]
vcap_api_rule_add_keyvalue_test+0x168/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c14b72c0 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.068s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<00000000c918ae7f>] vcap_api_rule_add_keyvalue_test+0x1d0/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c14b7300 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.084s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<0000000003352814>] vcap_api_rule_add_keyvalue_test+0x240/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c14b7340 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.084s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<000000001516f109>] vcap_api_rule_add_keyvalue_test+0x2cc/0xba8 
[<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- .../net/ethernet/microchip/vcap/vcap_api_kunit.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index c07f25e791c7..2fb0b8cf2b0c 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -995,6 +995,16 @@ static void vcap_api_encode_rule_actionset_test(struct kunit *test) KUNIT_EXPECT_EQ(test, (u32)0x00000000, actwords[11]); } +static void vcap_free_ckf(struct vcap_rule *rule) +{ + struct vcap_client_keyfield *ckf, *next_ckf; + + list_for_each_entry_safe(ckf, next_ckf, &rule->keyfields, ctrl.list) { + list_del(&ckf->ctrl.list); + kfree(ckf); + } +} + static void vcap_api_rule_add_keyvalue_test(struct kunit *test) { struct vcap_admin admin = { @@ -1027,6 +1037,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type); KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.value); KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.mask); + vcap_free_ckf(rule); INIT_LIST_HEAD(&rule->keyfields); ret = vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, VCAP_BIT_1); @@ -1039,6 +1050,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type); KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.value); KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.mask); + vcap_free_ckf(rule); INIT_LIST_HEAD(&rule->keyfields); ret = vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, @@ -1052,6 +1064,7 @@ static void vcap_api_rule_add_keyvalue_test(struct 
kunit *test) KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type); KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.value); KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.mask); + vcap_free_ckf(rule); INIT_LIST_HEAD(&rule->keyfields); ret = vcap_rule_add_key_u32(rule, VCAP_KF_TYPE, 0x98765432, 0xff00ffab); @@ -1064,6 +1077,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, kf->ctrl.type); KUNIT_EXPECT_EQ(test, 0x98765432, kf->data.u32.value); KUNIT_EXPECT_EQ(test, 0xff00ffab, kf->data.u32.mask); + vcap_free_ckf(rule); INIT_LIST_HEAD(&rule->keyfields); ret = vcap_rule_add_key_u128(rule, VCAP_KF_L3_IP6_SIP, &dip); @@ -1078,6 +1092,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, dip.value[idx], kf->data.u128.value[idx]); for (idx = 0; idx < ARRAY_SIZE(dip.mask); ++idx) KUNIT_EXPECT_EQ(test, dip.mask[idx], kf->data.u128.mask[idx]); + vcap_free_ckf(rule); } static void vcap_api_rule_add_actionvalue_test(struct kunit *test) From 39d0ccc185315408e7cecfcaf06d167927b51052 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:07 +0800 Subject: [PATCH 207/333] net: microchip: sparx5: Fix memory leak for vcap_api_rule_add_actionvalue_test() When injecting faults while probing kunit-example-test.ko, the field which is allocated by kzalloc in vcap_rule_add_action() of vcap_rule_add_action_bit/u32() is not freed, which causes the memory leaks below.
unreferenced object 0xffff0276c496b300 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<00000000ae66c16c>] vcap_api_rule_add_actionvalue_test+0xa4/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c496b2c0 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<00000000607782aa>] vcap_api_rule_add_actionvalue_test+0x100/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c496b280 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<000000004e640602>] vcap_api_rule_add_actionvalue_test+0x15c/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c496b240 
(size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.092s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<0000000011141bf8>] vcap_api_rule_add_actionvalue_test+0x1bc/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c496b200 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.092s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<00000000d5ed3088>] vcap_api_rule_add_actionvalue_test+0x22c/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API") Signed-off-by: Jinjie Ruan Signed-off-by: David S. 
Miller --- .../net/ethernet/microchip/vcap/vcap_api_kunit.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 2fb0b8cf2b0c..f268383a7570 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1095,6 +1095,17 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) vcap_free_ckf(rule); } +static void vcap_free_caf(struct vcap_rule *rule) +{ + struct vcap_client_actionfield *caf, *next_caf; + + list_for_each_entry_safe(caf, next_caf, + &rule->actionfields, ctrl.list) { + list_del(&caf->ctrl.list); + kfree(caf); + } +} + static void vcap_api_rule_add_actionvalue_test(struct kunit *test) { struct vcap_admin admin = { @@ -1120,6 +1131,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0x0, af->data.u1.value); + vcap_free_caf(rule); INIT_LIST_HEAD(&rule->actionfields); ret = vcap_rule_add_action_bit(rule, VCAP_AF_POLICE_ENA, VCAP_BIT_1); @@ -1131,6 +1143,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0x1, af->data.u1.value); + vcap_free_caf(rule); INIT_LIST_HEAD(&rule->actionfields); ret = vcap_rule_add_action_bit(rule, VCAP_AF_POLICE_ENA, VCAP_BIT_ANY); @@ -1142,6 +1155,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0x0, af->data.u1.value); + vcap_free_caf(rule); INIT_LIST_HEAD(&rule->actionfields); ret = vcap_rule_add_action_u32(rule, VCAP_AF_TYPE, 0x98765432); @@ -1153,6 +1167,7 @@ 
static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_TYPE, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0x98765432, af->data.u32.value); + vcap_free_caf(rule); INIT_LIST_HEAD(&rule->actionfields); ret = vcap_rule_add_action_u32(rule, VCAP_AF_MASK_MODE, 0xaabbccdd); @@ -1164,6 +1179,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_MASK_MODE, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0xaabbccdd, af->data.u32.value); + vcap_free_caf(rule); } static void vcap_api_rule_find_keyset_basic_test(struct kunit *test) From 89e3af0277388f32d56915a6715c735e4afae5d6 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:08 +0800 Subject: [PATCH 208/333] net: microchip: sparx5: Fix possible memory leak in vcap_api_encode_rule_test() When injecting faults while probing kunit-example-test.ko, the duprule which is allocated in vcap_dup_rule() and the vcap enabled port which is allocated in vcap_enable() of vcap_enable_lookups() in vcap_api_encode_rule_test() are not freed, which causes the memory leaks below. Use vcap_enable_lookups() with a false argument to free the vcap enabled port, as other drivers do, and use vcap_del_rule() to free the duprule. unreferenced object 0xffff677a0278bb00 (size 64): comm "kunit_try_catch", pid 388, jiffies 4294895987 (age 1101.840s) hex dump (first 32 bytes): 18 bd a5 82 00 80 ff ff 18 bd a5 82 00 80 ff ff ................ 40 fe c8 0e be c6 ff ff 00 00 00 00 00 00 00 00 @...............
backtrace: [<000000007d53023a>] slab_post_alloc_hook+0xb8/0x368 [<0000000076e3f654>] __kmem_cache_alloc_node+0x174/0x290 [<0000000034d76721>] kmalloc_trace+0x40/0x164 [<00000000013380a5>] vcap_enable_lookups+0x1c8/0x70c [<00000000bbec496b>] vcap_api_encode_rule_test+0x2f8/0xb18 [<000000002c2bfb7b>] kunit_try_run_case+0x50/0xac [<00000000ff74642b>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<000000004af845ca>] kthread+0x124/0x130 [<0000000038a000ca>] ret_from_fork+0x10/0x20 unreferenced object 0xffff677a027803c0 (size 192): comm "kunit_try_catch", pid 388, jiffies 4294895988 (age 1101.836s) hex dump (first 32 bytes): 00 12 7a 00 05 00 00 00 0a 00 00 00 64 00 00 00 ..z.........d... 00 00 00 00 00 00 00 00 d8 03 78 02 7a 67 ff ff ..........x.zg.. backtrace: [<000000007d53023a>] slab_post_alloc_hook+0xb8/0x368 [<0000000076e3f654>] __kmem_cache_alloc_node+0x174/0x290 [<0000000034d76721>] kmalloc_trace+0x40/0x164 [<00000000c1010131>] vcap_dup_rule+0x34/0x14c [<00000000d43c54a4>] vcap_add_rule+0x29c/0x32c [<0000000073f1c26d>] vcap_api_encode_rule_test+0x304/0xb18 [<000000002c2bfb7b>] kunit_try_run_case+0x50/0xac [<00000000ff74642b>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<000000004af845ca>] kthread+0x124/0x130 [<0000000038a000ca>] ret_from_fork+0x10/0x20 Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API") Signed-off-by: Jinjie Ruan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index f268383a7570..8c61a5dbce55 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1439,6 +1439,10 @@ static void vcap_api_encode_rule_test(struct kunit *test) ret = list_empty(&is2_admin.rules); KUNIT_EXPECT_EQ(test, false, ret); KUNIT_EXPECT_EQ(test, 0, ret); + + vcap_enable_lookups(&test_vctrl, &test_netdev, 0, 0, + rule->cookie, false); + vcap_free_rule(rule); /* Check that the rule has been freed: tricky to access since this @@ -1449,6 +1453,8 @@ static void vcap_api_encode_rule_test(struct kunit *test) KUNIT_EXPECT_EQ(test, true, ret); ret = list_empty(&rule->actionfields); KUNIT_EXPECT_EQ(test, true, ret); + + vcap_del_rule(&test_vctrl, &test_netdev, id); } static void vcap_api_set_rule_counter_test(struct kunit *test) From 20146fa73ab8db2ab9f4916bbaf4610646787a09 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:09 +0800 Subject: [PATCH 209/333] net: microchip: sparx5: Fix possible memory leaks in test_vcap_xn_rule_creator() When injecting faults while probing kunit-example-test.ko, the rule which is allocated by kzalloc in vcap_alloc_rule() and the fields which are allocated by kzalloc in vcap_rule_add_action() and vcap_rule_add_key() are not freed, which causes the memory leaks below. Use vcap_free_rule() to free them, as other drivers do. Since the rule returned by test_vcap_xn_rule_creator() is not used, stop returning it and make the function return void. unreferenced object 0xffff058383334240 (size 192): comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00 .'.............. 00 00 00 00 00 00 00 00 00 81 93 84 83 05 ff ff ................
backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583849380c0 (size 64): comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s) hex dump (first 32 bytes): 40 81 93 84 83 05 ff ff 68 42 33 83 83 05 ff ff @.......hB3..... 22 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 "............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff058384938100 (size 64): comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s) hex dump (first 32 bytes): 80 81 93 84 83 05 ff ff 58 42 33 83 83 05 ff ff ........XB3..... 7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00 }............... 
backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128 [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583833b6240 (size 192): comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00 .'..........,... 00 00 00 00 00 00 00 00 40 91 8f 84 83 05 ff ff ........@....... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583848f9100 (size 64): comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s) hex dump (first 32 bytes): 80 91 8f 84 83 05 ff ff 68 62 3b 83 83 05 ff ff ........hb;..... 22 00 00 00 01 00 00 00 00 00 00 00 a5 b4 ff ff "............... 
backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583848f9140 (size 64): comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s) hex dump (first 32 bytes): c0 91 8f 84 83 05 ff ff 58 62 3b 83 83 05 ff ff ........Xb;..... 7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00 }............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128 [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff05838264e0c0 (size 192): comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.864s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 0a 00 00 00 f4 01 00 00 .'.............. 00 00 00 00 00 00 00 00 40 3a 97 84 83 05 ff ff ........@:...... 
backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff058384973a80 (size 64): comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.864s) hex dump (first 32 bytes): e8 e0 64 82 83 05 ff ff e8 e0 64 82 83 05 ff ff ..d.......d..... 22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff "............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff058384973a40 (size 64): comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.880s) hex dump (first 32 bytes): 80 39 97 84 83 05 ff ff d8 e0 64 82 83 05 ff ff .9........d..... 7d 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 }............... 
backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<0000000094335477>] vcap_add_type_keyfield+0xbc/0x128 [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583832fa240 (size 192): comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00 .'.............. 00 00 00 00 00 00 00 00 00 a1 8b 84 83 05 ff ff ................ backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583848ba0c0 (size 64): comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s) hex dump (first 32 bytes): 40 a1 8b 84 83 05 ff ff 68 a2 2f 83 83 05 ff ff @.......h./..... 22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff "............... 
backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583848ba100 (size 64): comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s) hex dump (first 32 bytes): 80 a1 8b 84 83 05 ff ff 58 a2 2f 83 83 05 ff ff ........X./..... 7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00 }............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128 [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583827d2180 (size 192): comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.956s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00 .'.............. 00 00 00 00 00 00 00 00 00 e1 06 83 83 05 ff ff ................ 
backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff05838306e0c0 (size 64): comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.956s) hex dump (first 32 bytes): 40 e1 06 83 83 05 ff ff a8 21 7d 82 83 05 ff ff @........!}..... 22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff "............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff05838306e180 (size 64): comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.968s) hex dump (first 32 bytes): 98 21 7d 82 83 05 ff ff 00 e1 06 83 83 05 ff ff .!}............. 67 00 00 00 00 00 00 00 01 01 00 00 ff 00 00 00 g............... 
backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<000000006ce4945d>] test_add_def_fields+0x84/0x8c [<00000000507e0ab6>] vcap_val_rule+0x294/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 Fixes: dccc30cc4906 ("net: microchip: sparx5: Add KUNIT test of counters and sorted rules") Signed-off-by: Jinjie Ruan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309090950.uOTEKQq3-lkp@intel.com/ Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 8c61a5dbce55..99f04a53a442 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -243,10 +243,9 @@ static void vcap_test_api_init(struct vcap_admin *admin) } /* Helper function to create a rule of a specific size */ -static struct vcap_rule * -test_vcap_xn_rule_creator(struct kunit *test, int cid, enum vcap_user user, - u16 priority, - int id, int size, int expected_addr) +static void test_vcap_xn_rule_creator(struct kunit *test, int cid, + enum vcap_user user, u16 priority, + int id, int size, int expected_addr) { struct vcap_rule *rule; struct vcap_rule_internal *ri; @@ -311,7 +310,7 @@ test_vcap_xn_rule_creator(struct kunit *test, int cid, enum vcap_user user, ret = vcap_add_rule(rule); KUNIT_EXPECT_EQ(test, 0, ret); KUNIT_EXPECT_EQ(test, 
expected_addr, ri->addr); - return rule; + vcap_free_rule(rule); } /* Prepare testing rule deletion */ From 2a2dffd911d4139258b828b9c5056cb64b826758 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:10 +0800 Subject: [PATCH 210/333] net: microchip: sparx5: Fix possible memory leaks in vcap_api_kunit When a fault is injected while probing kunit-example-test.ko, the duprule which is allocated by kzalloc in vcap_dup_rule() of test_vcap_xn_rule_creator() is not freed, and it causes the memory leaks below. Use vcap_del_rule() to free them, as other functions do. unreferenced object 0xffff6eb4846f6180 (size 192): comm "kunit_try_catch", pid 405, jiffies 4294895522 (age 880.004s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 0a 00 00 00 f4 01 00 00 .'.............. 00 00 00 00 00 00 00 00 98 61 6f 84 b4 6e ff ff .........ao..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000d2ac4ccb>] vcap_api_rule_insert_in_order_test+0xa4/0x114 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4846f6240 (size 192): comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 879.996s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00 .'.............. 00 00 00 00 00 00 00 00 58 62 6f 84 b4 6e ff ff ........Xbo..n.. 
backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<0000000052e6ad35>] vcap_api_rule_insert_in_order_test+0xbc/0x114 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4846f6300 (size 192): comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 879.996s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00 .'..........,... 00 00 00 00 00 00 00 00 18 63 6f 84 b4 6e ff ff .........co..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<000000001b0895d4>] vcap_api_rule_insert_in_order_test+0xd4/0x114 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4846f63c0 (size 192): comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 880.012s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 28 00 00 00 c8 00 00 00 .'......(....... 00 00 00 00 00 00 00 00 d8 63 6f 84 b4 6e ff ff .........co..n.. 
backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000134c151f>] vcap_api_rule_insert_in_order_test+0xec/0x114 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4845fc180 (size 192): comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 c8 00 00 00 .'.............. 00 00 00 00 00 00 00 00 98 c1 5f 84 b4 6e ff ff .........._..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000fa5f64d3>] vcap_api_rule_insert_reverse_order_test+0xc8/0x600 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4845fc240 (size 192): comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00 .'..........,... 00 00 00 00 00 00 00 00 58 c2 5f 84 b4 6e ff ff ........X._..n.. 
backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000453dcd80>] vcap_add_rule+0x134/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000a7db42de>] vcap_api_rule_insert_reverse_order_test+0x108/0x600 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4845fc300 (size 192): comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 28 00 00 00 90 01 00 00 .'......(....... 00 00 00 00 00 00 00 00 18 c3 5f 84 b4 6e ff ff .........._..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000453dcd80>] vcap_add_rule+0x134/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000ea416c94>] vcap_api_rule_insert_reverse_order_test+0x150/0x600 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4845fc3c0 (size 192): comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.020s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 32 00 00 00 f4 01 00 00 .'......2....... 00 00 00 00 00 00 00 00 d8 c3 5f 84 b4 6e ff ff .........._..n.. 
backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000453dcd80>] vcap_add_rule+0x134/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000764a39b4>] vcap_api_rule_insert_reverse_order_test+0x198/0x600 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb484cd4240 (size 192): comm "kunit_try_catch", pid 413, jiffies 4294895543 (age 879.956s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00 .'..........,... 00 00 00 00 00 00 00 00 58 42 cd 84 b4 6e ff ff ........XB...n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<0000000023976dd4>] vcap_api_rule_remove_in_front_test+0x158/0x658 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb484cd4300 (size 192): comm "kunit_try_catch", pid 413, jiffies 4294895543 (age 879.956s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 28 00 00 00 c8 00 00 00 .'......(....... 00 00 00 00 00 00 00 00 18 43 cd 84 b4 6e ff ff .........C...n.. 
backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<000000000b4760ff>] vcap_api_rule_remove_in_front_test+0x170/0x658 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 Fixes: dccc30cc4906 ("net: microchip: sparx5: Add KUNIT test of counters and sorted rules") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- .../net/ethernet/microchip/vcap/vcap_api_kunit.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 99f04a53a442..fe4e166de8a0 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1597,6 +1597,11 @@ static void vcap_api_rule_insert_in_order_test(struct kunit *test) test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 20, 400, 6, 774); test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 30, 300, 3, 771); test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 40, 200, 2, 768); + + vcap_del_rule(&test_vctrl, &test_netdev, 200); + vcap_del_rule(&test_vctrl, &test_netdev, 300); + vcap_del_rule(&test_vctrl, &test_netdev, 400); + vcap_del_rule(&test_vctrl, &test_netdev, 500); } static void vcap_api_rule_insert_reverse_order_test(struct kunit *test) @@ -1655,6 +1660,11 @@ static void vcap_api_rule_insert_reverse_order_test(struct kunit *test) ++idx; } KUNIT_EXPECT_EQ(test, 768, admin.last_used_addr); + + vcap_del_rule(&test_vctrl, &test_netdev, 500); + vcap_del_rule(&test_vctrl, &test_netdev, 400); + vcap_del_rule(&test_vctrl, 
&test_netdev, 300); + vcap_del_rule(&test_vctrl, &test_netdev, 200); } static void vcap_api_rule_remove_at_end_test(struct kunit *test) @@ -1855,6 +1865,9 @@ static void vcap_api_rule_remove_in_front_test(struct kunit *test) KUNIT_EXPECT_EQ(test, 786, test_init_start); KUNIT_EXPECT_EQ(test, 8, test_init_count); KUNIT_EXPECT_EQ(test, 794, admin.last_used_addr); + + vcap_del_rule(&test_vctrl, &test_netdev, 200); + vcap_del_rule(&test_vctrl, &test_netdev, 300); } static struct kunit_case vcap_api_rule_remove_test_cases[] = { From 2c75426c1fea591bb338ba072068f83d2f6be088 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 15 Sep 2023 01:37:33 -0500 Subject: [PATCH 211/333] smb3: fix some minor typos and repeated words Minor cleanup pointed out by checkpatch (repeated words, missing blank lines) in smb2pdu.c and old header location referred to in transport.c Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 6 ++++-- fs/smb/client/transport.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 3403188e3100..44d4943e9c56 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -89,6 +89,7 @@ smb2_hdr_assemble(struct smb2_hdr *shdr, __le16 smb2_cmd, struct TCP_Server_Info *server) { struct smb3_hdr_req *smb3_hdr; + shdr->ProtocolId = SMB2_PROTO_NUMBER; shdr->StructureSize = cpu_to_le16(64); shdr->Command = smb2_cmd; @@ -2239,7 +2240,7 @@ create_durable_v2_buf(struct cifs_open_parms *oparms) * (most servers default to 120 seconds) and most clients default to 0. * This can be overridden at mount ("handletimeout=") if the user wants * a different persistent (or resilient) handle timeout for all opens - * opens on a particular SMB3 mount. + * on a particular SMB3 mount. 
*/ buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout); buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); @@ -2384,7 +2385,7 @@ add_twarp_context(struct kvec *iov, unsigned int *num_iovec, __u64 timewarp) return 0; } -/* See See http://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx */ +/* See http://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx */ static void setup_owner_group_sids(char *buf) { struct owner_group_sids *sids = (struct owner_group_sids *)buf; @@ -3129,6 +3130,7 @@ void SMB2_ioctl_free(struct smb_rqst *rqst) { int i; + if (rqst && rqst->rq_iov) { cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ for (i = 1; i < rqst->rq_nvec; i++) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 1b5d9794ed5b..d52057a511ee 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include From c326ca98446e0ae4fee43a40acf79412b74cfedb Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 12 Sep 2023 16:16:25 +0200 Subject: [PATCH 212/333] selftests: tls: swap the TX and RX sockets in some tests tls.sendmsg_large and tls.sendmsg_multiple are trying to send through the self->cfd socket (only configured with TLS_RX) and to receive through the self->fd socket (only configured with TLS_TX), so they're not using kTLS at all. Swap the sockets. Fixes: 7f657d5bf507 ("selftests: tls: add selftests for TLS sockets") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 297d972558fb..464853a7f982 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -613,11 +613,11 @@ TEST_F(tls, sendmsg_large) msg.msg_iov = &vec; msg.msg_iovlen = 1; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); } while (recvs++ < sends) { - EXPECT_NE(recv(self->fd, mem, send_len, 0), -1); + EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1); } free(mem); @@ -646,9 +646,9 @@ TEST_F(tls, sendmsg_multiple) msg.msg_iov = vec; msg.msg_iovlen = iov_len; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len); buf = malloc(total_len); - EXPECT_NE(recv(self->fd, buf, total_len, 0), -1); + EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1); for (i = 0; i < iov_len; i++) { EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp, strlen(test_strs[i])), From 057a28ef93bdbe84326d34cdb5543afdaab49fe1 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 7 Sep 2023 15:24:34 +0800 Subject: [PATCH 213/333] ALSA: hda: Disable power save for solving pop issue on Lenovo ThinkCentre M70q The Lenovo ThinkCentre M70q has a pop noise at boot-up. Disabling power save solves the pop issue. 
Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/315900e2efef42fd9855eacfeb443abd@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 765d95e79861..ca765ac4765f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2211,6 +2211,7 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x8086, 0x2068, "Intel NUC7i3BNB", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0), + SND_PCI_QUIRK(0x17aa, 0x316e, "Lenovo ThinkCentre M70q", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1689623 */ SND_PCI_QUIRK(0x17aa, 0x367b, "Lenovo IdeaCentre B550", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ From 21484e43b936c4f323d232c6a71c1f47a6af3278 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 14 Sep 2023 16:25:25 +0100 Subject: [PATCH 214/333] ALSA: hda: cs35l56: Fix missing RESET GPIO if _SUB is missing In cs35l56_hda_read_acpi() do not return if ACPI _SUB is missing. A missing _SUB means that the driver cannot load a system-specific firmware, because the firmware is identified by the _SUB. But it can fall back to a generic firmware. Unfortunately this was being handled by immediately returning 0, which would skip the remaining ACPI configuration in cs35l56_hda_read_acpi() and so it would not get the RESET GPIO. 
Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://lore.kernel.org/r/20230914152525.20829-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index bc75865b5de8..87ffe8fbff99 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -865,15 +865,13 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int id) sub = acpi_get_subsystem_id(ACPI_HANDLE(cs35l56->base.dev)); if (IS_ERR(sub)) { - /* If no ACPI SUB, return 0 and fallback to legacy firmware path, otherwise fail */ - if (PTR_ERR(sub) == -ENODATA) - return 0; - else - return PTR_ERR(sub); + dev_info(cs35l56->base.dev, + "Read ACPI _SUB failed(%ld): fallback to generic firmware\n", + PTR_ERR(sub)); + } else { + cs35l56->system_name = sub; } - cs35l56->system_name = sub; - cs35l56->base.reset_gpio = devm_gpiod_get_index_optional(cs35l56->base.dev, "reset", cs35l56->index, From cccd32816506cbac3a4c65d9dff51b3125ef1a03 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 15 Sep 2023 09:55:39 +0200 Subject: [PATCH 215/333] panic: Reenable preemption in WARN slowpath Commit: 5a5d7e9badd2 ("cpuidle: lib/bug: Disable rcu_is_watching() during WARN/BUG") amended warn_slowpath_fmt() to disable preemption until the WARN splat has been emitted. However the commit neglected to reenable preemption in the !fmt codepath, i.e. when a WARN splat is emitted without additional format string. One consequence is that users may see more splats than intended. E.g. a WARN splat emitted in a work item results in at least two extra splats: BUG: workqueue leaked lock or atomic (emitted by process_one_work()) BUG: scheduling while atomic (emitted by worker_thread() -> schedule()) Ironically the point of the commit was to *avoid* extra splats. ;) Fix it. 
Fixes: 5a5d7e9badd2 ("cpuidle: lib/bug: Disable rcu_is_watching() during WARN/BUG") Signed-off-by: Lukas Wunner Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Paul E. McKenney Link: https://lore.kernel.org/r/3ec48fde01e4ee6505f77908ba351bad200ae3d1.1694763684.git.lukas@wunner.de --- kernel/panic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/panic.c b/kernel/panic.c index 07239d4ad81e..ffa037fa777d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -697,6 +697,7 @@ void warn_slowpath_fmt(const char *file, int line, unsigned taint, if (!fmt) { __warn(file, line, __builtin_return_address(0), taint, NULL, NULL); + warn_rcu_exit(rcu); return; } From a8f367f7e131e76713b2949b168ac97f671fce7a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Sep 2023 20:54:51 +0200 Subject: [PATCH 216/333] net: ti: icssg-prueth: add PTP dependency The driver can now use PTP if enabled but fails to link built-in if PTP is a loadable module: aarch64-linux-ld: drivers/net/ethernet/ti/icssg/icss_iep.o: in function `icss_iep_get_ptp_clock_idx': icss_iep.c:(.text+0x200): undefined reference to `ptp_clock_index' Add the usual dependency to avoid this. Fixes: 186734c158865 ("net: ti: icssg-prueth: add packet timestamping and ptp support") Signed-off-by: Arnd Bergmann Reviewed-by: MD Danish Anwar Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 88b5b1b47779..0a3346650e03 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -199,6 +199,7 @@ config TI_ICSSG_PRUETH config TI_ICSS_IEP tristate "TI PRU ICSS IEP driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on TI_PRUSS default TI_PRUSS help From 3c70de9b580998e5d644f4e80a9944c30aa1197b Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 15 Sep 2023 18:33:59 +0900 Subject: [PATCH 217/333] Revert "firewire: core: obsolete usage of GFP_ATOMIC at building node tree" This reverts commit 06f45435d985d60d7d2fe2424fbb9909d177a63d. John Ogness reports the case that the allocation is in atomic context under acquired spin-lock. [ 12.555784] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306 [ 12.555808] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 70, name: kworker/1:2 [ 12.555814] preempt_count: 1, expected: 0 [ 12.555820] INFO: lockdep is turned off. 
[ 12.555824] irq event stamp: 208 [ 12.555828] hardirqs last enabled at (207): [] ._raw_spin_unlock_irq+0x44/0x80 [ 12.555850] hardirqs last disabled at (208): [] .__schedule+0x854/0xfe0 [ 12.555859] softirqs last enabled at (188): [] .addrconf_verify_rtnl+0x2c4/0xb70 [ 12.555872] softirqs last disabled at (182): [] .addrconf_verify_rtnl+0x70/0xb70 [ 12.555884] CPU: 1 PID: 70 Comm: kworker/1:2 Tainted: G S 6.6.0-rc1 #1 [ 12.555893] Hardware name: PowerMac7,2 PPC970 0x390202 PowerMac [ 12.555898] Workqueue: firewire_ohci .bus_reset_work [firewire_ohci] [ 12.555939] Call Trace: [ 12.555944] [c000000009677830] [c0000000010d83c0] .dump_stack_lvl+0x8c/0xd0 (unreliable) [ 12.555963] [c0000000096778b0] [c000000000140270] .__might_resched+0x320/0x340 [ 12.555978] [c000000009677940] [c000000000497600] .__kmem_cache_alloc_node+0x390/0x460 [ 12.555993] [c000000009677a10] [c0000000003fe620] .__kmalloc+0x70/0x310 [ 12.556007] [c000000009677ac0] [c0003d00004e2268] .fw_core_handle_bus_reset+0x2c8/0xba0 [firewire_core] [ 12.556060] [c000000009677c20] [c0003d0000491190] .bus_reset_work+0x330/0x9b0 [firewire_ohci] [ 12.556079] [c000000009677d10] [c00000000011d0d0] .process_one_work+0x280/0x6f0 [ 12.556094] [c000000009677e10] [c00000000011d8a0] .worker_thread+0x360/0x500 [ 12.556107] [c000000009677ef0] [c00000000012e3b4] .kthread+0x154/0x160 [ 12.556120] [c000000009677f90] [c00000000000bfa8] .start_kernel_thread+0x10/0x14 Cc: stable@kernel.org Reported-by: John Ogness Link: https://lore.kernel.org/lkml/87jzsuv1xk.fsf@jogness.linutronix.de/raw Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-device.c | 2 +- drivers/firewire/core-topology.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index a3104e35412c..aa597cda0d88 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -1211,7 +1211,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) 
* without actually having a link. */ create: - device = kzalloc(sizeof(*device), GFP_KERNEL); + device = kzalloc(sizeof(*device), GFP_ATOMIC); if (device == NULL) break; diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index 88466b663482..f40c81534381 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -101,7 +101,7 @@ static struct fw_node *fw_node_create(u32 sid, int port_count, int color) { struct fw_node *node; - node = kzalloc(struct_size(node, ports, port_count), GFP_KERNEL); + node = kzalloc(struct_size(node, ports, port_count), GFP_ATOMIC); if (node == NULL) return NULL; From 75ad80ed88a182ab2ad5513e448cf07b403af5c3 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Wed, 13 Sep 2023 09:39:05 +0300 Subject: [PATCH 218/333] net/core: Fix ETH_P_1588 flow dissector When a PTP ethernet raw frame with a size of more than 256 bytes followed by a 0xff pattern is sent to __skb_flow_dissect, nhoff value calculation is wrong. For example: hdr->message_length takes the wrong value (0xffff) and it does not replicate real header length. In this case, 'nhoff' value was overridden and the PTP header was badly dissected. This leads to a kernel crash. net/core: flow_dissector net/core flow dissector nhoff = 0x0000000e net/core flow dissector hdr->message_length = 0x0000ffff net/core flow dissector nhoff = 0x0001000d (u16 overflow) ... skb linear: 00000000: 00 a0 c9 00 00 00 00 a0 c9 00 00 00 88 skb frag: 00000000: f7 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Using the size of the ptp_header struct will allow the corrected calculation of the nhoff value. net/core flow dissector nhoff = 0x0000000e net/core flow dissector nhoff = 0x00000030 (sizeof ptp_header) ... 
skb linear: 00000000: 00 a0 c9 00 00 00 00 a0 c9 00 00 00 88 f7 ff ff skb linear: 00000010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff skb linear: 00000020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff skb frag: 00000000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Kernel trace: [ 74.984279] ------------[ cut here ]------------ [ 74.989471] kernel BUG at include/linux/skbuff.h:2440! [ 74.995237] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 75.001098] CPU: 4 PID: 0 Comm: swapper/4 Tainted: G U 5.15.85-intel-ese-standard-lts #1 [ 75.011629] Hardware name: Intel Corporation A-Island (CPU:AlderLake)/A-Island (ID:06), BIOS SB_ADLP.01.01.00.01.03.008.D-6A9D9E73-dirty Mar 30 2023 [ 75.026507] RIP: 0010:eth_type_trans+0xd0/0x130 [ 75.031594] Code: 03 88 47 78 eb c7 8b 47 68 2b 47 6c 48 8b 97 c0 00 00 00 83 f8 01 7e 1b 48 85 d2 74 06 66 83 3a ff 74 09 b8 00 04 00 00 eb ab <0f> 0b b8 00 01 00 00 eb a2 48 85 ff 74 eb 48 8d 54 24 06 31 f6 b9 [ 75.052612] RSP: 0018:ffff9948c0228de0 EFLAGS: 00010297 [ 75.058473] RAX: 00000000000003f2 RBX: ffff8e47047dc300 RCX: 0000000000001003 [ 75.066462] RDX: ffff8e4e8c9ea040 RSI: ffff8e4704e0a000 RDI: ffff8e47047dc300 [ 75.074458] RBP: ffff8e4704e2acc0 R08: 00000000000003f3 R09: 0000000000000800 [ 75.082466] R10: 000000000000000d R11: ffff9948c0228dec R12: ffff8e4715e4e010 [ 75.090461] R13: ffff9948c0545018 R14: 0000000000000001 R15: 0000000000000800 [ 75.098464] FS: 0000000000000000(0000) GS:ffff8e4e8fb00000(0000) knlGS:0000000000000000 [ 75.107530] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 75.113982] CR2: 00007f5eb35934a0 CR3: 0000000150e0a002 CR4: 0000000000770ee0 [ 75.121980] PKRU: 55555554 [ 75.125035] Call Trace: [ 75.127792] [ 75.130063] ? 
eth_get_headlen+0xa4/0xc0 [ 75.134472] igc_process_skb_fields+0xcd/0x150 [ 75.139461] igc_poll+0xc80/0x17b0 [ 75.143272] __napi_poll+0x27/0x170 [ 75.147192] net_rx_action+0x234/0x280 [ 75.151409] __do_softirq+0xef/0x2f4 [ 75.155424] irq_exit_rcu+0xc7/0x110 [ 75.159432] common_interrupt+0xb8/0xd0 [ 75.163748] [ 75.166112] [ 75.168473] asm_common_interrupt+0x22/0x40 [ 75.173175] RIP: 0010:cpuidle_enter_state+0xe2/0x350 [ 75.178749] Code: 85 c0 0f 8f 04 02 00 00 31 ff e8 39 6c 67 ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 50 02 00 00 31 ff e8 52 b0 6d ff fb 45 85 f6 <0f> 88 b1 00 00 00 49 63 ce 4c 2b 2c 24 48 89 c8 48 6b d1 68 48 c1 [ 75.199757] RSP: 0018:ffff9948c013bea8 EFLAGS: 00000202 [ 75.205614] RAX: ffff8e4e8fb00000 RBX: ffffb948bfd23900 RCX: 000000000000001f [ 75.213619] RDX: 0000000000000004 RSI: ffffffff94206161 RDI: ffffffff94212e20 [ 75.221620] RBP: 0000000000000004 R08: 000000117568973a R09: 0000000000000001 [ 75.229622] R10: 000000000000afc8 R11: ffff8e4e8fb29ce4 R12: ffffffff945ae980 [ 75.237628] R13: 000000117568973a R14: 0000000000000004 R15: 0000000000000000 [ 75.245635] ? 
cpuidle_enter_state+0xc7/0x350 [ 75.250518] cpuidle_enter+0x29/0x40 [ 75.254539] do_idle+0x1d9/0x260 [ 75.258166] cpu_startup_entry+0x19/0x20 [ 75.262582] secondary_startup_64_no_verify+0xc2/0xcb [ 75.268259] [ 75.270721] Modules linked in: 8021q snd_sof_pci_intel_tgl snd_sof_intel_hda_common tpm_crb snd_soc_hdac_hda snd_sof_intel_hda snd_hda_ext_core snd_sof_pci snd_sof snd_sof_xtensa_dsp snd_soc_acpi_intel_match snd_soc_acpi snd_soc_core snd_compress iTCO_wdt ac97_bus intel_pmc_bxt mei_hdcp iTCO_vendor_support snd_hda_codec_hdmi pmt_telemetry intel_pmc_core pmt_class snd_hda_intel x86_pkg_temp_thermal snd_intel_dspcfg snd_hda_codec snd_hda_core kvm_intel snd_pcm snd_timer kvm snd mei_me soundcore tpm_tis irqbypass i2c_i801 mei tpm_tis_core pcspkr intel_rapl_msr tpm i2c_smbus intel_pmt thermal sch_fq_codel uio uhid i915 drm_buddy video drm_display_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm fuse configfs [ 75.342736] ---[ end trace 3785f9f360400e3a ]--- [ 75.347913] RIP: 0010:eth_type_trans+0xd0/0x130 [ 75.352984] Code: 03 88 47 78 eb c7 8b 47 68 2b 47 6c 48 8b 97 c0 00 00 00 83 f8 01 7e 1b 48 85 d2 74 06 66 83 3a ff 74 09 b8 00 04 00 00 eb ab <0f> 0b b8 00 01 00 00 eb a2 48 85 ff 74 eb 48 8d 54 24 06 31 f6 b9 [ 75.373994] RSP: 0018:ffff9948c0228de0 EFLAGS: 00010297 [ 75.379860] RAX: 00000000000003f2 RBX: ffff8e47047dc300 RCX: 0000000000001003 [ 75.387856] RDX: ffff8e4e8c9ea040 RSI: ffff8e4704e0a000 RDI: ffff8e47047dc300 [ 75.395864] RBP: ffff8e4704e2acc0 R08: 00000000000003f3 R09: 0000000000000800 [ 75.403857] R10: 000000000000000d R11: ffff9948c0228dec R12: ffff8e4715e4e010 [ 75.411863] R13: ffff9948c0545018 R14: 0000000000000001 R15: 0000000000000800 [ 75.419875] FS: 0000000000000000(0000) GS:ffff8e4e8fb00000(0000) knlGS:0000000000000000 [ 75.428946] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 75.435403] CR2: 00007f5eb35934a0 CR3: 0000000150e0a002 CR4: 0000000000770ee0 [ 75.443410] PKRU: 55555554 [ 75.446477] Kernel panic - 
not syncing: Fatal exception in interrupt [ 75.453738] Kernel Offset: 0x11c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 75.465794] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: 4f1cc51f3488 ("net: flow_dissector: Parse PTP L2 packet header") Signed-off-by: Sasha Neftin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b3b3af0e7844..272f09251343 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1446,7 +1446,7 @@ proto_again: break; } - nhoff += ntohs(hdr->message_length); + nhoff += sizeof(struct ptp_header); fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } From 0d42260867f9ff3e3a5bcfa8750fa06a658e0b1c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:50 +0200 Subject: [PATCH 219/333] ALSA: seq: ump: Fix -Wformat-truncation warning The filling of a port name string got a warning with W=1 due to the potentially too long group name. Add the string precision to limit the size. 
Fixes: 81fd444aa371 ("ALSA: seq: Bind UMP device") Link: https://lore.kernel.org/r/20230915082802.28684-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index a60e3f069a80..2db371d79930 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -207,7 +207,7 @@ static void fill_port_info(struct snd_seq_port_info *port, SNDRV_SEQ_PORT_TYPE_PORT; port->midi_channels = 16; if (*group->name) - snprintf(port->name, sizeof(port->name), "Group %d (%s)", + snprintf(port->name, sizeof(port->name), "Group %d (%.53s)", group->group + 1, group->name); else sprintf(port->name, "Group %d", group->group + 1); From 9830c3851fd6f7ca977691632b786ba11a44be77 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:51 +0200 Subject: [PATCH 220/333] ALSA: seq: midi: Fix -Wformat-truncation warning The compile warnings with -Wformat-truncation appearing at snd_seq_midisynth_probe() in seq_midi.c are false-positive; those must fit within the given string size. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_midi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_midi.c b/sound/core/seq/seq_midi.c index 44302d98950e..18320a248aa7 100644 --- a/sound/core/seq/seq_midi.c +++ b/sound/core/seq/seq_midi.c @@ -349,9 +349,9 @@ snd_seq_midisynth_probe(struct device *_dev) if (! 
port->name[0]) { if (info->name[0]) { if (ports > 1) - snprintf(port->name, sizeof(port->name), "%s-%u", info->name, p); + scnprintf(port->name, sizeof(port->name), "%s-%u", info->name, p); else - snprintf(port->name, sizeof(port->name), "%s", info->name); + scnprintf(port->name, sizeof(port->name), "%s", info->name); } else { /* last resort */ if (ports > 1) From 78bd8f5126f854872cc109cbac21c675d8539f3f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:52 +0200 Subject: [PATCH 221/333] ALSA: usb-audio: scarlett_gen2: Fix -Wformat-truncation warning The recent enablement of -Wformat-truncation leads to a false-positive warning for mixer_scarlett_gen2.c. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 9d11bb08667e..5c6f50f38840 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -3218,8 +3218,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) int from = i * info->inputs_per_phantom + 1; int to = (i + 1) * info->inputs_per_phantom; - snprintf(s, sizeof(s), fmt2, from, to, - "Phantom Power", "Switch"); + scnprintf(s, sizeof(s), fmt2, from, to, + "Phantom Power", "Switch"); err = scarlett2_add_new_ctl( mixer, &scarlett2_phantom_ctl, i, 1, s, &private->phantom_ctls[i]); From e9dde5a98288d05313bea24466f76484b8d324bb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:53 +0200 Subject: [PATCH 222/333] ALSA: caiaq: Fix -Wformat-truncation warning The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. 
Link: https://lore.kernel.org/r/20230915082802.28684-5-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/caiaq/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 49f63f878e6f..b5cbf1f195c4 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -485,7 +485,7 @@ static int init_card(struct snd_usb_caiaqdev *cdev) } usb_make_path(usb_dev, usbpath, sizeof(usbpath)); - snprintf(card->longname, sizeof(card->longname), "%s %s (%s)", + scnprintf(card->longname, sizeof(card->longname), "%s %s (%s)", cdev->vendor_name, cdev->product_name, usbpath); setup_card(cdev); From 2a471452599a2e94fcc53ee2b7eac87fccd4ba04 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:54 +0200 Subject: [PATCH 223/333] ALSA: sscape: Fix -Wformat-truncation warning The warning with -Wformat-truncation at sscape_upload_microcode() is false-positive; the version number can be only a single digit, hence fitting with the given string size. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. 
Link: https://lore.kernel.org/r/20230915082802.28684-6-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/sscape.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c index 0bc0025f7c19..cc56fafd27b1 100644 --- a/sound/isa/sscape.c +++ b/sound/isa/sscape.c @@ -557,7 +557,7 @@ static int sscape_upload_microcode(struct snd_card *card, int version) char name[14]; int err; - snprintf(name, sizeof(name), "sndscape.co%d", version); + scnprintf(name, sizeof(name), "sndscape.co%d", version); err = request_firmware(&init_fw, name, card->dev); if (err < 0) { From 399245d3046d7182a9ae97cb0671fc9ed8a579a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:55 +0200 Subject: [PATCH 224/333] ALSA: cs4236: Fix -Wformat-truncation warning The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915082802.28684-7-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/cs423x/cs4236.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 10112e1bb25d..7226cbf2d7de 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -367,14 +367,14 @@ static int snd_cs423x_probe(struct snd_card *card, int dev) strscpy(card->driver, chip->pcm->name, sizeof(card->driver)); strscpy(card->shortname, chip->pcm->name, sizeof(card->shortname)); if (dma2[dev] < 0) - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i, dma %i", - chip->pcm->name, chip->port, irq[dev], dma1[dev]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i, dma %i", + chip->pcm->name, chip->port, irq[dev], dma1[dev]); else - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i, dma %i&%d", - chip->pcm->name, chip->port, irq[dev], 
dma1[dev], - dma2[dev]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i, dma %i&%d", + chip->pcm->name, chip->port, irq[dev], dma1[dev], + dma2[dev]); err = snd_wss_timer(chip, 0); if (err < 0) From 1e97acf3a6609c1329e23c2bf1e703c012bec848 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:56 +0200 Subject: [PATCH 225/333] ALSA: es1688: Fix -Wformat-truncation warning The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915082802.28684-8-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/es1688/es1688.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c index f935b56eeec7..97728bf45474 100644 --- a/sound/isa/es1688/es1688.c +++ b/sound/isa/es1688/es1688.c @@ -130,9 +130,9 @@ static int snd_es1688_probe(struct snd_card *card, unsigned int n) strscpy(card->driver, "ES1688", sizeof(card->driver)); strscpy(card->shortname, chip->pcm->name, sizeof(card->shortname)); - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i, dma %i", chip->pcm->name, chip->port, - chip->irq, chip->dma8); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i, dma %i", chip->pcm->name, chip->port, + chip->irq, chip->dma8); if (fm_port[n] == SNDRV_AUTO_PORT) fm_port[n] = port[n]; /* share the same port */ From bc44e10abb90ac65a7b5337cb3323ec1ce79fb9a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:57 +0200 Subject: [PATCH 226/333] ALSA: opti9x: Fix -Wformat-truncation warning The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. 
Link: https://lore.kernel.org/r/20230915082802.28684-9-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/opti9xx/miro.c | 8 ++++---- sound/isa/opti9xx/opti92x-ad1848.c | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 59242baed576..59792f2fada1 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -1344,10 +1344,10 @@ static int snd_miro_probe(struct snd_card *card) } strcpy(card->driver, "miro"); - snprintf(card->longname, sizeof(card->longname), - "%s: OPTi%s, %s at 0x%lx, irq %d, dma %d&%d", - card->shortname, miro->name, codec->pcm->name, - miro->wss_base + 4, miro->irq, miro->dma1, miro->dma2); + scnprintf(card->longname, sizeof(card->longname), + "%s: OPTi%s, %s at 0x%lx, irq %d, dma %d&%d", + card->shortname, miro->name, codec->pcm->name, + miro->wss_base + 4, miro->irq, miro->dma1, miro->dma2); if (mpu_port <= 0 || mpu_port == SNDRV_AUTO_PORT) rmidi = NULL; diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 4beeb32fe2a7..c33f67dd5133 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -859,15 +859,15 @@ static int snd_opti9xx_probe(struct snd_card *card) strcpy(card->driver, chip->name); sprintf(card->shortname, "OPTi %s", card->driver); #if defined(CS4231) || defined(OPTi93X) - snprintf(card->longname, sizeof(card->longname), - "%s, %s at 0x%lx, irq %d, dma %d&%d", - card->shortname, codec->pcm->name, - chip->wss_base + 4, irq, dma1, xdma2); + scnprintf(card->longname, sizeof(card->longname), + "%s, %s at 0x%lx, irq %d, dma %d&%d", + card->shortname, codec->pcm->name, + chip->wss_base + 4, irq, dma1, xdma2); #else - snprintf(card->longname, sizeof(card->longname), - "%s, %s at 0x%lx, irq %d, dma %d", - card->shortname, codec->pcm->name, chip->wss_base + 4, irq, - dma1); + scnprintf(card->longname, sizeof(card->longname), + "%s, %s at 0x%lx, irq %d, dma %d", + 
card->shortname, codec->pcm->name, chip->wss_base + 4, irq, + dma1); #endif /* CS4231 || OPTi93X */ if (mpu_port <= 0 || mpu_port == SNDRV_AUTO_PORT) From 7272b8bfba35b2333b33d77f73ce75ee161880c2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:58 +0200 Subject: [PATCH 227/333] ALSA: xen: Fix -Wformat-truncation warning The compile warning with -Wformat-truncation at xen_snd_front_cfg_card() is false-positive; the loop can be only for SNDRV_PCM_DEVICES which is at most 32. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-10-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/xen/xen_snd_front_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/xen/xen_snd_front_cfg.c b/sound/xen/xen_snd_front_cfg.c index 63b0398c3276..55ecf766ca67 100644 --- a/sound/xen/xen_snd_front_cfg.c +++ b/sound/xen/xen_snd_front_cfg.c @@ -483,7 +483,7 @@ int xen_snd_front_cfg_card(struct xen_snd_front_info *front_info, *stream_cnt = 0; num_devices = 0; do { - snprintf(node, sizeof(node), "%d", num_devices); + scnprintf(node, sizeof(node), "%d", num_devices); if (!xenbus_exists(XBT_NIL, xb_dev->nodename, node)) break; From 641e969114c781ff269e1bf1b1f8d3cc33bc4a1a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:59 +0200 Subject: [PATCH 228/333] ALSA: firewire: Fix -Wformat-truncation warning for longname string The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. 
Link: https://lore.kernel.org/r/20230915082802.28684-11-tiwai@suse.de Reviewed-by: Takashi Sakamoto Tested-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/fireworks/fireworks.c | 10 +++++----- sound/firewire/oxfw/oxfw.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c index dd4298876ac0..e3ed4e094ccd 100644 --- a/sound/firewire/fireworks/fireworks.c +++ b/sound/firewire/fireworks/fireworks.c @@ -93,11 +93,11 @@ get_hardware_info(struct snd_efw *efw) strcpy(efw->card->driver, "Fireworks"); strcpy(efw->card->shortname, hwinfo->model_name); strcpy(efw->card->mixername, hwinfo->model_name); - snprintf(efw->card->longname, sizeof(efw->card->longname), - "%s %s v%s, GUID %08x%08x at %s, S%d", - hwinfo->vendor_name, hwinfo->model_name, version, - hwinfo->guid_hi, hwinfo->guid_lo, - dev_name(&efw->unit->device), 100 << fw_dev->max_speed); + scnprintf(efw->card->longname, sizeof(efw->card->longname), + "%s %s v%s, GUID %08x%08x at %s, S%d", + hwinfo->vendor_name, hwinfo->model_name, version, + hwinfo->guid_hi, hwinfo->guid_lo, + dev_name(&efw->unit->device), 100 << fw_dev->max_speed); if (hwinfo->flags & BIT(FLAG_RESP_ADDR_CHANGABLE)) efw->resp_addr_changable = true; diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 63d40f1a914f..241a697ce26b 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -108,11 +108,11 @@ static int name_card(struct snd_oxfw *oxfw, const struct ieee1394_device_id *ent strcpy(oxfw->card->mixername, m); strcpy(oxfw->card->shortname, m); - snprintf(oxfw->card->longname, sizeof(oxfw->card->longname), - "%s %s (OXFW%x %04x), GUID %08x%08x at %s, S%d", - v, m, firmware >> 20, firmware & 0xffff, - fw_dev->config_rom[3], fw_dev->config_rom[4], - dev_name(&oxfw->unit->device), 100 << fw_dev->max_speed); + scnprintf(oxfw->card->longname, sizeof(oxfw->card->longname), + "%s %s (OXFW%x 
%04x), GUID %08x%08x at %s, S%d", + v, m, firmware >> 20, firmware & 0xffff, + fw_dev->config_rom[3], fw_dev->config_rom[4], + dev_name(&oxfw->unit->device), 100 << fw_dev->max_speed); end: return err; } From ea77850e98410987525eb392c229949c87779835 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:28:00 +0200 Subject: [PATCH 229/333] ALSA: firewire: Fix -Wformat-truncation warning for MIDI stream names The compile warnings at filling MIDI stream name strings are all false-positive; the number of streams can't go so high. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-12-tiwai@suse.de Reviewed-by: Takashi Sakamoto Tested-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/bebob/bebob_midi.c | 6 +++--- sound/firewire/dice/dice-midi.c | 4 ++-- sound/firewire/digi00x/digi00x-midi.c | 14 +++++++------- sound/firewire/fireface/ff-midi.c | 4 ++-- sound/firewire/fireworks/fireworks_midi.c | 4 ++-- sound/firewire/motu/motu-midi.c | 4 ++-- sound/firewire/oxfw/oxfw-midi.c | 6 +++--- sound/firewire/tascam/tascam-midi.c | 12 ++++++------ 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/sound/firewire/bebob/bebob_midi.c b/sound/firewire/bebob/bebob_midi.c index 6f597d03e7c1..b1425bf98c3b 100644 --- a/sound/firewire/bebob/bebob_midi.c +++ b/sound/firewire/bebob/bebob_midi.c @@ -84,9 +84,9 @@ static void set_midi_substream_names(struct snd_bebob *bebob, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - bebob->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + bebob->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/dice/dice-midi.c b/sound/firewire/dice/dice-midi.c index 4c2998034313..78988e44b8bc 100644 --- a/sound/firewire/dice/dice-midi.c +++ 
b/sound/firewire/dice/dice-midi.c @@ -88,8 +88,8 @@ static void set_midi_substream_names(struct snd_dice *dice, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", dice->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", dice->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/digi00x/digi00x-midi.c b/sound/firewire/digi00x/digi00x-midi.c index 68eb8c39afa6..8f4bace16050 100644 --- a/sound/firewire/digi00x/digi00x-midi.c +++ b/sound/firewire/digi00x/digi00x-midi.c @@ -100,14 +100,14 @@ static void set_substream_names(struct snd_dg00x *dg00x, list_for_each_entry(subs, &str->substreams, list) { if (!is_console) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - dg00x->card->shortname, - subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + dg00x->card->shortname, + subs->number + 1); } else { - snprintf(subs->name, sizeof(subs->name), - "%s control", - dg00x->card->shortname); + scnprintf(subs->name, sizeof(subs->name), + "%s control", + dg00x->card->shortname); } } } diff --git a/sound/firewire/fireface/ff-midi.c b/sound/firewire/fireface/ff-midi.c index 25821d186b87..da3054fdcc7d 100644 --- a/sound/firewire/fireface/ff-midi.c +++ b/sound/firewire/fireface/ff-midi.c @@ -79,8 +79,8 @@ static void set_midi_substream_names(struct snd_rawmidi_str *stream, struct snd_rawmidi_substream *substream; list_for_each_entry(substream, &stream->substreams, list) { - snprintf(substream->name, sizeof(substream->name), - "%s MIDI %d", name, substream->number + 1); + scnprintf(substream->name, sizeof(substream->name), + "%s MIDI %d", name, substream->number + 1); } } diff --git a/sound/firewire/fireworks/fireworks_midi.c b/sound/firewire/fireworks/fireworks_midi.c index 84621e356848..350bf4d299c2 100644 --- a/sound/firewire/fireworks/fireworks_midi.c +++ 
b/sound/firewire/fireworks/fireworks_midi.c @@ -84,8 +84,8 @@ static void set_midi_substream_names(struct snd_efw *efw, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", efw->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", efw->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/motu/motu-midi.c b/sound/firewire/motu/motu-midi.c index 2365f7dfde26..eebc7e790ee2 100644 --- a/sound/firewire/motu/motu-midi.c +++ b/sound/firewire/motu/motu-midi.c @@ -88,8 +88,8 @@ static void set_midi_substream_names(struct snd_motu *motu, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", motu->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", motu->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/oxfw/oxfw-midi.c b/sound/firewire/oxfw/oxfw-midi.c index 775cba3f1f02..c215fa6f7a03 100644 --- a/sound/firewire/oxfw/oxfw-midi.c +++ b/sound/firewire/oxfw/oxfw-midi.c @@ -129,9 +129,9 @@ static void set_midi_substream_names(struct snd_oxfw *oxfw, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - oxfw->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + oxfw->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/tascam/tascam-midi.c b/sound/firewire/tascam/tascam-midi.c index 02eed2dce435..c57fac4f1968 100644 --- a/sound/firewire/tascam/tascam-midi.c +++ b/sound/firewire/tascam/tascam-midi.c @@ -108,9 +108,9 @@ int snd_tscm_create_midi_devices(struct snd_tscm *tscm) /* TODO: support virtual MIDI ports. */ if (subs->number < tscm->spec->midi_capture_ports) { /* Hardware MIDI ports. 
*/ - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - tscm->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + tscm->card->shortname, subs->number + 1); } } @@ -123,9 +123,9 @@ int snd_tscm_create_midi_devices(struct snd_tscm *tscm) list_for_each_entry(subs, &stream->substreams, list) { if (subs->number < tscm->spec->midi_playback_ports) { /* Hardware MIDI ports only. */ - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - tscm->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + tscm->card->shortname, subs->number + 1); } } From 28329936d1e2ff4b962daca1c943f0150890d51e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:28:01 +0200 Subject: [PATCH 230/333] ALSA: cmipci: Fix -Wformat-truncation warning CMIPCI driver got compile warnings with -Wformat-truncation at a couple of plain sprintf() usages. Use scnprintf() for filling the longname string for avoiding the warnings. 
Link: https://lore.kernel.org/r/20230915082802.28684-13-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cmipci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index 1415baac9c36..08e34b184780 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -3102,11 +3102,13 @@ static int snd_cmipci_create(struct snd_card *card, struct pci_dev *pci, } sprintf(card->shortname, "C-Media CMI%d", val); if (cm->chip_version < 68) - sprintf(modelstr, " (model %d)", cm->chip_version); + scnprintf(modelstr, sizeof(modelstr), + " (model %d)", cm->chip_version); else modelstr[0] = '\0'; - sprintf(card->longname, "%s%s at %#lx, irq %i", - card->shortname, modelstr, cm->iobase, cm->irq); + scnprintf(card->longname, sizeof(card->longname), + "%s%s at %#lx, irq %i", + card->shortname, modelstr, cm->iobase, cm->irq); if (cm->chip_version >= 39) { val = snd_cmipci_read_b(cm, CM_REG_MPU_PCI + 1); From 5f6af0050a7a6f8d7972267ddd3a75970e13931e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:28:02 +0200 Subject: [PATCH 231/333] ALSA: hda: generic: Check potential mixer name string truncation add_control_with_pfx() constructs a mixer name element with the fixed size, and it got compile warnings with -Wformat-truncation. Although the size overflow is very unlikely, let's have a sanity check of the string size and returns the error if it really doesn't fit instead of silent truncation. 
Link: https://lore.kernel.org/r/20230915082802.28684-14-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index dbf7aa88e0e3..bf685d01259d 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -998,7 +998,11 @@ static int add_control_with_pfx(struct hda_gen_spec *spec, int type, const char *sfx, int cidx, unsigned long val) { char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - snprintf(name, sizeof(name), "%s %s %s", pfx, dir, sfx); + int len; + + len = snprintf(name, sizeof(name), "%s %s %s", pfx, dir, sfx); + if (snd_BUG_ON(len >= sizeof(name))) + return -EINVAL; if (!add_control(spec, type, name, cidx, val)) return -ENOMEM; return 0; From 60a9c7f7fb98163150964236e07bc57e731ad4f2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 11:13:11 +0200 Subject: [PATCH 232/333] ALSA: ad1848: Fix -Wformat-truncation warning for longname string The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. 
Link: https://lore.kernel.org/r/20230915091313.5988-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/ad1848/ad1848.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c index c471ac2aa450..401d8df28d87 100644 --- a/sound/isa/ad1848/ad1848.c +++ b/sound/isa/ad1848/ad1848.c @@ -96,13 +96,13 @@ static int snd_ad1848_probe(struct device *dev, unsigned int n) strscpy(card->shortname, chip->pcm->name, sizeof(card->shortname)); if (!thinkpad[n]) - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %d, dma %d", - chip->pcm->name, chip->port, irq[n], dma1[n]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %d, dma %d", + chip->pcm->name, chip->port, irq[n], dma1[n]); else - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %d, dma %d [Thinkpad]", - chip->pcm->name, chip->port, irq[n], dma1[n]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %d, dma %d [Thinkpad]", + chip->pcm->name, chip->port, irq[n], dma1[n]); error = snd_card_register(card); if (error < 0) From ba8bb7dce1b2d2e3d582733f015778e3c31d042b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 11:13:12 +0200 Subject: [PATCH 233/333] ALSA: cs4231: Fix -Wformat-truncation warning for longname string The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. 
Link: https://lore.kernel.org/r/20230915091313.5988-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/cs423x/cs4231.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c index 1e8923385366..c87be4be6df1 100644 --- a/sound/isa/cs423x/cs4231.c +++ b/sound/isa/cs423x/cs4231.c @@ -98,13 +98,13 @@ static int snd_cs4231_probe(struct device *dev, unsigned int n) strscpy(card->shortname, chip->pcm->name, sizeof(card->shortname)); if (dma2[n] < 0) - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %d, dma %d", - chip->pcm->name, chip->port, irq[n], dma1[n]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %d, dma %d", + chip->pcm->name, chip->port, irq[n], dma1[n]); else - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %d, dma %d&%d", - chip->pcm->name, chip->port, irq[n], dma1[n], dma2[n]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %d, dma %d&%d", + chip->pcm->name, chip->port, irq[n], dma1[n], dma2[n]); error = snd_wss_mixer(chip); if (error < 0) From 322e0c500073ac4b2f74d8c850c0aeee81be8df3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 11:13:13 +0200 Subject: [PATCH 234/333] ALSA: riptide: Fix -Wformat-truncation warning for longname string The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. 
Link: https://lore.kernel.org/r/20230915091313.5988-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/riptide/riptide.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index b37c877c2c16..9dee0345f22c 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -2105,15 +2105,15 @@ __snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id strcpy(card->driver, "RIPTIDE"); strcpy(card->shortname, "Riptide"); #ifdef SUPPORT_JOYSTICK - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i mpu 0x%x opl3 0x%x gameport 0x%x", - card->shortname, chip->port, chip->irq, chip->mpuaddr, - chip->opladdr, chip->gameaddr); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i mpu 0x%x opl3 0x%x gameport 0x%x", + card->shortname, chip->port, chip->irq, chip->mpuaddr, + chip->opladdr, chip->gameaddr); #else - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i mpu 0x%x opl3 0x%x", - card->shortname, chip->port, chip->irq, chip->mpuaddr, - chip->opladdr); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i mpu 0x%x opl3 0x%x", + card->shortname, chip->port, chip->irq, chip->mpuaddr, + chip->opladdr); #endif snd_riptide_proc_init(chip); err = snd_card_register(card); From c04efbfd76d23157e64e6d6147518c187ab4233a Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Fri, 15 Sep 2023 02:13:44 +0000 Subject: [PATCH 235/333] ASoC: hdaudio.c: Add missing check for devm_kstrdup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because of the potential failure of the devm_kstrdup(), the dl[i].codecs->name could be NULL. Therefore, we need to check it and return -ENOMEM in order to transfer the error. 
Fixes: 97030a43371e ("ASoC: Intel: avs: Add HDAudio machine board") Signed-off-by: Chen Ni Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230915021344.3078-1-nichen@iscas.ac.cn Signed-off-by: Mark Brown --- sound/soc/intel/avs/boards/hdaudio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/avs/boards/hdaudio.c b/sound/soc/intel/avs/boards/hdaudio.c index cb00bc86ac94..8876558f19a1 100644 --- a/sound/soc/intel/avs/boards/hdaudio.c +++ b/sound/soc/intel/avs/boards/hdaudio.c @@ -55,6 +55,9 @@ static int avs_create_dai_links(struct device *dev, struct hda_codec *codec, int return -ENOMEM; dl[i].codecs->name = devm_kstrdup(dev, cname, GFP_KERNEL); + if (!dl[i].codecs->name) + return -ENOMEM; + dl[i].codecs->dai_name = pcm->name; dl[i].num_codecs = 1; dl[i].num_cpus = 1; From b19a5733de255cabba5feecabf6e900638b582d1 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 15 Sep 2023 14:02:11 +0800 Subject: [PATCH 236/333] ASoC: imx-audmix: Fix return error with devm_clk_get() devm_clk_get() can return -EPROBE_DEFER. Overwriting that error code with -EINVAL is not correct, because it prevents the -EPROBE_DEFER error from being handled correctly. Fix this by returning the original error code.
Fixes: b86ef5367761 ("ASoC: fsl: Add Audio Mixer machine driver") Signed-off-by: Shengjiu Wang Reviewed-by: Daniel Baluta Link: https://lore.kernel.org/r/1694757731-18308-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-audmix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c index 0b58df56f4da..aeb81aa61184 100644 --- a/sound/soc/fsl/imx-audmix.c +++ b/sound/soc/fsl/imx-audmix.c @@ -315,7 +315,7 @@ static int imx_audmix_probe(struct platform_device *pdev) if (IS_ERR(priv->cpu_mclk)) { ret = PTR_ERR(priv->cpu_mclk); dev_err(&cpu_pdev->dev, "failed to get DAI mclk1: %d\n", ret); - return -EINVAL; + return ret; } priv->audmix_pdev = audmix_pdev; From e0b65f9b81fef180cf5f103adecbe5505c961153 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 13 Sep 2023 08:26:47 +0300 Subject: [PATCH 237/333] net: thunderbolt: Fix TCPv6 GSO checksum calculation Alex reported that running ssh over IPv6 does not work with the Thunderbolt/USB4 networking driver. The reason is that the driver should call skb_is_gso() before calling skb_is_gso_v6(), and it should not return false after it calculates the checksum successfully. This was probably a copy-paste error from the original driver, where it was done properly. Reported-by: Alex Balcanquall Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Reviewed-by: Eric Dumazet Reviewed-by: Jiri Pirko Reviewed-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/thunderbolt/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/thunderbolt/main.c b/drivers/net/thunderbolt/main.c index 0c1e8970ee58..0a53ec293d04 100644 --- a/drivers/net/thunderbolt/main.c +++ b/drivers/net/thunderbolt/main.c @@ -1049,12 +1049,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 0, ip_hdr(skb)->protocol, 0); - } else if (skb_is_gso_v6(skb)) { + } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) { tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data); *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); - return false; } else if (protocol == htons(ETH_P_IPV6)) { tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset; *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, From 350db8a59eb392bf42e62b6b2a37d56b5833012b Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Wed, 13 Sep 2023 01:41:56 -0700 Subject: [PATCH 238/333] octeon_ep: fix tx dma unmap len values in SG Lengths of SG pointers are kept in the following order in the SG entries in hardware. 63 48|47 32|31 16|15 0 ----------------------------------------- | Len 0 | Len 1 | Len 2 | Len 3 | ----------------------------------------- | Ptr 0 | ----------------------------------------- | Ptr 1 | ----------------------------------------- | Ptr 2 | ----------------------------------------- | Ptr 3 | ----------------------------------------- Dma pointers have to be unmapped based on their respective lengths given in this format. Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support") Signed-off-by: Shinas Rasheed Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- .../net/ethernet/marvell/octeon_ep/octep_main.c | 8 ++++---- .../net/ethernet/marvell/octeon_ep/octep_tx.c | 8 ++++---- .../net/ethernet/marvell/octeon_ep/octep_tx.h | 16 +++++++++++++++- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 4424de2ffd70..dbc518ff8276 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -734,13 +734,13 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, dma_map_sg_err: if (si > 0) { dma_unmap_single(iq->dev, sglist[0].dma_ptr[0], - sglist[0].len[0], DMA_TO_DEVICE); - sglist[0].len[0] = 0; + sglist[0].len[3], DMA_TO_DEVICE); + sglist[0].len[3] = 0; } while (si > 1) { dma_unmap_page(iq->dev, sglist[si >> 2].dma_ptr[si & 3], - sglist[si >> 2].len[si & 3], DMA_TO_DEVICE); - sglist[si >> 2].len[si & 3] = 0; + sglist[si >> 2].len[3 - (si & 3)], DMA_TO_DEVICE); + sglist[si >> 2].len[3 - (si & 3)] = 0; si--; } tx_buffer->gather = 0; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c index 5a520d37bea0..d0adb82d65c3 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c @@ -69,12 +69,12 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget) compl_sg++; dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0], - tx_buffer->sglist[0].len[0], DMA_TO_DEVICE); + tx_buffer->sglist[0].len[3], DMA_TO_DEVICE); i = 1; /* entry 0 is main skb, unmapped above */ while (frags--) { dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3], - tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE); + tx_buffer->sglist[i >> 2].len[3 - (i & 3)], DMA_TO_DEVICE); i++; } @@ -131,13 +131,13 @@ static void octep_iq_free_pending(struct octep_iq *iq) dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0], - 
tx_buffer->sglist[0].len[0], + tx_buffer->sglist[0].len[3], DMA_TO_DEVICE); i = 1; /* entry 0 is main skb, unmapped above */ while (frags--) { dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3], - tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE); + tx_buffer->sglist[i >> 2].len[3 - (i & 3)], DMA_TO_DEVICE); i++; } diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h index 2ef57980eb47..21e75ff9f5e7 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h @@ -17,7 +17,21 @@ #define TX_BUFTYPE_NET_SG 2 #define NUM_TX_BUFTYPES 3 -/* Hardware format for Scatter/Gather list */ +/* Hardware format for Scatter/Gather list + * + * 63 48|47 32|31 16|15 0 + * ----------------------------------------- + * | Len 0 | Len 1 | Len 2 | Len 3 | + * ----------------------------------------- + * | Ptr 0 | + * ----------------------------------------- + * | Ptr 1 | + * ----------------------------------------- + * | Ptr 2 | + * ----------------------------------------- + * | Ptr 3 | + * ----------------------------------------- + */ struct octep_tx_sglist_desc { u16 len[4]; dma_addr_t dma_ptr[4]; From 4506f23e117161a20104c8fa04f33e1ca63c26af Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 13 Jul 2023 15:54:16 -0400 Subject: [PATCH 239/333] NFSv4.1: fix zero value filehandle in post open getattr Currently, if the OPEN compound experiencing an error and needs to get the file attributes separately, it will send a stand alone GETATTR but it would use the filehandle from the results of the OPEN compound. In case of the CLAIM_FH OPEN, nfs_openres's fh is zero value. That generate a GETATTR that's sent with a zero value filehandle, and results in the server returning an error. Instead, for the CLAIM_FH OPEN, take the filehandle that was used in the PUTFH of the OPEN compound. 
Signed-off-by: Olga Kornievskaia Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 794343790ea8..3508d8238826 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2703,8 +2703,12 @@ static int _nfs4_proc_open(struct nfs4_opendata *data, return status; } if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { + struct nfs_fh *fh = &o_res->fh; + nfs4_sequence_free_slot(&o_res->seq_res); - nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, NULL); + if (o_arg->claim == NFS4_OPEN_CLAIM_FH) + fh = NFS_FH(d_inode(data->dentry)); + nfs4_proc_getattr(server, fh, o_res->f_attr, NULL); } return 0; } From c8de44b577eb540e8bfea55afe1d0904bb571b7a Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Mon, 7 Aug 2023 14:59:40 +0200 Subject: [PATCH 240/333] iavf: do not process adminq tasks when __IAVF_IN_REMOVE_TASK is set Prevent schedule operations for adminq during device remove and when __IAVF_IN_REMOVE_TASK flag is set. Currently, the iavf_down function adds operations for adminq that shouldn't be processed when the device is in the __IAVF_REMOVE state. 
Reproduction: echo 4 > /sys/bus/pci/devices/0000:17:00.0/sriov_numvfs ip link set dev ens1f0 vf 0 trust on ip link set dev ens1f0 vf 1 trust on ip link set dev ens1f0 vf 2 trust on ip link set dev ens1f0 vf 3 trust on ip link set dev ens1f0 vf 0 mac 00:22:33:44:55:66 ip link set dev ens1f0 vf 1 mac 00:22:33:44:55:67 ip link set dev ens1f0 vf 2 mac 00:22:33:44:55:68 ip link set dev ens1f0 vf 3 mac 00:22:33:44:55:69 echo 0000:17:02.0 > /sys/bus/pci/devices/0000\:17\:02.0/driver/unbind echo 0000:17:02.1 > /sys/bus/pci/devices/0000\:17\:02.1/driver/unbind echo 0000:17:02.2 > /sys/bus/pci/devices/0000\:17\:02.2/driver/unbind echo 0000:17:02.3 > /sys/bus/pci/devices/0000\:17\:02.3/driver/unbind sleep 10 echo 0000:17:02.0 > /sys/bus/pci/drivers/iavf/bind echo 0000:17:02.1 > /sys/bus/pci/drivers/iavf/bind echo 0000:17:02.2 > /sys/bus/pci/drivers/iavf/bind echo 0000:17:02.3 > /sys/bus/pci/drivers/iavf/bind modprobe vfio-pci echo 8086 154c > /sys/bus/pci/drivers/vfio-pci/new_id qemu-system-x86_64 -accel kvm -m 4096 -cpu host \ -drive file=centos9.qcow2,if=none,id=virtio-disk0 \ -device virtio-blk-pci,drive=virtio-disk0,bootindex=0 -smp 4 \ -device vfio-pci,host=17:02.0 -net none \ -device vfio-pci,host=17:02.1 -net none \ -device vfio-pci,host=17:02.2 -net none \ -device vfio-pci,host=17:02.3 -net none \ -daemonize -vnc :5 Current result: The MAC address of a VF in the guest may be inconsistent with the one configured for it on the host. Expected result: When a NIC VF is passed through to a guest, the VF in the guest should always get the same MAC address as on the host.
Fixes: 14756b2ae265 ("iavf: Fix __IAVF_RESETTING state usage") Signed-off-by: Radoslaw Tyl Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 7b300c86ceda..b23ca9d80189 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1421,7 +1421,8 @@ void iavf_down(struct iavf_adapter *adapter) iavf_clear_fdir_filters(adapter); iavf_clear_adv_rss_conf(adapter); - if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)) { + if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) && + !(test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))) { /* cancel any current operation */ adapter->current_op = VIRTCHNL_OP_UNKNOWN; /* Schedule operations to close down the HW. Don't wait From ed4cad33df9e272feaa6698b33359b29c2929564 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Thu, 7 Sep 2023 17:02:50 +0200 Subject: [PATCH 241/333] iavf: add iavf_schedule_aq_request() helper Add a helper to set the iavf aq request flags (IAVF_FLAG_AQ_*) and immediately schedule the watchdog_task.
Helper will be used in cases where it is necessary to run aq requests asap Signed-off-by: Petr Oros Co-developed-by: Michal Schmidt Signed-off-by: Michal Schmidt Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 2 +- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 2 +- drivers/net/ethernet/intel/iavf/iavf_main.c | 10 ++++------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 85fba85fbb23..e110ba346185 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -521,7 +521,7 @@ void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter); void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags); -void iavf_schedule_request_stats(struct iavf_adapter *adapter); +void iavf_schedule_aq_request(struct iavf_adapter *adapter, u64 flags); void iavf_schedule_finish_config(struct iavf_adapter *adapter); void iavf_reset(struct iavf_adapter *adapter); void iavf_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index a34303ad057d..90397293525f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -362,7 +362,7 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, unsigned int i; /* Explicitly request stats refresh */ - iavf_schedule_request_stats(adapter); + iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_REQUEST_STATS); iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index b23ca9d80189..4b02a8cd77e9 100644 --- 
a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -314,15 +314,13 @@ void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags) } /** - * iavf_schedule_request_stats - Set the flags and schedule statistics request + * iavf_schedule_aq_request - Set the flags and schedule aq request * @adapter: board private structure - * - * Sets IAVF_FLAG_AQ_REQUEST_STATS flag so iavf_watchdog_task() will explicitly - * request and refresh ethtool stats + * @flags: requested aq flags **/ -void iavf_schedule_request_stats(struct iavf_adapter *adapter) +void iavf_schedule_aq_request(struct iavf_adapter *adapter, u64 flags) { - adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS; + adapter->aq_required |= flags; mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); } From c923e7759a29cf67aa4dda77b816263771380f86 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 15 Sep 2023 15:43:00 +0100 Subject: [PATCH 242/333] ASoC: cs42l43: Add shared IRQ flag for shutters The microphone and speaker shutters on cs42l43 can be configured to trigger from the same GPIO, in this case the current code returns an error as we attempt to request two IRQ handlers for the same IRQ. Fix this by always requesting the shutter IRQs with the IRQF_SHARED flag. 
Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230915144300.120100-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 1a95c370fc4c..5643c666d7d0 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2077,7 +2077,8 @@ static const struct cs42l43_irq cs42l43_irqs[] = { static int cs42l43_request_irq(struct cs42l43_codec *priv, struct irq_domain *dom, const char * const name, - unsigned int irq, irq_handler_t handler) + unsigned int irq, irq_handler_t handler, + unsigned long flags) { int ret; @@ -2087,8 +2088,8 @@ static int cs42l43_request_irq(struct cs42l43_codec *priv, dev_dbg(priv->dev, "Request IRQ %d for %s\n", ret, name); - ret = devm_request_threaded_irq(priv->dev, ret, NULL, handler, IRQF_ONESHOT, - name, priv); + ret = devm_request_threaded_irq(priv->dev, ret, NULL, handler, + IRQF_ONESHOT | flags, name, priv); if (ret) return dev_err_probe(priv->dev, ret, "Failed to request IRQ %s\n", name); @@ -2124,11 +2125,11 @@ static int cs42l43_shutter_irq(struct cs42l43_codec *priv, return 0; } - ret = cs42l43_request_irq(priv, dom, close_name, close_irq, handler); + ret = cs42l43_request_irq(priv, dom, close_name, close_irq, handler, IRQF_SHARED); if (ret) return ret; - return cs42l43_request_irq(priv, dom, open_name, open_irq, handler); + return cs42l43_request_irq(priv, dom, open_name, open_irq, handler, IRQF_SHARED); } static int cs42l43_codec_probe(struct platform_device *pdev) @@ -2178,7 +2179,8 @@ static int cs42l43_codec_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(cs42l43_irqs); i++) { ret = cs42l43_request_irq(priv, dom, cs42l43_irqs[i].name, - cs42l43_irqs[i].irq, cs42l43_irqs[i].handler); + cs42l43_irqs[i].irq, + cs42l43_irqs[i].handler, 0); if (ret) goto err_pm; } From e0f96246c4402514acda040be19ee24c1619e01a Mon Sep 
17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 15 Sep 2023 16:41:53 +0300 Subject: [PATCH 243/333] ASoC: SOF: Intel: MTL: Reduce the DSP init timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 20s seems unnecessarily large for the DSP init timeout. This coupled with multiple FW boot attempts causes an excessive delay in the error path when booting in recovery mode. Reduce it to 0.5s and use the existing HDA_DSP_INIT_TIMEOUT_US. Link: https://github.com/thesofproject/linux/issues/4565 Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230915134153.9688-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/mtl.c | 2 +- sound/soc/sof/intel/mtl.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sof/intel/mtl.c b/sound/soc/sof/intel/mtl.c index b84ca58da9d5..f9412517eaf2 100644 --- a/sound/soc/sof/intel/mtl.c +++ b/sound/soc/sof/intel/mtl.c @@ -460,7 +460,7 @@ int mtl_dsp_cl_init(struct snd_sof_dev *sdev, int stream_tag, bool imr_boot) /* step 3: wait for IPC DONE bit from ROM */ ret = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR, chip->ipc_ack, status, ((status & chip->ipc_ack_mask) == chip->ipc_ack_mask), - HDA_DSP_REG_POLL_INTERVAL_US, MTL_DSP_PURGE_TIMEOUT_US); + HDA_DSP_REG_POLL_INTERVAL_US, HDA_DSP_INIT_TIMEOUT_US); if (ret < 0) { if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, "timeout waiting for purge IPC done\n"); diff --git a/sound/soc/sof/intel/mtl.h b/sound/soc/sof/intel/mtl.h index 02181490f12a..95696b3d7c4c 100644 --- a/sound/soc/sof/intel/mtl.h +++ b/sound/soc/sof/intel/mtl.h @@ -62,7 +62,6 @@ #define MTL_DSP_IRQSTS_IPC BIT(0) #define MTL_DSP_IRQSTS_SDW BIT(6) -#define MTL_DSP_PURGE_TIMEOUT_US 20000000 /* 20s */ #define MTL_DSP_REG_POLL_INTERVAL_US 10 /* 10 us */ /* Memory windows */ From 
31bb7bd9ffee50d09ec931998b823a86132ab807 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Sep 2023 15:40:15 +0300 Subject: [PATCH 244/333] ASoC: SOF: core: Only call sof_ops_free() on remove if the probe was successful All the fail paths during probe will free up the ops, on remove we should only free it if the probe was successful. Fixes: bc433fd76fae ("ASoC: SOF: Add ops_free") Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Rander Wang Link: https://lore.kernel.org/r/20230915124015.19637-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index 30db685cc5f4..2d1616b81485 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -486,10 +486,9 @@ int snd_sof_device_remove(struct device *dev) snd_sof_ipc_free(sdev); snd_sof_free_debug(sdev); snd_sof_remove(sdev); + sof_ops_free(sdev); } - sof_ops_free(sdev); - /* release firmware */ snd_sof_fw_unload(sdev); From 5f3d319a248654a805bafc9e7094bcea47dac6c7 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Thu, 7 Sep 2023 17:02:51 +0200 Subject: [PATCH 245/333] iavf: schedule a request immediately after add/delete vlan When the iavf driver wants to reconfigure the VLAN filters (iavf_add_vlan, iavf_del_vlan), it sets a flag in aq_required: adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; or: adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; This is later processed by the watchdog_task, but it runs periodically every 2 seconds, so it can be a long time before it processes the request. In the worst case, the interface is unable to receive traffic for more than 2 seconds for no objective reason. 
Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Petr Oros Co-developed-by: Michal Schmidt Signed-off-by: Michal Schmidt Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Reviewed-by: Ahmed Zaki Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4b02a8cd77e9..6a2e6d64bc3a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -821,7 +821,7 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->vlan_filter_list); f->state = IAVF_VLAN_ADD; adapter->num_vlan_filters++; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; + iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); } clearout: @@ -843,7 +843,7 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) f = iavf_find_vlan(adapter, vlan); if (f) { f->state = IAVF_VLAN_REMOVE; - adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; + iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER); } spin_unlock_bh(&adapter->mac_vlan_list_lock); From d0d362ffa33da4acdcf7aee2116ceef8c8fef658 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 7 Sep 2023 17:44:57 +0200 Subject: [PATCH 246/333] i40e: Fix VF VLAN offloading when port VLAN is configured If port VLAN is configured on a VF then any other VLANs on top of this VF are broken. During i40e_ndo_set_vf_port_vlan() call the i40e driver reset the VF and iavf driver asks PF (using VIRTCHNL_OP_GET_VF_RESOURCES) for VF capabilities but this reset occurs too early, prior setting of vf->info.pvid field and because this field can be zero during i40e_vc_get_vf_resources_msg() then VIRTCHNL_VF_OFFLOAD_VLAN capability is reported to iavf driver. 
This is wrong because iavf driver should not report VLAN offloading capability when port VLAN is configured as i40e does not support QinQ offloading. Fix the issue by moving VF reset after setting of vf->port_vlan_id field. Without this patch: $ echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs $ ip link set enp2s0f0 vf 0 vlan 3 $ ip link set enp2s0f0v0 up $ ip link add link enp2s0f0v0 name vlan4 type vlan id 4 $ ip link set vlan4 up ... $ ethtool -k enp2s0f0v0 | grep vlan-offload rx-vlan-offload: on tx-vlan-offload: on $ dmesg -l err | grep iavf [1292500.742914] iavf 0000:02:02.0: Failed to add VLAN filter, error IAVF_ERR_INVALID_QP_ID With this patch: $ echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs $ ip link set enp2s0f0 vf 0 vlan 3 $ ip link set enp2s0f0v0 up $ ip link add link enp2s0f0v0 name vlan4 type vlan id 4 $ ip link set vlan4 up ... $ ethtool -k enp2s0f0v0 | grep vlan-offload rx-vlan-offload: off [requested on] tx-vlan-offload: off [requested on] $ dmesg -l err | grep iavf Fixes: f9b4b6278d51 ("i40e: Reset the VF upon conflicting VLAN configuration") Signed-off-by: Ivan Vecera Reviewed-by: Jesse Brandeburg Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 8ea1a238dcef..d3d6415553ed 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4475,9 +4475,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, goto error_pvid; i40e_vlan_stripping_enable(vsi); - i40e_vc_reset_vf(vf, true); - /* During reset the VF got a new VSI, so refresh a pointer. 
*/ - vsi = pf->vsi[vf->lan_vsi_idx]; + /* Locked once because multiple functions below iterate list */ spin_lock_bh(&vsi->mac_filter_hash_lock); @@ -4563,6 +4561,10 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, */ vf->port_vlan_id = le16_to_cpu(vsi->info.pvid); + i40e_vc_reset_vf(vf, true); + /* During reset the VF got a new VSI, so refresh a pointer. */ + vsi = pf->vsi[vf->lan_vsi_idx]; + ret = i40e_config_vf_promiscuous_mode(vf, vsi->id, allmulti, alluni); if (ret) { dev_err(&pf->pdev->dev, "Unable to config vf promiscuous mode\n"); From 837723b22a63cfbff584655b009b9d488d0e9087 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 30 Aug 2023 03:07:43 +0200 Subject: [PATCH 247/333] netfilter, bpf: Adjust timeouts of non-confirmed CTs in bpf_ct_insert_entry() bpf_nf testcase fails on s390x: bpf_skb_ct_lookup() cannot find the entry that was added by bpf_ct_insert_entry() within the same BPF function. The reason is that this entry is deleted by nf_ct_gc_expired(). The CT timeout starts ticking after the CT confirmation; therefore nf_conn.timeout is initially set to the timeout value, and __nf_conntrack_confirm() sets it to the deadline value. bpf_ct_insert_entry() sets IPS_CONFIRMED_BIT, but does not adjust the timeout, making its value meaningless and causing false positives. Fix the problem by making bpf_ct_insert_entry() adjust the timeout, like __nf_conntrack_confirm(). 
Fixes: 2cdaa3eefed8 ("netfilter: conntrack: restore IPS_CONFIRMED out of nf_conntrack_hash_check_insert()") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Cc: Florian Westphal Link: https://lore.kernel.org/bpf/20230830011128.1415752-3-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- net/netfilter/nf_conntrack_bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index c7a6114091ae..b21799d468d2 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -381,6 +381,8 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) struct nf_conn *nfct = (struct nf_conn *)nfct_i; int err; + if (!nf_ct_is_confirmed(nfct)) + nfct->timeout += nfct_time_stamp; nfct->status |= IPS_CONFIRMED; err = nf_conntrack_hash_check_insert(nfct); if (err < 0) { From dca7acd84e93f2881e3f63465bbb5d89a40b5d17 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 13 Sep 2023 21:59:43 +0800 Subject: [PATCH 248/333] bpf: Skip unit_size checking for global per-cpu allocator For global per-cpu allocator, the size of free object in free list doesn't match with unit_size and now there is no way to get the size of per-cpu pointer saved in free object, so just skip the checking. 
Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/bpf/20230913133436.0eeec4cb@canb.auug.org.au/ Signed-off-by: Hou Tao Tested-by: Biju Das Link: https://lore.kernel.org/r/20230913135943.3137292-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 1c22b90e754a..cf1941516643 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -491,6 +491,13 @@ static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) struct llist_node *first; unsigned int obj_size; + /* For per-cpu allocator, the size of free objects in free list doesn't + * match with unit_size and now there is no way to get the size of + * per-cpu pointer saved in free object, so just skip the checking. + */ + if (c->percpu_size) + return 0; + first = c->free_llist.first; if (!first) return 0; From 57eb5e1c5c57972c95e8efab6bc81b87161b0b07 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Sep 2023 12:14:20 +0200 Subject: [PATCH 249/333] bpf: Fix uprobe_multi get_pid_task error path Dan reported Smatch static checker warning due to missing error value set in uprobe multi link's get_pid_task error path. 
Reported-by: Dan Carpenter Closes: https://lore.kernel.org/bpf/c5ffa7c0-6b06-40d5-aca2-63833b5cd9af@moroto.mountain/ Signed-off-by: Jiri Olsa Reviewed-by: Song Liu Link: https://lore.kernel.org/r/20230915101420.1193800-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index c1c1af63ced2..868008f56fec 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3223,8 +3223,10 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_PID); rcu_read_unlock(); - if (!task) + if (!task) { + err = -ESRCH; goto error_path_put; + } } err = -ENOMEM; From 8f908db77782630c45ba29dac35c434b5ce0b730 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Sep 2023 10:34:27 -0700 Subject: [PATCH 250/333] bpf: Fix BTF_ID symbol generation collision Marcus and Satya reported an issue where BTF_ID macro generates same symbol in separate objects and that breaks final vmlinux link. ld.lld: error: ld-temp.o :14577:1: symbol '__BTF_ID__struct__cgroup__624' is already defined This can be triggered under specific configs when __COUNTER__ happens to be the same for the same symbol in two different translation units, which is already quite unlikely to happen. Add __LINE__ number suffix to make BTF_ID symbol more unique, which is not a complete fix, but it would help for now and meanwhile we can work on better solution as suggested by Andrii. 
Cc: stable@vger.kernel.org Reported-by: Satya Durga Srinivasu Prabhala Reported-by: Marcus Seyfarth Closes: https://github.com/ClangBuiltLinux/linux/issues/1913 Debugged-by: Nathan Chancellor Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/ Signed-off-by: Jiri Olsa Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-1-263fc519c21f@google.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index a3462a9b8e18..a9cb10b0e2e9 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -49,7 +49,7 @@ word \ ____BTF_ID(symbol, word) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing From c0bb9fb0e52a64601d38b3739b729d9138d4c8a1 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 15 Sep 2023 10:34:28 -0700 Subject: [PATCH 251/333] bpf: Fix BTF_ID symbol generation collision in tools/ Marcus and Satya reported an issue where BTF_ID macro generates same symbol in separate objects and that breaks final vmlinux link. ld.lld: error: ld-temp.o :14577:1: symbol '__BTF_ID__struct__cgroup__624' is already defined This can be triggered under specific configs when __COUNTER__ happens to be the same for the same symbol in two different translation units, which is already quite unlikely to happen. Add __LINE__ number suffix to make BTF_ID symbol more unique, which is not a complete fix, but it would help for now and meanwhile we can work on better solution as suggested by Andrii. 
Cc: stable@vger.kernel.org Reported-by: Satya Durga Srinivasu Prabhala Reported-by: Marcus Seyfarth Closes: https://github.com/ClangBuiltLinux/linux/issues/1913 Debugged-by: Nathan Chancellor Co-developed-by: Jiri Olsa Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/ Signed-off-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-2-263fc519c21f@google.com Signed-off-by: Alexei Starovoitov --- tools/include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 71e54b1e3796..2f882d5cb30f 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -38,7 +38,7 @@ asm( \ ____BTF_ID(symbol) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing From a9ce385344f916cd1c36a33905e564f5581beae9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Sep 2023 13:14:23 -0600 Subject: [PATCH 252/333] dm: don't attempt to queue IO under RCU protection dm looks up the table for IO based on the request type, with an assumption that if the request is marked REQ_NOWAIT, it's fine to attempt to submit that IO while under RCU read lock protection. This is not OK, as REQ_NOWAIT just means that we should not be sleeping waiting on other IO, it does not mean that we can't potentially schedule. 
A simple test case demonstrates this quite nicely: int main(int argc, char *argv[]) { struct iovec iov; int fd; fd = open("/dev/dm-0", O_RDONLY | O_DIRECT); posix_memalign(&iov.iov_base, 4096, 4096); iov.iov_len = 4096; preadv2(fd, &iov, 1, 0, RWF_NOWAIT); return 0; } which will instantly spew: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 5580, name: dm-nowait preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 INFO: lockdep is turned off. CPU: 7 PID: 5580 Comm: dm-nowait Not tainted 6.6.0-rc1-g39956d2dcd81 #132 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 Call Trace: dump_stack_lvl+0x11d/0x1b0 __might_resched+0x3c3/0x5e0 ? preempt_count_sub+0x150/0x150 mempool_alloc+0x1e2/0x390 ? mempool_resize+0x7d0/0x7d0 ? lock_sync+0x190/0x190 ? lock_release+0x4b7/0x670 ? internal_get_user_pages_fast+0x868/0x2d40 bio_alloc_bioset+0x417/0x8c0 ? bvec_alloc+0x200/0x200 ? internal_get_user_pages_fast+0xb8c/0x2d40 bio_alloc_clone+0x53/0x100 dm_submit_bio+0x27f/0x1a20 ? lock_release+0x4b7/0x670 ? blk_try_enter_queue+0x1a0/0x4d0 ? dm_dax_direct_access+0x260/0x260 ? rcu_is_watching+0x12/0xb0 ? blk_try_enter_queue+0x1cc/0x4d0 __submit_bio+0x239/0x310 ? __bio_queue_enter+0x700/0x700 ? kvm_clock_get_cycles+0x40/0x60 ? ktime_get+0x285/0x470 submit_bio_noacct_nocheck+0x4d9/0xb80 ? should_fail_request+0x80/0x80 ? preempt_count_sub+0x150/0x150 ? lock_release+0x4b7/0x670 ? __bio_add_page+0x143/0x2d0 ? iov_iter_revert+0x27/0x360 submit_bio_noacct+0x53e/0x1b30 submit_bio_wait+0x10a/0x230 ? submit_bio_wait_endio+0x40/0x40 __blkdev_direct_IO_simple+0x4f8/0x780 ? blkdev_bio_end_io+0x4c0/0x4c0 ? stack_trace_save+0x90/0xc0 ? __bio_clone+0x3c0/0x3c0 ? lock_release+0x4b7/0x670 ? lock_sync+0x190/0x190 ? atime_needs_update+0x3bf/0x7e0 ? timestamp_truncate+0x21b/0x2d0 ? inode_owner_or_capable+0x240/0x240 blkdev_direct_IO.part.0+0x84a/0x1810 ? 
rcu_is_watching+0x12/0xb0 ? lock_release+0x4b7/0x670 ? blkdev_read_iter+0x40d/0x530 ? reacquire_held_locks+0x4e0/0x4e0 ? __blkdev_direct_IO_simple+0x780/0x780 ? rcu_is_watching+0x12/0xb0 ? __mark_inode_dirty+0x297/0xd50 ? preempt_count_add+0x72/0x140 blkdev_read_iter+0x2a4/0x530 do_iter_readv_writev+0x2f2/0x3c0 ? generic_copy_file_range+0x1d0/0x1d0 ? fsnotify_perm.part.0+0x25d/0x630 ? security_file_permission+0xd8/0x100 do_iter_read+0x31b/0x880 ? import_iovec+0x10b/0x140 vfs_readv+0x12d/0x1a0 ? vfs_iter_read+0xb0/0xb0 ? rcu_is_watching+0x12/0xb0 ? rcu_is_watching+0x12/0xb0 ? lock_release+0x4b7/0x670 do_preadv+0x1b3/0x260 ? do_readv+0x370/0x370 __x64_sys_preadv2+0xef/0x150 do_syscall_64+0x39/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f5af41ad806 Code: 41 54 41 89 fc 55 44 89 c5 53 48 89 cb 48 83 ec 18 80 3d e4 dd 0d 00 00 74 7a 45 89 c1 49 89 ca 45 31 c0 b8 47 01 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 be 00 00 00 48 85 c0 79 4a 48 8b 0d da 55 RSP: 002b:00007ffd3145c7f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000147 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5af41ad806 RDX: 0000000000000001 RSI: 00007ffd3145c850 RDI: 0000000000000003 RBP: 0000000000000008 R08: 0000000000000000 R09: 0000000000000008 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003 R13: 00007ffd3145c850 R14: 000055f5f0431dd8 R15: 0000000000000001 where in fact it is dm itself that attempts to allocate a bio clone with GFP_NOIO under the rcu read lock, regardless of the request type. Fix this by getting rid of the special casing for REQ_NOWAIT, and just use the normal SRCU protected table lookup. Get rid of the bio based table locking helpers at the same time, as they are now unused. 
Cc: stable@vger.kernel.org Fixes: 563a225c9fd2 ("dm: introduce dm_{get,put}_live_table_bio called from dm_submit_bio") Signed-off-by: Jens Axboe Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f0f118ab20fa..64a1f306c96c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -715,24 +715,6 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) rcu_read_unlock(); } -static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md, - int *srcu_idx, blk_opf_t bio_opf) -{ - if (bio_opf & REQ_NOWAIT) - return dm_get_live_table_fast(md); - else - return dm_get_live_table(md, srcu_idx); -} - -static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx, - blk_opf_t bio_opf) -{ - if (bio_opf & REQ_NOWAIT) - dm_put_live_table_fast(md); - else - dm_put_live_table(md, srcu_idx); -} - static char *_dm_claim_ptr = "I belong to device-mapper"; /* @@ -1833,9 +1815,8 @@ static void dm_submit_bio(struct bio *bio) struct mapped_device *md = bio->bi_bdev->bd_disk->private_data; int srcu_idx; struct dm_table *map; - blk_opf_t bio_opf = bio->bi_opf; - map = dm_get_live_table_bio(md, &srcu_idx, bio_opf); + map = dm_get_live_table(md, &srcu_idx); /* If suspended, or map not yet available, queue this IO for later */ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || @@ -1851,7 +1832,7 @@ static void dm_submit_bio(struct bio *bio) dm_split_and_process_bio(md, map, bio); out: - dm_put_live_table_bio(md, srcu_idx, bio_opf); + dm_put_live_table(md, srcu_idx); } static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob, From c656a4d5484ad99e97de549a9affc12a91d94963 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 15 Sep 2023 15:46:08 -0400 Subject: [PATCH 253/333] Revert "SUNRPC: clean up integer overflow check" This reverts commit e87cf8a28e7592bd19064e8181324ae26bc02932. 
This commit was added to silence a tautological comparison warning, but removing the 'len' value check before calling xdr_inline_decode() is really not what we want. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5b4fb3c791bc..896a6d2a9cf0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -779,7 +779,9 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p))); + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; + p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; if (array == NULL) From 993b5662f302628db4eb358d69b2720c88cbfaf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Sep 2023 16:12:33 -0400 Subject: [PATCH 254/333] SUNRPC: Silence compiler complaints about tautological comparisons On 64-bit systems, the compiler will complain that the comparison between SIZE_MAX and the 32-bit unsigned int 'len' is unnecessary. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 896a6d2a9cf0..2f8dc47f1eb0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -779,7 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - if (len > SIZE_MAX / sizeof(*p)) + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) From b2ce0027d7b2905495021c5208f92043eb493146 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 16 Sep 2023 08:07:25 +0200 Subject: [PATCH 255/333] ALSA: rawmidi: Fix NULL dereference at proc read At the implementation of the optional proc fs in rawmidi, I forgot that rmidi->ops itself is optional and can be NULL. Add the proper NULL check for avoiding the Oops. Fixes: fa030f666d24 ("ALSA: ump: Additional proc output") Reported-and-tested-by: Mark Hills Closes: https://lore.kernel.org/r/ef9118c3-a2eb-d0ff-1efa-cc5fb6416bde@xwax.org Cc: Link: https://lore.kernel.org/r/20230916060725.11726-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index ba06484ac4aa..1431cb997808 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1770,7 +1770,7 @@ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, if (IS_ENABLED(CONFIG_SND_UMP)) snd_iprintf(buffer, "Type: %s\n", rawmidi_is_ump(rmidi) ? 
"UMP" : "Legacy"); - if (rmidi->ops->proc_read) + if (rmidi->ops && rmidi->ops->proc_read) rmidi->ops->proc_read(entry, buffer); mutex_lock(&rmidi->open_mutex); if (rmidi->info_flags & SNDRV_RAWMIDI_INFO_OUTPUT) { From 8f6b846b0a86c3cbae8a25b772651cfc2270ad0a Mon Sep 17 00:00:00 2001 From: David Christensen Date: Thu, 14 Sep 2023 18:02:52 -0400 Subject: [PATCH 256/333] ionic: fix 16bit math issue when PAGE_SIZE >= 64KB The ionic device supports a maximum buffer length of 16 bits (see ionic_rxq_desc or ionic_rxq_sg_elem). When adding new buffers to the receive rings, the function ionic_rx_fill() uses 16bit math when calculating the number of pages to allocate for an RX descriptor, given the interface's MTU setting. If the system PAGE_SIZE >= 64KB, and the buf_info->page_offset is 0, the remain_len value will never decrement from the original MTU value and the frag_len value will always be 0, causing additional pages to be allocated as scatter- gather elements unnecessarily. A similar math issue exists in ionic_rx_frags(), but no failures have been observed here since a 64KB page should not normally require any scatter-gather elements at any legal Ethernet MTU size. Fixes: 4b0a7539a372 ("ionic: implement Rx page reuse") Signed-off-by: David Christensen Reviewed-by: Shannon Nelson Signed-off-by: David S. 
Miller --- drivers/net/ethernet/pensando/ionic/ionic_dev.h | 1 + drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 6aac98bcb9f4..aae4131f146a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -187,6 +187,7 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q, struct ionic_desc_info *desc_info, struct ionic_cq_info *cq_info, void *cb_arg); +#define IONIC_MAX_BUF_LEN ((u16)-1) #define IONIC_PAGE_SIZE PAGE_SIZE #define IONIC_PAGE_SPLIT_SZ (PAGE_SIZE / 2) #define IONIC_PAGE_GFP_MASK (GFP_ATOMIC | __GFP_NOWARN |\ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 26798fc635db..44466e8c5d77 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -207,7 +207,8 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, return NULL; } - frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); + frag_len = min_t(u16, len, min_t(u32, IONIC_MAX_BUF_LEN, + IONIC_PAGE_SIZE - buf_info->page_offset)); len -= frag_len; dma_sync_single_for_cpu(dev, @@ -452,7 +453,8 @@ void ionic_rx_fill(struct ionic_queue *q) /* fill main descriptor - buf[0] */ desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); - frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); + frag_len = min_t(u16, len, min_t(u32, IONIC_MAX_BUF_LEN, + IONIC_PAGE_SIZE - buf_info->page_offset)); desc->len = cpu_to_le16(frag_len); remain_len -= frag_len; buf_info++; @@ -471,7 +473,9 @@ void ionic_rx_fill(struct ionic_queue *q) } sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); - frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset); + frag_len = min_t(u16, 
remain_len, min_t(u32, IONIC_MAX_BUF_LEN, + IONIC_PAGE_SIZE - + buf_info->page_offset)); sg_elem->len = cpu_to_le16(frag_len); remain_len -= frag_len; buf_info++; From 80cc944eca4f0baa9c381d0706f3160e491437f2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Sep 2023 00:19:16 +0200 Subject: [PATCH 257/333] ata: libata-eh: do not clear ATA_PFLAG_EH_PENDING in ata_eh_reset() ata_scsi_port_error_handler() starts off by clearing ATA_PFLAG_EH_PENDING, before calling ap->ops->error_handler() (without holding the ap->lock). If an error IRQ is received while ap->ops->error_handler() is running, the irq handler will set ATA_PFLAG_EH_PENDING. Once ap->ops->error_handler() returns, ata_scsi_port_error_handler() checks if ATA_PFLAG_EH_PENDING is set, and if it is, another iteration of ATA EH is performed. The problem is that ATA_PFLAG_EH_PENDING is not only cleared by ata_scsi_port_error_handler(), it is also cleared by ata_eh_reset(). ata_eh_reset() is called by ap->ops->error_handler(). This additional clearing done by ata_eh_reset() breaks the whole retry logic in ata_scsi_port_error_handler(). Thus, if an error IRQ is received while ap->ops->error_handler() is running, the port will currently remain frozen and will never get re-enabled. The additional clearing in ata_eh_reset() was introduced in commit 1e641060c4b5 ("libata: clear eh_info on reset completion"). 
Looking at the original error report: https://marc.info/?l=linux-ide&m=124765325828495&w=2 We can see the following happening: [ 1.074659] ata3: XXX port freeze [ 1.074700] ata3: XXX hardresetting link, stopping engine [ 1.074746] ata3: XXX flipping SControl [ 1.411471] ata3: XXX irq_stat=400040 CONN|PHY [ 1.411475] ata3: XXX port freeze [ 1.420049] ata3: XXX starting engine [ 1.420096] ata3: XXX rc=0, class=1 [ 1.420142] ata3: XXX clearing IRQs for thawing [ 1.420188] ata3: XXX port thawed [ 1.420234] ata3: SATA link up 3.0 Gbps (SStatus 123 SControl 300) We are not supposed to be able to receive an error IRQ while the port is frozen (PxIE is set to 0, i.e. all IRQs for the port are disabled). AHCI 1.3.1 section 10.7.1.1 First Tier (IS Register) states: "Each bit location can be thought of as reporting a '1' if the virtual "interrupt line" for that port is indicating it wishes to generate an interrupt. That is, if a port has one or more interrupt status bit set, and the enables for those status bits are set, then this bit shall be set." Additionally, AHCI state P:ComInit clearly shows that the state machine will only jump to P:ComInitSetIS (which sets IS.IPS(x) to '1'), if PxIE.PCE is set to '1'. In our case, PxIE is set to 0, so IS.IPS(x) won't get set. So IS.IPS(x) only gets set if PxIS and PxIE is set. AHCI 1.3.1 section 10.7.1.1 First Tier (IS Register) also states: "The bits in this register are read/write clear. It is set by the level of the virtual interrupt line being a set, and cleared by a write of '1' from the software." So if IS.IPS(x) is set, you need to explicitly clear it by writing a 1 to IS.IPS(x) for that port. Since PxIE is cleared, the only way to get an interrupt while the port is frozen, is if IS.IPS(x) is set, and the only way IS.IPS(x) can be set when the port is frozen, is if it was set before the port was frozen. 
However, since commit 737dd811a3db ("ata: libahci: clear pending interrupt status"), we clear both PxIS and IS.IPS(x) after freezing the port, but before the COMRESET, so the problem that commit 1e641060c4b5 ("libata: clear eh_info on reset completion") fixed can no longer happen. Thus, revert commit 1e641060c4b5 ("libata: clear eh_info on reset completion"), so that the retry logic in ata_scsi_port_error_handler() works once again. (The retry logic is still needed, since we can still get an error IRQ _after_ the port has been thawed, but before ata_scsi_port_error_handler() takes the ap->lock in order to check if ATA_PFLAG_EH_PENDING is set.) Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-eh.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 159ba6ba19eb..5c493b6316eb 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2796,18 +2796,11 @@ int ata_eh_reset(struct ata_link *link, int classify, } } - /* - * Some controllers can't be frozen very well and may set spurious - * error conditions during reset. Clear accumulated error - * information and re-thaw the port if frozen. As reset is the - * final recovery action and we cross check link onlineness against - * device classification later, no hotplug event is lost by this. 
- */ + /* clear cached SError */ spin_lock_irqsave(link->ap->lock, flags); - memset(&link->eh_info, 0, sizeof(link->eh_info)); + link->eh_info.serror = 0; if (slave) - memset(&slave->eh_info, 0, sizeof(link->eh_info)); - ap->pflags &= ~ATA_PFLAG_EH_PENDING; + slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags); if (ata_port_is_frozen(ap)) From 7a3bc2b3989e05bbaa904a63279049a401491c84 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Sep 2023 00:19:17 +0200 Subject: [PATCH 258/333] ata: libata-eh: do not thaw the port twice in ata_eh_reset() commit 1e641060c4b5 ("libata: clear eh_info on reset completion") added a workaround that broke the retry mechanism in ATA EH. Tejun himself suggested to remove this workaround when it was identified to cause additional problems: https://lore.kernel.org/linux-ide/20110426135027.GI878@htj.dyndns.org/ He even said: "Hmm... it seems I wasn't thinking straight when I added that work around." https://lore.kernel.org/linux-ide/20110426155229.GM878@htj.dyndns.org/ While removing the workaround solved the issue, however, the workaround was kept to avoid "spurious hotplug events during reset", and instead another workaround was added on top of the existing workaround in commit 8c56cacc724c ("libata: fix unexpectedly frozen port after ata_eh_reset()"). Because these IRQs happened when the port was frozen, we know that they were actually a side effect of PxIS and IS.IPS(x) not being cleared before the COMRESET. This is now done in commit 94152042eaa9 ("ata: libahci: clear pending interrupt status"), so these workarounds can now be removed. Since commit 1e641060c4b5 ("libata: clear eh_info on reset completion") has now been reverted, the ATA EH retry mechanism is functional again, so there is once again no need to thaw the port more than once in ata_eh_reset(). This reverts "the workaround on top of the workaround" introduced in commit 8c56cacc724c ("libata: fix unexpectedly frozen port after ata_eh_reset()"). 
Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-eh.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 5c493b6316eb..4cf4f57e57b8 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2803,9 +2803,6 @@ int ata_eh_reset(struct ata_link *link, int classify, slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags); - if (ata_port_is_frozen(ap)) - ata_eh_thaw_port(ap); - /* * Make sure onlineness and classification result correspond. * Hotplug could have happened during reset and some From 5e35a9ac3fe3a0d571b899a16ca84253e53dc70c Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 13 Sep 2023 17:04:43 +0200 Subject: [PATCH 259/333] ata: libata-core: fetch sense data for successful commands iff CDL enabled Currently, we fetch sense data for a _successful_ command if either: 1) Command was NCQ and ATA_DFLAG_CDL_ENABLED flag set (flag ATA_DFLAG_CDL_ENABLED will only be set if the Successful NCQ command sense data supported bit is set); or 2) Command was non-NCQ and regular sense data reporting is enabled. This means that case 2) will trigger for a non-NCQ command which has ATA_SENSE bit set, regardless if CDL is enabled or not. This decision was by design. If the device reports that it has sense data available, it makes sense to fetch that sense data, since the sk/asc/ascq could be important information regardless if CDL is enabled or not. However, the fetching of sense data for a successful command is done via ATA EH. Considering how intricate the ATA EH is, we really do not want to invoke ATA EH unless absolutely needed. Before commit 18bd7718b5c4 ("scsi: ata: libata: Handle completion of CDL commands using policy 0xD") we never fetched sense data for successful commands. 
In order to not invoke the ATA EH unless absolutely necessary, even if the device claims support for sense data reporting, only fetch sense data for successful commands (NCQ and non-NCQ) that are using CDL. [Damien] Modified the check to test the qc flag ATA_QCFLAG_HAS_CDL instead of the device support for CDL, which is implied for commands using CDL. Fixes: 3ac873c76d79 ("ata: libata-core: fix when to fetch sense data for successful commands") Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 74314311295f..0072e0f9ad39 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4783,11 +4783,8 @@ void ata_qc_complete(struct ata_queued_cmd *qc) * been aborted by the device due to a limit timeout using the policy * 0xD. For these commands, invoke EH to get the command sense data. */ - if (qc->result_tf.status & ATA_SENSE && - ((ata_is_ncq(qc->tf.protocol) && - dev->flags & ATA_DFLAG_CDL_ENABLED) || - (!ata_is_ncq(qc->tf.protocol) && - ata_id_sense_reporting_enabled(dev->id)))) { + if (qc->flags & ATA_QCFLAG_HAS_CDL && + qc->result_tf.status & ATA_SENSE) { /* * Tell SCSI EH to not overwrite scmd->result even if this * command is finished with result SAM_STAT_GOOD. From aabb4af9bb29f8532e19c872b48ad1e7fd208617 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 Sep 2023 14:09:57 +0300 Subject: [PATCH 260/333] net: core: Use the bitmap API to allocate bitmaps Use bitmap_zalloc() and bitmap_free() instead of hand-writing them. It is less verbose and it improves the type checking and semantics. While at it, add missing header inclusion (should be bitops.h, but with the above change it becomes bitmap.h).
Suggested-by: Sergey Ryazanov Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230911154534.4174265-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Przemek Kitszel Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index ccff2b6ef958..85df22f05c38 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -69,7 +69,7 @@ */ #include -#include +#include #include #include #include @@ -1080,7 +1080,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) return -EINVAL; /* Use one page as a bit array of possible slots */ - inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); + inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC); if (!inuse) return -ENOMEM; @@ -1109,7 +1109,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) } i = find_first_zero_bit(inuse, max_netdevices); - free_page((unsigned long) inuse); + bitmap_free(inuse); } snprintf(buf, IFNAMSIZ, name, i); From cb47b1f679c4d83a5fa5f1852e472f844e41a3da Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 13 Sep 2023 11:06:15 -0700 Subject: [PATCH 261/333] igc: Fix infinite initialization loop with early XDP redirect When an XDP redirect happens before the link is ready, that transmission will not finish and will timeout, causing an adapter reset. If the redirects do not stop, the adapter will not stop resetting. Wait for the driver to signal that there's a carrier before allowing transmissions to proceed. Previous code was relying that when __IGC_DOWN is cleared, the NIC is ready to transmit as all the queues are ready, what happens is that the carrier presence will only be signaled later, after the watchdog workqueue has a chance to run. 
And during this interval (between clearing __IGC_DOWN and the watchdog running) if any transmission happens the timeout is emitted (detected by igc_tx_timeout()) which causes the reset, with the potential for the infinite loop. Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action") Reported-by: Ferenc Fejes Closes: https://lore.kernel.org/netdev/0caf33cf6adb3a5bf137eeaa20e89b167c9986d5.camel@ericsson.com/ Signed-off-by: Vinicius Costa Gomes Tested-by: Ferenc Fejes Reviewed-by: Maciej Fijalkowski Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 293b45717683..98de34d0ce07 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6491,7 +6491,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, struct igc_ring *ring; int i, drops; - if (unlikely(test_bit(__IGC_DOWN, &adapter->state))) + if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) From 3cec50490969afd4a76ccee441f747d869ccff77 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Sep 2023 12:31:42 -0700 Subject: [PATCH 262/333] vm: fix move_vma() memory accounting being off Commit 408579cd627a ("mm: Update do_vmi_align_munmap() return semantics") seems to have updated one of the callers of do_vmi_munmap() incorrectly: it used to check for the error case (which didn't change: negative means error). That commit changed the check to the success case (which did change: before that commit, 0 was success, and 1 was "success and lock downgraded". After the change, it's always 0 for success, and the lock will have been released if requested). This didn't change any actual VM behavior _except_ for memory accounting when 'VM_ACCOUNT' was set on the vma. 
Which made the wrong return value test fairly subtle, since everything continues to work. Or rather - it continues to work but the "Committed memory" accounting goes all wonky (Committed_AS value in /proc/meminfo), and depending on settings that then causes problems much much later as the VM relies on bogus statistics for its heuristics. Revert that one line of the change back to the original logic. Fixes: 408579cd627a ("mm: Update do_vmi_align_munmap() return semantics") Reported-by: Christoph Biedl Reported-bisected-and-tested-by: Michael Labiuk Cc: Bagas Sanjaya Cc: Liam R. Howlett Link: https://lore.kernel.org/all/1694366957@msgid.manchmal.in-ulm.de/ Signed-off-by: Linus Torvalds --- mm/mremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index 056478c106ee..382e81c33fc4 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -715,7 +715,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, } vma_iter_init(&vmi, mm, old_addr); - if (!do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false)) { + if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) { /* OOM: unable to split vma, just get accounts right */ if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) vm_acct_memory(old_len >> PAGE_SHIFT); From f530ee95b72e77b09c141c4b1a4b94d1199ffbd9 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 15 Sep 2023 10:02:21 +0300 Subject: [PATCH 263/333] x86/boot/compressed: Reserve more memory for page tables The decompressor has a hard limit on the number of page tables it can allocate. This limit is defined at compile-time and will cause boot failure if it is reached. The kernel is very strict and calculates the limit precisely for the worst-case scenario based on the current configuration. However, it is easy to forget to adjust the limit when a new use-case arises. The worst-case scenario is rarely encountered during sanity checks. 
In the case of enabling 5-level paging, a use-case was overlooked. The limit needs to be increased by one to accommodate the additional level. This oversight went unnoticed until Aaron attempted to run the kernel via kexec with 5-level paging and unaccepted memory enabled. Update worst-case calculations to include 5-level paging. To address this issue, let's allocate some extra space for page tables. 128K should be sufficient for any use-case. The logic can be simplified by using a single value for all kernel configurations. [ Also add a warning, should this memory run low - by Dave Hansen. ] Fixes: 34bbb0009f3b ("x86/boot/compressed: Enable 5-level paging during decompression stage") Reported-by: Aaron Lu Signed-off-by: Kirill A. Shutemov Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915070221.10266-1-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/ident_map_64.c | 8 +++++ arch/x86/include/asm/boot.h | 47 +++++++++++++++++-------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index bcc956c17872..08f93b0401bb 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -59,6 +59,14 @@ static void *alloc_pgt_page(void *context) return NULL; } + /* Consumed more tables than expected?
*/ + if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) { + debug_putstr("pgt_buf running low in " __FILE__ "\n"); + debug_putstr("Need to raise BOOT_PGT_SIZE?\n"); + debug_putaddr(pages->pgt_buf_offset); + debug_putaddr(pages->pgt_buf_size); + } + entry = pages->pgt_buf + pages->pgt_buf_offset; pages->pgt_buf_offset += PAGE_SIZE; diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4ae14339cb8c..b3a7cfb0d99e 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -40,23 +40,40 @@ #ifdef CONFIG_X86_64 # define BOOT_STACK_SIZE 0x4000 -# define BOOT_INIT_PGT_SIZE (6*4096) -# ifdef CONFIG_RANDOMIZE_BASE /* - * Assuming all cross the 512GB boundary: - * 1 page for level4 - * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel - * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP). - * Total is 19 pages. + * Used by decompressor's startup_32() to allocate page tables for identity + * mapping of the 4G of RAM in 4-level paging mode: + * - 1 level4 table; + * - 1 level3 table; + * - 4 level2 table that maps everything with 2M pages; + * + * The additional level5 table needed for 5-level paging is allocated from + * trampoline_32bit memory. */ -# ifdef CONFIG_X86_VERBOSE_BOOTUP -# define BOOT_PGT_SIZE (19*4096) -# else /* !CONFIG_X86_VERBOSE_BOOTUP */ -# define BOOT_PGT_SIZE (17*4096) -# endif -# else /* !CONFIG_RANDOMIZE_BASE */ -# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE -# endif +# define BOOT_INIT_PGT_SIZE (6*4096) + +/* + * Total number of page tables kernel_add_identity_map() can allocate, + * including page tables consumed by startup_32(). 
+ * + * Worst-case scenario: + * - 5-level paging needs 1 level5 table; + * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel, + * assuming all of them cross 256T boundary: + * + 4*2 level4 table; + * + 4*2 level3 table; + * + 4*2 level2 table; + * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM): + * + 1 level4 table; + * + 1 level3 table; + * + 1 level2 table; + * Total: 28 tables + * + * Add 4 spare table in case decompressor touches anything beyond what is + * accounted above. Warn if it happens. + */ +# define BOOT_PGT_SIZE_WARN (28*4096) +# define BOOT_PGT_SIZE (32*4096) #else /* !CONFIG_X86_64 */ # define BOOT_STACK_SIZE 0x1000 From 75b2f7e4c9e0fd750a5a27ca9736d1daa7a3762a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Sep 2023 10:01:38 -0700 Subject: [PATCH 264/333] x86/purgatory: Remove LTO flags -flto* implies -ffunction-sections. With LTO enabled, ld.lld generates multiple .text sections for purgatory.ro: $ readelf -S purgatory.ro | grep " .text" [ 1] .text PROGBITS 0000000000000000 00000040 [ 7] .text.purgatory PROGBITS 0000000000000000 000020e0 [ 9] .text.warn PROGBITS 0000000000000000 000021c0 [13] .text.sha256_upda PROGBITS 0000000000000000 000022f0 [15] .text.sha224_upda PROGBITS 0000000000000000 00002be0 [17] .text.sha256_fina PROGBITS 0000000000000000 00002bf0 [19] .text.sha224_fina PROGBITS 0000000000000000 00002cc0 This causes WARNING from kexec_purgatory_setup_sechdrs(): WARNING: CPU: 26 PID: 110894 at kernel/kexec_file.c:919 kexec_load_purgatory+0x37f/0x390 Fix this by disabling LTO for purgatory. [ AFAICT, x86 is the only arch that supports LTO and purgatory. ] We could also fix this with an explicit linker script to rejoin .text.* sections back into .text. However, given the benefit of LTOing purgatory is small, simply disable the production of more .text.* sections for now. 
Fixes: b33fff07e3e3 ("x86, build: allow LTO to be selected") Signed-off-by: Song Liu Signed-off-by: Ingo Molnar Reviewed-by: Nick Desaulniers Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/r/20230914170138.995606-1-song@kernel.org --- arch/x86/purgatory/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index c2a29be35c01..08aa0f25f12a 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -19,6 +19,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY # optimization flags. KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) +# When LTO is enabled, llvm emits many text sections, which is not supported +# by kexec. Remove -flto=* flags. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib From 0113d9c9d1ccc07f5a3710dac4aa24b6d711278c Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 14 Sep 2023 22:12:57 -0700 Subject: [PATCH 265/333] ipv4: fix null-deref in ipv4_link_failure Currently, we assume the skb is associated with a device before calling __ip_options_compile, which is not always the case if it is re-routed by ipvs. When skb->dev is NULL, dev_net(skb->dev) becomes a NULL pointer dereference. This patch adds a check for the edge case and switches to using the net_device from the rtable when skb->dev is NULL. Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Suggested-by: David Ahern Signed-off-by: Kyle Zeng Cc: Stephen Suryaputra Cc: Vadim Fedorenko Reviewed-by: David Ahern Signed-off-by: David S.
Miller --- net/ipv4/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 66f419e7f9a7..a57062283219 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1213,6 +1213,7 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check); static void ipv4_send_dest_unreach(struct sk_buff *skb) { + struct net_device *dev; struct ip_options opt; int res; @@ -1230,7 +1231,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb) opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; + res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); rcu_read_unlock(); if (res) From f4f82c52a0ead5ab363d207d06f81b967d09ffb8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Sep 2023 17:11:11 +0000 Subject: [PATCH 266/333] scsi: iscsi_tcp: restrict to TCP sockets Nothing prevents iscsi_sw_tcp_conn_bind() from receiving a file descriptor pointing to a non-TCP socket (af_unix for example). Return -EINVAL if this is attempted, instead of crashing the kernel. Fixes: 7ba247138907 ("[SCSI] open-iscsi/linux-iscsi-5 Initiator: Initiator code") Signed-off-by: Eric Dumazet Cc: Lee Duncan Cc: Chris Leech Cc: Mike Christie Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: open-iscsi@googlegroups.com Cc: linux-scsi@vger.kernel.org Reviewed-by: Mike Christie Signed-off-by: David S.
Miller --- drivers/scsi/iscsi_tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 9ab8555180a3..8e14cea15f98 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -724,6 +724,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session, return -EEXIST; } + err = -EINVAL; + if (!sk_is_tcp(sock->sk)) + goto free_socket; + err = iscsi_conn_bind(cls_session, cls_conn, is_leading); if (err) goto free_socket; From 42aadec8c739727fce8e2c1ee71e72cb0f82ed3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Sep 2023 11:09:56 -0700 Subject: [PATCH 267/333] stat: remove no-longer-used helper macros The choose_32_64() macros were added to deal with an odd inconsistency between the 32-bit and 64-bit layout of 'struct stat' way back when in commit a52dd971f947 ("vfs: de-crapify "cp_new_stat()" function"). Then a decade later Mikulas noticed that said inconsistency had been a mistake in the early x86-64 port, and shouldn't have existed in the first place. So commit 932aba1e1690 ("stat: fix inconsistency between struct stat and struct compat_stat") removed the uses of the helpers. But the helpers remained around, unused. Get rid of them. 
Signed-off-by: Linus Torvalds --- fs/stat.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/stat.c b/fs/stat.c index 6822ac77aec2..6e60389d6a15 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -419,12 +419,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat #ifdef __ARCH_WANT_NEW_STAT -#if BITS_PER_LONG == 32 -# define choose_32_64(a,b) a -#else -# define choose_32_64(a,b) b -#endif - #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif From ce9ecca0238b140b88f43859b211c9fdfd8e5b70 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Sep 2023 14:40:24 -0700 Subject: [PATCH 268/333] Linux 6.6-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ceb23eed4dce..57698d048e2c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 4ff3ba4db5943cac1045e3e4a3c0463ea10f6930 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 25 Aug 2023 11:26:01 +0530 Subject: [PATCH 269/333] powerpc/perf/hv-24x7: Update domain value check Valid domain value is in range 1 to HV_PERF_DOMAIN_MAX. Current code has check for domain value greater than or equal to HV_PERF_DOMAIN_MAX. But the check for domain value 0 is missing. Fix this issue by adding check for domain value 0. Before: # ./perf stat -v -e hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ sleep 1 Using CPUID 00800200 Control descriptor is not initialized Error: The sys_perf_event_open() syscall returned with 5 (Input/output error) for event (hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/). /bin/dmesg | grep -i perf may provide additional information. 
Result from dmesg: [ 37.819387] hv-24x7: hcall failed: [0 0x60040000 0x100 0] => ret 0xfffffffffffffffc (-4) detail=0x2000000 failing ix=0 After: # ./perf stat -v -e hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ sleep 1 Using CPUID 00800200 Control descriptor is not initialized Warning: hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ event is not supported by the kernel. failed to read counter hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ Fixes: ebd4a5a3ebd9 ("powerpc/perf/hv-24x7: Minor improvements") Reported-by: Krishan Gopal Sarawast Signed-off-by: Kajol Jain Tested-by: Disha Goel Signed-off-by: Michael Ellerman Link: https://msgid.link/20230825055601.360083-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-24x7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 317175791d23..3449be7c0d51 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1418,7 +1418,7 @@ static int h_24x7_event_init(struct perf_event *event) } domain = event_get_domain(event); - if (domain >= HV_PERF_DOMAIN_MAX) { + if (domain == 0 || domain >= HV_PERF_DOMAIN_MAX) { pr_devel("invalid domain %d\n", domain); return -EINVAL; } From cc879ab3ce39bc39f9b1d238b283f43a5f6f957d Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 29 Aug 2023 16:34:55 +1000 Subject: [PATCH 270/333] powerpc/watchpoints: Disable preemption in thread_change_pc() thread_change_pc() uses CPU local data, so must be protected from swapping CPUs while it is reading the breakpoint struct. The error is more noticeable after 1e60f3564bad ("powerpc/watchpoints: Track perf single step directly on the breakpoint"), which added an unconditional __this_cpu_read() call in thread_change_pc(). However the existing __this_cpu_read() that runs if a breakpoint does need to be re-inserted has the same issue. 
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230829063457.54157-2-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index b8513dc3e53a..2854376870cf 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -230,13 +230,15 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) struct arch_hw_breakpoint *info; int i; + preempt_disable(); + for (i = 0; i < nr_wp_slots(); i++) { struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); if (unlikely(bp && counter_arch_bp(bp)->perf_single_step)) goto reset; } - return; + goto out; reset: regs_set_return_msr(regs, regs->msr & ~MSR_SE); @@ -245,6 +247,9 @@ reset: __set_breakpoint(i, info); info->perf_single_step = false; } + +out: + preempt_enable(); } static bool is_larx_stcx_instr(int type) From 3241f260eb830d27d09cc604690ec24533fdb433 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 29 Aug 2023 16:34:56 +1000 Subject: [PATCH 271/333] powerpc/watchpoint: Disable pagefaults when getting user instruction This is called in an atomic context, so is not allowed to sleep if a user page needs to be faulted in and has nowhere it can be deferred to. The pagefault_disabled() function is documented as preventing user access methods from sleeping. In practice the page will be mapped in nearly always because we are reading the instruction that just triggered the watchpoint trap. 
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230829063457.54157-3-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint_constraints.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index a74623025f3a..9e51801c4915 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -131,8 +131,13 @@ void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea) { struct instruction_op op; + int err; - if (__get_user_instr(*instr, (void __user *)regs->nip)) + pagefault_disable(); + err = __get_user_instr(*instr, (void __user *)regs->nip); + pagefault_enable(); + + if (err) return; analyse_instr(&op, regs, *instr); From 27646b2e02b096a6936b3e3b6ba334ae20763eab Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 29 Aug 2023 16:34:57 +1000 Subject: [PATCH 272/333] powerpc/watchpoints: Annotate atomic context in more places It can be easy to miss that the notifier mechanism invokes the callbacks in an atomic context, so add some comments to that effect on the two handlers we register here. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230829063457.54157-4-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2854376870cf..a1318ce18d0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -368,6 +368,11 @@ static void handle_p10dd1_spurious_exception(struct perf_event **bp, } } +/* + * Handle a DABR or DAWR exception. + * + * Called in atomic context. 
+ */ int hw_breakpoint_handler(struct die_args *args) { bool err = false; @@ -495,6 +500,8 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler); /* * Handle single-step exceptions following a DABR hit. + * + * Called in atomic context. */ static int single_step_dabr_instruction(struct die_args *args) { @@ -546,6 +553,8 @@ NOKPROBE_SYMBOL(single_step_dabr_instruction); /* * Handle debug exception notifications. + * + * Called in atomic context. */ int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) From 60d77ed24bb3068c0837fe45b8921b0a6598829d Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 13 Sep 2023 19:11:29 +0530 Subject: [PATCH 273/333] powerpc: Fix build issue with LD_DEAD_CODE_DATA_ELIMINATION and FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY We recently added support for -fpatchable-function-entry and it is enabled by default on ppc32 (ppc64 needs gcc v13.1.0). When building the kernel for ppc32 and also enabling CONFIG_LD_DEAD_CODE_DATA_ELIMINATION, we see the below build error with older gcc versions: powerpc-linux-gnu-ld: init/main.o(__patchable_function_entries): error: need linked-to section for --gc-sections This error is thrown since __patchable_function_entries section would be garbage collected with --gc-sections since it does not reference any other kept sections. This has subsequently been fixed with: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=b7d072167715829eed0622616f6ae0182900de3e Disable LD_DEAD_CODE_DATA_ELIMINATION for gcc versions before v11.1.0 if using -fpatchable-function-entry to avoid this bug. 
Fixes: 0f71dcfb4aef ("powerpc/ftrace: Add support for -fpatchable-function-entry") Reported-by: Michael Ellerman Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20230913134129.2782088-1-naveen@kernel.org --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 54b9387c3691..3aaadfd2c8eb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -255,7 +255,7 @@ config PPC select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT && (!ARCH_USING_PATCHABLE_FUNCTION_ENTRY || (!CC_IS_GCC || GCC_VERSION >= 110100)) select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) From 6901a9f9ef1561111283a0d8c8d1cea634d089ef Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 14 Sep 2023 17:23:45 +0200 Subject: [PATCH 274/333] powerpc/82xx: Select FSL_SOC It used to be impossible to select CONFIG_CPM2 without selecting CONFIG_FSL_SOC at the same time because CONFIG_CPM2 was dependent on CONFIG_8260 and CONFIG_8260 was selecting CONFIG_FSL_SOC. But after commit eb5aa2137275 ("powerpc/82xx: Remove CONFIG_8260 and CONFIG_8272") CONFIG_CPM2 depends on CONFIG_PPC_82xx instead but CONFIG_PPC_82xx doesn't directly select CONFIG_FSL_SOC. Fix it by forcing CONFIG_PPC_82xx to select CONFIG_FSL_SOC just like already done by PPC_8xx, PPC_MPC512x, PPC_83xx, PPC_86xx.
Reported-by: Randy Dunlap Fixes: eb5aa2137275 ("powerpc/82xx: Remove CONFIG_8260 and CONFIG_8272") Signed-off-by: Christophe Leroy Tested-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://msgid.link/7ab513546148ebe33ddd4b0ea92c7bfd3cce3ad7.1694705016.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/82xx/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig index d9f1a2a83158..1824536cf6f2 100644 --- a/arch/powerpc/platforms/82xx/Kconfig +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -2,6 +2,7 @@ menuconfig PPC_82xx bool "82xx-based boards (PQ II)" depends on PPC_BOOK3S_32 + select FSL_SOC if PPC_82xx @@ -9,7 +10,6 @@ config EP8248E bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)" select CPM2 select PPC_INDIRECT_PCI if PCI - select FSL_SOC select PHYLIB if NETDEVICES select MDIO_BITBANG if PHYLIB help @@ -22,7 +22,6 @@ config MGCOGE bool "Keymile MGCOGE" select CPM2 select PPC_INDIRECT_PCI if PCI - select FSL_SOC help This enables support for the Keymile MGCOGE board. From c3f4309693758b13fbb34b3741c2e2801ad28769 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 15 Sep 2023 13:46:04 +1000 Subject: [PATCH 275/333] powerpc/dexcr: Move HASHCHK trap handler Syzkaller reported a sleep in atomic context bug relating to the HASHCHK handler logic: BUG: sleeping function called from invalid context at arch/powerpc/kernel/traps.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 25040, name: syz-executor preempt_count: 0, expected: 0 RCU nest depth: 0, expected: 0 no locks held by syz-executor/25040. 
irq event stamp: 34 hardirqs last enabled at (33): [] prep_irq_for_enabled_exit arch/powerpc/kernel/interrupt.c:56 [inline] hardirqs last enabled at (33): [] interrupt_exit_user_prepare_main+0x148/0x600 arch/powerpc/kernel/interrupt.c:230 hardirqs last disabled at (34): [] interrupt_enter_prepare+0x144/0x4f0 arch/powerpc/include/asm/interrupt.h:176 softirqs last enabled at (0): [] copy_process+0x16e4/0x4750 kernel/fork.c:2436 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 15 PID: 25040 Comm: syz-executor Not tainted 6.5.0-rc5-00001-g3ccdff6bb06d #3 Hardware name: IBM,9105-22A POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1040.00 (NL1040_021) hv:phyp pSeries Call Trace: [c0000000a8247ce0] [c00000000032b0e4] __might_resched+0x3b4/0x400 kernel/sched/core.c:10189 [c0000000a8247d80] [c0000000008c7dc8] __might_fault+0xa8/0x170 mm/memory.c:5853 [c0000000a8247dc0] [c00000000004160c] do_program_check+0x32c/0xb20 arch/powerpc/kernel/traps.c:1518 [c0000000a8247e50] [c000000000009b2c] program_check_common_virt+0x3bc/0x3c0 To determine if a trap was caused by a HASHCHK instruction, we inspect the user instruction that triggered the trap. However this may sleep if the page needs to be faulted in (get_user_instr() reaches __get_user(), which calls might_fault() and triggers the bug message). Move the HASHCHK handler logic to after we allow IRQs, which is fine because we are only interested in HASHCHK if it's a user space trap. 
Fixes: 5bcba4e6c13f ("powerpc/dexcr: Handle hashchk exception") Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230915034604.45393-1-bgray@linux.ibm.com --- arch/powerpc/kernel/traps.c | 56 ++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index eeff136b83d9..64ff37721fd0 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1512,23 +1512,11 @@ static void do_program_check(struct pt_regs *regs) return; } - if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && user_mode(regs)) { - ppc_inst_t insn; - - if (get_user_instr(insn, (void __user *)regs->nip)) { - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - } - - if (ppc_inst_primary_opcode(insn) == 31 && - get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) { - _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - return; - } + /* User mode considers other cases after enabling IRQs */ + if (!user_mode(regs)) { + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; } - - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - return; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1561,16 +1549,44 @@ static void do_program_check(struct pt_regs *regs) /* * If we took the program check in the kernel skip down to sending a - * SIGILL. The subsequent cases all relate to emulating instructions - * which we should only do for userspace. We also do not want to enable - * interrupts for kernel faults because that might lead to further - * faults, and loose the context of the original exception. + * SIGILL. The subsequent cases all relate to user space, such as + * emulating instructions which we should only do for user space. We + * also do not want to enable interrupts for kernel faults because that + * might lead to further faults, and loose the context of the original + * exception. 
*/ if (!user_mode(regs)) goto sigill; interrupt_cond_local_irq_enable(regs); + /* + * (reason & REASON_TRAP) is mostly handled before enabling IRQs, + * except get_user_instr() can sleep so we cannot reliably inspect the + * current instruction in that context. Now that we know we are + * handling a user space trap and can sleep, we can check if the trap + * was a hashchk failure. + */ + if (reason & REASON_TRAP) { + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) { + ppc_inst_t insn; + + if (get_user_instr(insn, (void __user *)regs->nip)) { + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + + if (ppc_inst_primary_opcode(insn) == 31 && + get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) { + _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); + return; + } + } + + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; + } + /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) * that means ESR is sometimes set incorrectly - either to From 3780bb29311eccb7a1c9641032a112eed237f7e3 Mon Sep 17 00:00:00 2001 From: Johnathan Mantey Date: Fri, 15 Sep 2023 09:12:35 -0700 Subject: [PATCH 276/333] ncsi: Propagate carrier gain/loss events to the NCSI controller Report the carrier/no-carrier state for the network interface shared between the BMC and the passthrough channel. Without this functionality the BMC is unable to reconfigure the NIC in the event of a re-cabling to a different subnet. Signed-off-by: Johnathan Mantey Signed-off-by: David S. 
Miller --- net/ncsi/ncsi-aen.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index 62fb1031763d..f8854bff286c 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -89,6 +89,11 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, if ((had_link == has_link) || chained) return 0; + if (had_link) + netif_carrier_off(ndp->ndev.dev); + else + netif_carrier_on(ndp->ndev.dev); + if (!ndp->multi_package && !nc->package->multi_channel) { if (had_link) { ndp->flags |= NCSI_DEV_RESHUFFLE; From 6af289746a636f71f4c0535a9801774118486c7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Sep 2023 19:00:35 +0000 Subject: [PATCH 277/333] dccp: fix dccp_v4_err()/dccp_v6_err() again dh->dccph_x is the 9th byte (offset 8) in "struct dccp_hdr", not in the "byte 7" as Jann claimed. We need to make sure the ICMP messages are big enough, using more standard ways (no more assumptions). syzbot reported: BUG: KMSAN: uninit-value in pskb_may_pull_reason include/linux/skbuff.h:2667 [inline] BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2681 [inline] BUG: KMSAN: uninit-value in dccp_v6_err+0x426/0x1aa0 net/dccp/ipv6.c:94 pskb_may_pull_reason include/linux/skbuff.h:2667 [inline] pskb_may_pull include/linux/skbuff.h:2681 [inline] dccp_v6_err+0x426/0x1aa0 net/dccp/ipv6.c:94 icmpv6_notify+0x4c7/0x880 net/ipv6/icmp.c:867 icmpv6_rcv+0x19d5/0x30d0 ip6_protocol_deliver_rcu+0xda6/0x2a60 net/ipv6/ip6_input.c:438 ip6_input_finish net/ipv6/ip6_input.c:483 [inline] NF_HOOK include/linux/netfilter.h:304 [inline] ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492 ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586 dst_input include/net/dst.h:468 [inline] ip6_rcv_finish+0x5db/0x870 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:304 [inline] ipv6_rcv+0xda/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5523 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5637 
netif_receive_skb_internal net/core/dev.c:5723 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5782 tun_rx_batched+0x83b/0x920 tun_get_user+0x564c/0x6940 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:1985 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x15c0 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook+0x12f/0xb70 mm/slab.h:767 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x577/0xa80 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:559 __alloc_skb+0x318/0x740 net/core/skbuff.c:650 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6313 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2795 tun_alloc_skb drivers/net/tun.c:1531 [inline] tun_get_user+0x23cf/0x6940 drivers/net/tun.c:1846 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:1985 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x15c0 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd CPU: 0 PID: 4995 Comm: syz-executor153 Not tainted 6.6.0-rc1-syzkaller-00014-ga747acc0b752 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 Fixes: 977ad86c2a1b ("dccp: Fix out of bounds access in DCCP error handler") Reported-by: syzbot 
Signed-off-by: Eric Dumazet Cc: Jann Horn Reviewed-by: Jann Horn Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 9 ++------- net/dccp/ipv6.c | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8f56e8723c73..69453b936bd5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -254,13 +254,8 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x, - * which is in byte 7 of the dccp header. - * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. - * - * Later on, we want to access the sequence number fields, which are - * beyond 8 bytes, so we have to pskb_may_pull() ourselves. - */ + if (!pskb_may_pull(skb, offset + sizeof(*dh))) + return -EINVAL; dh = (struct dccp_hdr *)(skb->data + offset); if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) return -EINVAL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 33f6ccf6ba77..c693a570682f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -83,13 +83,8 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x, - * which is in byte 7 of the dccp header. - * Our caller (icmpv6_notify()) already pulled 8 bytes for us. - * - * Later on, we want to access the sequence number fields, which are - * beyond 8 bytes, so we have to pskb_may_pull() ourselves. - */ + if (!pskb_may_pull(skb, offset + sizeof(*dh))) + return -EINVAL; dh = (struct dccp_hdr *)(skb->data + offset); if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) return -EINVAL; From 295de650d3aaf9e50258465c5f1c84b465d836f6 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Fri, 15 Sep 2023 20:10:02 +0200 Subject: [PATCH 278/333] net: hsr: Properly parse HSRv1 supervisor frames. 
While adding support for parsing the redbox supervision frames, the author added `pull_size' and `total_pull_size' to track the amount of bytes that were pulled from the skb while parsing the skb so it can be reverted/ pushed back at the end. In the process a copy&paste error probably occurred and for the HSRv1 case the ethhdr was used instead of the hsr_tag. Later the hsr_tag was used instead of hsr_sup_tag. The latter error didn't matter because both structs have the same size so HSRv0 was still working. It broke however HSRv1 parsing because struct ethhdr is larger than struct hsr_tag. Reinstate the old pulling flow and pull first ethhdr, hsr_tag in v1 case followed by hsr_sup_tag. [bigeasy: commit message] Fixes: eafaa88b3eb7 ("net: hsr: Add support for redbox supervision frames")' Suggested-by: Tristram.Ha@microchip.com Signed-off-by: Lukasz Majewski Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index b77f1189d19d..6d14d935ee82 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -288,13 +288,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) /* And leave the HSR tag. */ if (ethhdr->h_proto == htons(ETH_P_HSR)) { - pull_size = sizeof(struct ethhdr); + pull_size = sizeof(struct hsr_tag); skb_pull(skb, pull_size); total_pull_size += pull_size; } /* And leave the HSR sup tag. */ - pull_size = sizeof(struct hsr_tag); + pull_size = sizeof(struct hsr_sup_tag); skb_pull(skb, pull_size); total_pull_size += pull_size; From fbd825fcd7dd4c11d4c48c3d0adc248a4a0ce90b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Sep 2023 20:10:03 +0200 Subject: [PATCH 279/333] net: hsr: Add __packed to struct hsr_sup_tlv.
Struct hsr_sup_tlv describes HW layout and therefore it needs a __packed attribute to ensure the compiler does not add any padding. Due to the size and __packed attribute of the structs that use hsr_sup_tlv it has no functional impact. Add __packed to struct hsr_sup_tlv. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/hsr/hsr_main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 6851e33df7d1..18e01791ad79 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -83,7 +83,7 @@ struct hsr_vlan_ethhdr { struct hsr_sup_tlv { u8 HSR_TLV_type; u8 HSR_TLV_length; -}; +} __packed; /* HSR/PRP Supervision Frame data types. * Field names as defined in the IEC:2010 standard for HSR. From 5c3ce539a11185268aff3bb30d2fad8c7fa42f86 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Sep 2023 20:10:04 +0200 Subject: [PATCH 280/333] selftests: hsr: Use `let' properly. The timeout in the while loop is never subtracted due to wrong usage of `let' leading to an endless loop if the former condition never becomes true. Put the statement for let in quotes so it is parsed as a single statement. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- tools/testing/selftests/net/hsr/hsr_ping.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index df9143538708..183f4a0f19dd 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -197,7 +197,7 @@ do break fi sleep 1 - let WAIT = WAIT - 1 + let "WAIT = WAIT - 1" done # Just a safety delay in case the above check didn't handle it. From d53f23fe164c24335d001cf725599a95e6fdf92d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Sep 2023 20:10:05 +0200 Subject: [PATCH 281/333] selftests: hsr: Reorder the testsuite.
Move the code and group into functions so it will be easier to extend the test to HSRv1 so that both versions are covered. Move the ping/test part into do_complete_ping_test() and the interface setup into setup_hsr_interfaces(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- tools/testing/selftests/net/hsr/hsr_ping.sh | 249 ++++++++++---------- 1 file changed, 129 insertions(+), 120 deletions(-) diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 183f4a0f19dd..d4613b7b7188 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -41,61 +41,6 @@ cleanup() done } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -trap cleanup EXIT - -for i in "$ns1" "$ns2" "$ns3" ;do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done - -echo "INFO: preparing interfaces." -# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. -# -# ns1eth1 ----- ns2eth1 -# hsr1 hsr2 -# ns1eth2 ns2eth2 -# | | -# ns3eth1 ns3eth2 -# \ / -# hsr3 -# -# Interfaces -ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" -ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" -ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" - -# HSRv0. 
-ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 -ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 -ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 - -# IP for HSR -ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 -ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad -ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 -ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad -ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 -ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad - -# All Links up -ip -net "$ns1" link set ns1eth1 up -ip -net "$ns1" link set ns1eth2 up -ip -net "$ns1" link set hsr1 up - -ip -net "$ns2" link set ns2eth1 up -ip -net "$ns2" link set ns2eth2 up -ip -net "$ns2" link set hsr2 up - -ip -net "$ns3" link set ns3eth1 up -ip -net "$ns3" link set ns3eth2 up -ip -net "$ns3" link set hsr3 up - # $1: IP address is_v6() { @@ -164,93 +109,157 @@ stop_if_error() fi } +do_complete_ping_test() +{ + echo "INFO: Initial validation ping." + # Each node has to be able each one. + do_ping "$ns1" 100.64.0.2 + do_ping "$ns2" 100.64.0.1 + do_ping "$ns3" 100.64.0.1 + stop_if_error "Initial validation failed." -echo "INFO: Initial validation ping." -# Each node has to be able each one. -do_ping "$ns1" 100.64.0.2 -do_ping "$ns2" 100.64.0.1 -do_ping "$ns3" 100.64.0.1 -stop_if_error "Initial validation failed." 
+ do_ping "$ns1" 100.64.0.3 + do_ping "$ns2" 100.64.0.3 + do_ping "$ns3" 100.64.0.2 -do_ping "$ns1" 100.64.0.3 -do_ping "$ns2" 100.64.0.3 -do_ping "$ns3" 100.64.0.2 + do_ping "$ns1" dead:beef:1::2 + do_ping "$ns1" dead:beef:1::3 + do_ping "$ns2" dead:beef:1::1 + do_ping "$ns2" dead:beef:1::2 + do_ping "$ns3" dead:beef:1::1 + do_ping "$ns3" dead:beef:1::2 -do_ping "$ns1" dead:beef:1::2 -do_ping "$ns1" dead:beef:1::3 -do_ping "$ns2" dead:beef:1::1 -do_ping "$ns2" dead:beef:1::2 -do_ping "$ns3" dead:beef:1::1 -do_ping "$ns3" dead:beef:1::2 - -stop_if_error "Initial validation failed." + stop_if_error "Initial validation failed." # Wait until supervisor all supervision frames have been processed and the node # entries have been merged. Otherwise duplicate frames will be observed which is # valid at this stage. -WAIT=5 -while [ ${WAIT} -gt 0 ] -do - grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table - if [ $? -ne 0 ] - then - break - fi - sleep 1 - let "WAIT = WAIT - 1" -done + WAIT=5 + while [ ${WAIT} -gt 0 ] + do + grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table + if [ $? -ne 0 ] + then + break + fi + sleep 1 + let "WAIT = WAIT - 1" + done # Just a safety delay in case the above check didn't handle it. -sleep 1 + sleep 1 -echo "INFO: Longer ping test." -do_ping_long "$ns1" 100.64.0.2 -do_ping_long "$ns1" dead:beef:1::2 -do_ping_long "$ns1" 100.64.0.3 -do_ping_long "$ns1" dead:beef:1::3 + echo "INFO: Longer ping test." + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" dead:beef:1::2 + do_ping_long "$ns1" 100.64.0.3 + do_ping_long "$ns1" dead:beef:1::3 -stop_if_error "Longer ping test failed." + stop_if_error "Longer ping test failed." -do_ping_long "$ns2" 100.64.0.1 -do_ping_long "$ns2" dead:beef:1::1 -do_ping_long "$ns2" 100.64.0.3 -do_ping_long "$ns2" dead:beef:1::2 -stop_if_error "Longer ping test failed." 
+ do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" dead:beef:1::1 + do_ping_long "$ns2" 100.64.0.3 + do_ping_long "$ns2" dead:beef:1::2 + stop_if_error "Longer ping test failed." -do_ping_long "$ns3" 100.64.0.1 -do_ping_long "$ns3" dead:beef:1::1 -do_ping_long "$ns3" 100.64.0.2 -do_ping_long "$ns3" dead:beef:1::2 -stop_if_error "Longer ping test failed." + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" dead:beef:1::1 + do_ping_long "$ns3" 100.64.0.2 + do_ping_long "$ns3" dead:beef:1::2 + stop_if_error "Longer ping test failed." -echo "INFO: Cutting one link." -do_ping_long "$ns1" 100.64.0.3 & + echo "INFO: Cutting one link." + do_ping_long "$ns1" 100.64.0.3 & -sleep 3 -ip -net "$ns3" link set ns3eth1 down -wait + sleep 3 + ip -net "$ns3" link set ns3eth1 down + wait -ip -net "$ns3" link set ns3eth1 up + ip -net "$ns3" link set ns3eth1 up -stop_if_error "Failed with one link down." + stop_if_error "Failed with one link down." -echo "INFO: Delay the link and drop a few packages." -tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms -tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% + echo "INFO: Delay the link and drop a few packages." + tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms + tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% -do_ping_long "$ns1" 100.64.0.2 -do_ping_long "$ns1" 100.64.0.3 + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" 100.64.0.3 -stop_if_error "Failed with delay and packetloss." + stop_if_error "Failed with delay and packetloss." -do_ping_long "$ns2" 100.64.0.1 -do_ping_long "$ns2" 100.64.0.3 + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" 100.64.0.3 -stop_if_error "Failed with delay and packetloss." + stop_if_error "Failed with delay and packetloss." -do_ping_long "$ns3" 100.64.0.1 -do_ping_long "$ns3" 100.64.0.2 -stop_if_error "Failed with delay and packetloss." 
+ do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" 100.64.0.2 + stop_if_error "Failed with delay and packetloss." + + echo "INFO: All good." +} + +setup_hsr_interfaces() +{ + echo "INFO: preparing interfaces." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# + # Interfaces + ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" + ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + + # HSRv0. + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 + + # IP for HSR + ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 + ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad + ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 + ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad + ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 + ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + + # All Links up + ip -net "$ns1" link set ns1eth1 up + ip -net "$ns1" link set ns1eth2 up + ip -net "$ns1" link set hsr1 up + + ip -net "$ns2" link set ns2eth1 up + ip -net "$ns2" link set ns2eth2 up + ip -net "$ns2" link set hsr2 up + + ip -net "$ns3" link set ns3eth1 up + ip -net "$ns3" link set ns3eth2 up + ip -net "$ns3" link set hsr3 up +} + +ip -Version > /dev/null 2>&1 +if [ $? 
-ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +setup_hsr_interfaces +do_complete_ping_test -echo "INFO: All good." exit $ret From b0e9c3b5fdafbe60e7a82be69439f95e06a4de39 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Sep 2023 20:10:06 +0200 Subject: [PATCH 282/333] selftests: hsr: Extend the testsuite to also cover HSRv1. The testsuite already has simple tests for HSRv0. The testsuite would have been able to notice the v1 breakage if it was there at the time. Extend the testsuite to also cover HSRv1. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- tools/testing/selftests/net/hsr/hsr_ping.sh | 23 +++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index d4613b7b7188..1c6457e54625 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -203,7 +203,9 @@ do_complete_ping_test() setup_hsr_interfaces() { - echo "INFO: preparing interfaces." + local HSRv="$1" + + echo "INFO: preparing interfaces for HSRv${HSRv}." # Three HSR nodes. Each node has one link to each of its neighbour, two links in total. # # ns1eth1 ----- ns2eth1 @@ -219,10 +221,10 @@ setup_hsr_interfaces() ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" - # HSRv0.
- ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 - ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 - ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 + # HSRv0/1 + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0 # IP for HSR ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 @@ -259,7 +261,16 @@ for i in "$ns1" "$ns2" "$ns3" ;do ip -net $i link set lo up done -setup_hsr_interfaces +setup_hsr_interfaces 0 +do_complete_ping_test +cleanup + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +setup_hsr_interfaces 1 do_complete_ping_test exit $ret From ea852c17f5382a0a52041cfbd9a4451ae0fa1a38 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 15 Sep 2023 23:01:24 +0200 Subject: [PATCH 283/333] tsnep: Fix NAPI scheduling According to the NAPI documentation networking/napi.rst, drivers which have to mask interrupts explicitly should use the napi_schedule_prep() and __napi_schedule() calls. No problem seen so far with current implementation. Nevertheless, let's align the implementation with documentation. Signed-off-by: Gerhard Engleder Signed-off-by: David S. 
Miller --- drivers/net/ethernet/engleder/tsnep_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index f61bd89734c5..0cdf0de555ed 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -87,8 +87,11 @@ static irqreturn_t tsnep_irq(int irq, void *arg) /* handle TX/RX queue 0 interrupt */ if ((active & adapter->queue[0].irq_mask) != 0) { - tsnep_disable_irq(adapter, adapter->queue[0].irq_mask); - napi_schedule(&adapter->queue[0].napi); + if (napi_schedule_prep(&adapter->queue[0].napi)) { + tsnep_disable_irq(adapter, adapter->queue[0].irq_mask); + /* schedule after masking to avoid races */ + __napi_schedule(&adapter->queue[0].napi); + } } return IRQ_HANDLED; @@ -99,8 +102,11 @@ static irqreturn_t tsnep_irq_txrx(int irq, void *arg) struct tsnep_queue *queue = arg; /* handle TX/RX queue interrupt */ - tsnep_disable_irq(queue->adapter, queue->irq_mask); - napi_schedule(&queue->napi); + if (napi_schedule_prep(&queue->napi)) { + tsnep_disable_irq(queue->adapter, queue->irq_mask); + /* schedule after masking to avoid races */ + __napi_schedule(&queue->napi); + } return IRQ_HANDLED; } From a7f991953d73dd50c4c23b5437c0139960e1fad4 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 15 Sep 2023 23:01:25 +0200 Subject: [PATCH 284/333] tsnep: Fix ethtool channels According to the NAPI documentation networking/napi.rst, for the ethtool API a channel is a IRQ/NAPI which services queues of a given type. tsnep uses a single IRQ/NAPI instance for every TX/RX queue pair. Therefore, combined channels shall be returned instead of separate tx/rx channels. Signed-off-by: Gerhard Engleder Signed-off-by: David S. 
Miller --- drivers/net/ethernet/engleder/tsnep_ethtool.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep_ethtool.c b/drivers/net/ethernet/engleder/tsnep_ethtool.c index 716815dad7d2..65ec1abc9442 100644 --- a/drivers/net/ethernet/engleder/tsnep_ethtool.c +++ b/drivers/net/ethernet/engleder/tsnep_ethtool.c @@ -300,10 +300,8 @@ static void tsnep_ethtool_get_channels(struct net_device *netdev, { struct tsnep_adapter *adapter = netdev_priv(netdev); - ch->max_rx = adapter->num_rx_queues; - ch->max_tx = adapter->num_tx_queues; - ch->rx_count = adapter->num_rx_queues; - ch->tx_count = adapter->num_tx_queues; + ch->max_combined = adapter->num_queues; + ch->combined_count = adapter->num_queues; } static int tsnep_ethtool_get_ts_info(struct net_device *netdev, From 46589db3817bd8b523701274885984b5a5dda7d1 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 15 Sep 2023 23:01:26 +0200 Subject: [PATCH 285/333] tsnep: Fix NAPI polling with budget 0 According to the NAPI documentation networking/napi.rst, Rx specific APIs like page pool and XDP cannot be used at all when budget is 0. skb Tx processing should happen regardless of the budget. Stop NAPI polling after Tx processing and skip Rx processing if budget is 0. Signed-off-by: Gerhard Engleder Signed-off-by: David S. Miller --- drivers/net/ethernet/engleder/tsnep_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 0cdf0de555ed..8b992dc9bb52 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -1734,6 +1734,10 @@ static int tsnep_poll(struct napi_struct *napi, int budget) if (queue->tx) complete = tsnep_tx_poll(queue->tx, budget); + /* handle case where we are called by netpoll with a budget of 0 */ + if (unlikely(budget <= 0)) + return budget; + if (queue->rx) { done = queue->rx->xsk_pool ? 
tsnep_rx_poll_zc(queue->rx, napi, budget) : From 6bec041147a2a64a490d1f813e8a004443061b38 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:45 +0200 Subject: [PATCH 286/333] mptcp: fix bogus receive window shrinkage with multiple subflows In case multiple subflows race to update the mptcp-level receive window, the subflow losing the race should use the window value provided by the "winning" subflow to update its own tcp-level rcv_wnd. To that end, the current code bogusly uses the mptcp-level rcv_wnd value as observed before the update attempt. Under unlucky circumstances that may lead to TCP-level window shrinkage, and stall the other end. Address the issue by feeding the correct value to the rcv wnd update. Fixes: f3589be0c420 ("mptcp: never shrink offered window") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/427 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/options.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c254accb14de..cd15ec73073e 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1269,12 +1269,13 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) if (rcv_wnd == rcv_wnd_old) break; - if (before64(rcv_wnd_new, rcv_wnd)) { + + rcv_wnd_old = rcv_wnd; + if (before64(rcv_wnd_new, rcv_wnd_old)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE); goto raise_win; } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); - rcv_wnd_old = rcv_wnd; } return; } From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:46 +0200 Subject: [PATCH 287/333] mptcp: move __mptcp_error_report in protocol.c This will simplify the next patch ("mptcp: process pending subflow error on close"). No functional change intended.
Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++++++++++++ net/mptcp/subflow.c | 36 ------------------------------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a7fc16f5175d..915860027b1a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); + int ssk_state; + + if (!err) + continue; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + break; + } +} + /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. 
*/ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9bf3c7bc1762..2f40c23fdb0d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -void __mptcp_error_report(struct sock *sk) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } -} - static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; From 9f1a98813b4b686482e5ef3c9d998581cace0ba6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:47 +0200 Subject: [PATCH 288/333] mptcp: process pending subflow error on close On incoming TCP reset, subflow closing could happen before error propagation. That in turn could cause the socket error being ignored, and a missing socket state transition, as reported by Daire-Byrne. Address the issues explicitly checking for subflow socket error at close time. To avoid code duplication, factor-out of __mptcp_error_report() a new helper implementing the relevant bits. 
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/429 Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 63 ++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 915860027b1a..1c96b8da71df 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,40 +770,44 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) +{ + int err = sock_error(ssk); + int ssk_state; + + if (!err) + return false; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk))) + return false; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + return true; +} + void __mptcp_error_report(struct sock *sk) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. 
- * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } + mptcp_for_each_subflow(msk, subflow) + if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow))) + break; } /* In most cases we will be able to lock the mptcp socket. If its already @@ -2428,6 +2432,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, } out_release: + __mptcp_subflow_error_report(sk, ssk); release_sock(ssk); sock_put(ssk); From f6909dc1c1f4452879278128012da6c76bc186a5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:48 +0200 Subject: [PATCH 289/333] mptcp: rename timer related helper to less confusing names The msk socket uses two different timeouts to track close related events and retransmissions. The existing helpers do not indicate clearly which timer they actually touch, making the related code quite confusing. Change the existing helpers' names to avoid such confusion. No functional change intended. This patch is linked to the next one ("mptcp: fix dangling connection hang-up"). The two patches are supposed to be backported together. Cc: stable@vger.kernel.org # v5.11+ Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S.
Miller --- net/mptcp/protocol.c | 42 +++++++++++++++++++++--------------------- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1c96b8da71df..c8f38f303a90 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -405,7 +405,7 @@ drop: return false; } -static void mptcp_stop_timer(struct sock *sk) +static void mptcp_stop_rtx_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -911,12 +911,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list } } -static bool mptcp_timer_pending(struct sock *sk) +static bool mptcp_rtx_timer_pending(struct sock *sk) { return timer_pending(&inet_csk(sk)->icsk_retransmit_timer); } -static void mptcp_reset_timer(struct sock *sk) +static void mptcp_reset_rtx_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); unsigned long tout; @@ -1050,10 +1050,10 @@ static void __mptcp_clean_una(struct sock *sk) out: if (snd_una == READ_ONCE(msk->snd_nxt) && snd_una == READ_ONCE(msk->write_seq)) { - if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) - mptcp_stop_timer(sk); + if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) + mptcp_stop_rtx_timer(sk); } else { - mptcp_reset_timer(sk); + mptcp_reset_rtx_timer(sk); } } @@ -1626,8 +1626,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) mptcp_push_release(ssk, &info); /* ensure the rtx timer is running */ - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); if (do_check_data_fin) mptcp_check_send_data_fin(sk); } @@ -1690,8 +1690,8 @@ out: if (copied) { tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); if (msk->snd_data_fin_enable && msk->snd_nxt + 1 
== msk->write_seq) @@ -2260,7 +2260,7 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } -static void mptcp_timeout_timer(struct timer_list *t) +static void mptcp_tout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); @@ -2629,14 +2629,14 @@ static void __mptcp_retrans(struct sock *sk) reset_timer: mptcp_check_and_set_pending(sk); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); } /* schedule the timeout timer for the relevant event: either close timeout * or mp_fail timeout. The close timeout takes precedence on the mp_fail one */ -void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout) +void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) { struct sock *sk = (struct sock *)msk; unsigned long timeout, close_timeout; @@ -2669,7 +2669,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); unlock_sock_fast(ssk, slow); - mptcp_reset_timeout(msk, 0); + mptcp_reset_tout_timer(msk, 0); } static void mptcp_do_fastclose(struct sock *sk) @@ -2758,7 +2758,7 @@ static void __mptcp_init_sock(struct sock *sk) /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); - timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); + timer_setup(&sk->sk_timer, mptcp_tout_timer, 0); } static void mptcp_ca_reset(struct sock *sk) @@ -2849,8 +2849,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); } break; } @@ -2933,7 +2933,7 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - mptcp_stop_timer(sk); + mptcp_stop_rtx_timer(sk); sk_stop_timer(sk, &sk->sk_timer); 
msk->pm.status = 0; mptcp_release_sched(msk); @@ -3053,7 +3053,7 @@ cleanup: __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_reset_timeout(msk, 0); + mptcp_reset_tout_timer(msk, 0); } return do_cancel_work; @@ -3116,7 +3116,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_stop_timer(sk); + mptcp_stop_rtx_timer(sk); sk_stop_timer(sk, &sk->sk_timer); if (msk->token) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7254b3562575..5e2026815c8e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -718,7 +718,7 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); -void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); +void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2f40c23fdb0d..433f290984c8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) WRITE_ONCE(subflow->fail_tout, fail_tout); tcp_send_ack(ssk); - mptcp_reset_timeout(msk, subflow->fail_tout); + mptcp_reset_tout_timer(msk, subflow->fail_tout); } static bool subflow_check_data_avail(struct sock *ssk) From 27e5ccc2d5a50ed61bb73153edb1066104b108b3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:49 +0200 Subject: [PATCH 290/333] mptcp: fix dangling connection hang-up According to RFC 8684 section 3.3: A connection is not closed unless [...] or an implementation-specific connection-level send timeout. Currently the MPTCP protocol does not implement such timeout, and connection timing-out at the TCP-level never move to close state. 
Introduce a catch-up condition at subflow close time to move the MPTCP socket to close, too. That additionally allows removing similar existing logic inside the worker. Finally, allow some additional timeout for plain ESTABLISHED mptcp sockets, as the protocol allows creating new subflows even at that point and making the connection functional again. This issue has actually been present since the beginning, but it is basically impossible to solve without a long chain of functional pre-requisites topped by commit bbd49d114d57 ("mptcp: consolidate transition to TCP_CLOSE in mptcp_do_fastclose()"). When backporting this current patch, please also backport this other commit. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430 Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 86 ++++++++++++++++++++++---------------------- net/mptcp/protocol.h | 22 ++++++++++++ net/mptcp/subflow.c | 1 + 3 files changed, 65 insertions(+), 44 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c8f38f303a90..e252539b1e19 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -892,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); + mptcp_stop_tout_timer(sk); return true; } @@ -2369,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, bool dispose_it, need_push = false; /* If the first subflow moved to a close state before accept, e.g.
due - * to an incoming reset, mptcp either: - * - if either the subflow or the msk are dead, destroy the context - * (the subflow socket is deleted by inet_child_forget) and the msk - * - otherwise do nothing at the moment and take action at accept and/or - * listener shutdown - user-space must be able to accept() the closed - * socket. + * to an incoming reset or listener shutdown, the subflow socket is + * already deleted by inet_child_forget() and the mptcp socket can't + * survive too. */ - if (msk->in_accept_queue && msk->first == ssk) { - if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) - return; - + if (msk->in_accept_queue && msk->first == ssk && + (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) { /* ensure later check in mptcp_worker() will dispose the msk */ + mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); sock_set_flag(sk, SOCK_DEAD); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); mptcp_subflow_drop_ctx(ssk); @@ -2443,6 +2440,22 @@ out_release: out: if (need_push) __mptcp_push_pending(sk, 0); + + /* Catch every 'all subflows closed' scenario, including peers silently + * closing them, e.g. due to timeout. + * For established sockets, allow an additional timeout before closing, + * as the protocol can still create more subflows. 
+ */ + if (list_is_singular(&msk->conn_list) && msk->first && + inet_sk_state_load(msk->first) == TCP_CLOSE) { + if (sk->sk_state != TCP_ESTABLISHED || + msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_close_wake_up(sk); + } else { + mptcp_start_tout_timer(sk); + } + } } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2486,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk) } -static bool mptcp_should_close(const struct sock *sk) +static bool mptcp_close_tout_expired(const struct sock *sk) { - s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; - struct mptcp_subflow_context *subflow; + if (!inet_csk(sk)->icsk_mtup.probe_timestamp || + sk->sk_state == TCP_CLOSE) + return false; - if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) - return true; - - /* if all subflows are in closed status don't bother with additional - * timeout - */ - mptcp_for_each_subflow(mptcp_sk(sk), subflow) { - if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) != - TCP_CLOSE) - return false; - } - return true; + return time_after32(tcp_jiffies32, + inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN); } static void mptcp_check_fastclose(struct mptcp_sock *msk) @@ -2641,15 +2645,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) struct sock *sk = (struct sock *)msk; unsigned long timeout, close_timeout; - if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) + if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp) return; - close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; + close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + + TCP_TIMEWAIT_LEN; /* the close timeout takes precedence on the fail one, and here at least one of * them is active */ - timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; + timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? 
close_timeout : fail_tout; sk_reset_timer(sk, &sk->sk_timer, timeout); } @@ -2668,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) mptcp_subflow_reset(ssk); WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); unlock_sock_fast(ssk, slow); - - mptcp_reset_tout_timer(msk, 0); } static void mptcp_do_fastclose(struct sock *sk) @@ -2706,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(sk); - /* There is no point in keeping around an orphaned sk timedout or - * closed, but we need the msk around to reply to incoming DATA_FIN, - * even if it is orphaned and in FIN_WAIT2 state - */ - if (sock_flag(sk, SOCK_DEAD)) { - if (mptcp_should_close(sk)) - mptcp_do_fastclose(sk); + if (mptcp_close_tout_expired(sk)) { + mptcp_do_fastclose(sk); + mptcp_close_wake_up(sk); + } - if (sk->sk_state == TCP_CLOSE) { - __mptcp_destroy_sock(sk); - goto unlock; - } + if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + goto unlock; } if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) @@ -3016,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ - inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); @@ -3053,7 +3051,7 @@ cleanup: __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_reset_tout_timer(msk, 0); + mptcp_start_tout_timer(sk); } return do_cancel_work; @@ -3117,7 +3115,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); - sk_stop_timer(sk, &sk->sk_timer); + mptcp_stop_tout_timer(sk); if (msk->token) mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5e2026815c8e..ed61d6850cce 100644 --- 
a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -719,6 +719,28 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); + +static inline void mptcp_stop_tout_timer(struct sock *sk) +{ + if (!inet_csk(sk)->icsk_mtup.probe_timestamp) + return; + + sk_stop_timer(sk, &sk->sk_timer); + inet_csk(sk)->icsk_mtup.probe_timestamp = 0; +} + +static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout) +{ + /* avoid 0 timestamp, as that means no close timeout */ + inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1; +} + +static inline void mptcp_start_tout_timer(struct sock *sk) +{ + mptcp_set_close_tout(sk, tcp_jiffies32); + mptcp_reset_tout_timer(mptcp_sk(sk), 0); +} + static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 433f290984c8..918c1a235790 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_stop_tout_timer(sk); return 0; failed_unlink: From 418f438a2db67503fe00cef5bbd64fd1f891e145 Mon Sep 17 00:00:00 2001 From: Peter Lafreniere Date: Sun, 17 Sep 2023 15:29:58 +0000 Subject: [PATCH 291/333] Documentation: netdev: fix dead link in ax25.rst http://linux-ax25.org has been down for nearly a year. Its official replacement is https://linux-ax25.in-berlin.de. Update the documentation to point there instead. And acknowledge that while the linux-hams list isn't entirely dead, it isn't what most would call 'active'. Remove that word. Link: https://marc.info/?m=166792551600315 Signed-off-by: Peter Lafreniere Signed-off-by: David S. 
Miller --- Documentation/networking/ax25.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst index f060cfb1445a..605e72c6c877 100644 --- a/Documentation/networking/ax25.rst +++ b/Documentation/networking/ax25.rst @@ -7,9 +7,9 @@ AX.25 To use the amateur radio protocols within Linux you will need to get a suitable copy of the AX.25 Utilities. More detailed information about AX.25, NET/ROM and ROSE, associated programs and utilities can be -found on http://www.linux-ax25.org. +found on https://linux-ax25.in-berlin.de. -There is an active mailing list for discussing Linux amateur radio matters +There is a mailing list for discussing Linux amateur radio matters called linux-hams@vger.kernel.org. To subscribe to it, send a message to majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body of the message, the subject field is ignored. You don't need to be From 1943f2b0ac5a9fde718c18c1f4ab8332c8b5cd60 Mon Sep 17 00:00:00 2001 From: Peter Lafreniere Date: Sun, 17 Sep 2023 15:30:10 +0000 Subject: [PATCH 292/333] MAINTAINERS: Update link for linux-ax25.org http://linux-ax25.org has been down for nearly a year. Its official replacement is https://linux-ax25.in-berlin.de. Update all links to the new URL. Link: https://marc.info/?m=166792551600315 Signed-off-by: Peter Lafreniere Signed-off-by: David S. 
Miller --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index bf0f54c24f81..c155eb535906 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3344,7 +3344,7 @@ AX.25 NETWORK LAYER M: Ralf Baechle L: linux-hams@vger.kernel.org S: Maintained -W: http://www.linux-ax25.org/ +W: https://linux-ax25.in-berlin.de F: include/net/ax25.h F: include/uapi/linux/ax25.h F: net/ax25/ @@ -14756,7 +14756,7 @@ NETROM NETWORK LAYER M: Ralf Baechle L: linux-hams@vger.kernel.org S: Maintained -W: http://www.linux-ax25.org/ +W: https://linux-ax25.in-berlin.de F: include/net/netrom.h F: include/uapi/linux/netrom.h F: net/netrom/ @@ -18607,7 +18607,7 @@ ROSE NETWORK LAYER M: Ralf Baechle L: linux-hams@vger.kernel.org S: Maintained -W: http://www.linux-ax25.org/ +W: https://linux-ax25.in-berlin.de F: include/net/rose.h F: include/uapi/linux/rose.h F: net/rose/ From 71273c46a34823e54af7828df67e5983ba85d6c2 Mon Sep 17 00:00:00 2001 From: Peter Lafreniere Date: Sun, 17 Sep 2023 15:30:21 +0000 Subject: [PATCH 293/333] ax25: Kconfig: Update link for linux-ax25.org http://linux-ax25.org has been down for nearly a year. Its official replacement is https://linux-ax25.in-berlin.de. Change all references to the old site in the ax25 Kconfig to its replacement. Link: https://marc.info/?m=166792551600315 Signed-off-by: Peter Lafreniere Signed-off-by: David S. Miller --- net/ax25/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig index d3a9843a043d..fdb666607f10 100644 --- a/net/ax25/Kconfig +++ b/net/ax25/Kconfig @@ -10,7 +10,7 @@ menuconfig HAMRADIO If you want to connect your Linux box to an amateur radio, answer Y here. You want to read and more specifically about AX.25 on Linux - . + . Note that the answer to this question won't directly affect the kernel: saying N will just cause the configurator to skip all @@ -61,7 +61,7 @@ config AX25_DAMA_SLAVE configuration. 
Linux cannot yet act as a DAMA server. This option only compiles DAMA slave support into the kernel. It still needs to be enabled at runtime. For more about DAMA see - . If unsure, say Y. + . If unsure, say Y. # placeholder until implemented config AX25_DAMA_MASTER @@ -87,9 +87,9 @@ config NETROM A comprehensive listing of all the software for Linux amateur radio users as well as information about how to configure an AX.25 port is contained in the Linux Ham Wiki, available from - . You also might want to check out the - file . More information about - digital amateur radio in general is on the WWW at + . You also might want to check out + the file . More information + about digital amateur radio in general is on the WWW at . To compile this driver as a module, choose M here: the @@ -106,9 +106,9 @@ config ROSE A comprehensive listing of all the software for Linux amateur radio users as well as information about how to configure an AX.25 port is contained in the Linux Ham Wiki, available from - . You also might want to check out the - file . More information about - digital amateur radio in general is on the WWW at + . You also might want to check out + the file . More information + about digital amateur radio in general is on the WWW at . To compile this driver as a module, choose M here: the From 5f66db08cbd3ca471c66bacb0282902c79db9274 Mon Sep 17 00:00:00 2001 From: Stefan Moring Date: Sun, 17 Sep 2023 18:40:37 +0200 Subject: [PATCH 294/333] spi: imx: Take in account bits per word instead of assuming 8-bits The IMX spi driver has a hardcoded 8, breaking the driver for word lengths other than 8. 
Signed-off-by: Stefan Moring Reported-by: Sebastian Reichel Fixes: 15a6af94a277 ("spi: Increase imx51 ecspi burst length based on transfer length") Tested-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230917164037.29284-1-stefanmoring@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index a8a74c7cb79f..498e35c8db2c 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -662,7 +662,7 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, if (spi_imx->count >= 512) ctrl |= 0xFFF << MX51_ECSPI_CTRL_BL_OFFSET; else - ctrl |= (spi_imx->count*8 - 1) + ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1) << MX51_ECSPI_CTRL_BL_OFFSET; } From 52954b750958dcab9e44935f0c32643279091c85 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 11 Sep 2023 20:00:28 +0200 Subject: [PATCH 295/333] gfs2: Fix another freeze/thaw hang On a thawed filesystem, the freeze glock is held in shared mode. In order to initiate a cluster-wide freeze, the node initiating the freeze drops the freeze glock and grabs it in exclusive mode. The other nodes recognize this as contention on the freeze glock; function freeze_go_callback is invoked. This indicates to them that they must freeze the filesystem locally, drop the freeze glock, and then re-acquire it in shared mode before being able to unfreeze the filesystem locally. While a node is trying to re-acquire the freeze glock in shared mode, additional contention can occur. In that case, the node must behave in the same way as above. Unfortunately, freeze_go_callback() contains a check that causes it to bail out when the freeze glock isn't held in shared mode. Fix that to allow the glock to be unlocked or held in shared mode. In addition, update a reference to trylock_super() which has been renamed to super_trylock_shared() in the meantime. 
Fixes: b77b4a4815a9 ("gfs2: Rework freeze / thaw logic") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index d26759a98b10..f41ca89d216b 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -567,15 +567,16 @@ static void freeze_go_callback(struct gfs2_glock *gl, bool remote) struct super_block *sb = sdp->sd_vfs; if (!remote || - gl->gl_state != LM_ST_SHARED || + (gl->gl_state != LM_ST_SHARED && + gl->gl_state != LM_ST_UNLOCKED) || gl->gl_demote_state != LM_ST_UNLOCKED) return; /* * Try to get an active super block reference to prevent racing with - * unmount (see trylock_super()). But note that unmount isn't the only - * place where a write lock on s_umount is taken, and we can fail here - * because of things like remount as well. + * unmount (see super_trylock_shared()). But note that unmount isn't + * the only place where a write lock on s_umount is taken, and we can + * fail here because of things like remount as well. */ if (down_read_trylock(&sb->s_umount)) { atomic_inc(&sb->s_active); From 62862485a4c3a52029fc30f4bdde9af04afdafc9 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 12 Sep 2023 08:05:51 -0500 Subject: [PATCH 296/333] gfs2: fix glock shrinker ref issues Before this patch, function gfs2_scan_glock_lru would only try to free glocks that had a reference count of 0. But if the reference count ever got to 0, the glock should have already been freed. Shrinker function gfs2_dispose_glock_lru checks whether glocks on the LRU are demote_ok, and if so, tries to demote them. But that's only possible if the reference count is at least 1. This patch changes gfs2_scan_glock_lru so it will try to demote and/or dispose of glocks that have a reference count of 1 and which are either demotable, or are already unlocked. 
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9cbf8d98489a..4a280be229a6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2010,7 +2010,9 @@ static long gfs2_scan_glock_lru(int nr) if (!test_bit(GLF_LOCK, &gl->gl_flags)) { if (!spin_trylock(&gl->gl_lockref.lock)) continue; - if (!gl->gl_lockref.count) { + if (gl->gl_lockref.count <= 1 && + (gl->gl_state == LM_ST_UNLOCKED || + demote_ok(gl))) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); freed++; From fb95d536080e6c1db099f0023f59cd55adcc5d87 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 18 Sep 2023 08:13:41 -0500 Subject: [PATCH 297/333] gfs2: Fix quota=quiet oversight Patch eef46ab713f7 introduced a new gfs2 quota=quiet mount option. Checks for the new option were added to quota.c, but a check in gfs2_quota_lock_check() was overlooked. This patch adds the missing check. 
Fixes: eef46ab713f7 ("gfs2: Introduce new quota=quiet mount option") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 21ada332d555..1429945215a0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -50,7 +50,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) return ret; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) + if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON && + sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) return 0; ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) From 2dd1d862817b850787f4755c05d55e5aeb76dd08 Mon Sep 17 00:00:00 2001 From: Ahmad Khalifa Date: Mon, 18 Sep 2023 19:47:22 +0100 Subject: [PATCH 298/333] hwmon: (nct6775) Fix non-existent ALARM warning Skip non-existent ALARM attribute to avoid a shift-out-of-bounds dmesg warning. 
Reported-by: Doug Smythies Closes: https://lore.kernel.org/linux-hwmon/ZQVzdlHgWdFhOVyQ@debian.me/T/#mc69b690660eb50734a6b07506d74a119e0266f1b Fixes: b7f1f7b2523a ("hwmon: (nct6775) Additional TEMP registers for nct6799") Signed-off-by: Ahmad Khalifa Link: https://lore.kernel.org/r/20230918184722.2033225-1-ahmad@khalifa.ws Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index 02a71244fc3b..b5b81bd83bb1 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -1910,6 +1910,10 @@ static umode_t nct6775_in_is_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct nct6775_data *data = dev_get_drvdata(dev); int in = index / 5; /* voltage index */ + int nr = index % 5; /* attribute index */ + + if (nr == 1 && data->ALARM_BITS[in] == -1) + return 0; if (!(data->have_in & BIT(in))) return 0; From df1c357f25d808e30b216188330e708e09e1a412 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Mon, 18 Sep 2023 14:17:11 +0100 Subject: [PATCH 299/333] netfs: Only call folio_start_fscache() one time for each folio If a network filesystem using netfs implements a clamp_length() function, it can set subrequest lengths smaller than a page size. When we loop through the folios in netfs_rreq_unlock_folios() to set any folios to be written back, we need to make sure we only call folio_start_fscache() once for each folio. Otherwise, this simple testcase: mount -o fsc,rsize=1024,wsize=1024 127.0.0.1:/export /mnt/nfs dd if=/dev/zero of=/mnt/nfs/file.bin bs=4096 count=1 1+0 records in 1+0 records out 4096 bytes (4.1 kB, 4.0 KiB) copied, 0.0126359 s, 324 kB/s echo 3 > /proc/sys/vm/drop_caches cat /mnt/nfs/file.bin > /dev/null will trigger an oops similar to the following: page dumped because: VM_BUG_ON_FOLIO(folio_test_private_2(folio)) ------------[ cut here ]------------ kernel BUG at include/linux/netfs.h:44! ... 
CPU: 5 PID: 134 Comm: kworker/u16:5 Kdump: loaded Not tainted 6.4.0-rc5 ... RIP: 0010:netfs_rreq_unlock_folios+0x68e/0x730 [netfs] ... Call Trace: netfs_rreq_assess+0x497/0x660 [netfs] netfs_subreq_terminated+0x32b/0x610 [netfs] nfs_netfs_read_completion+0x14e/0x1a0 [nfs] nfs_read_completion+0x2f9/0x330 [nfs] rpc_free_task+0x72/0xa0 [sunrpc] rpc_async_release+0x46/0x70 [sunrpc] process_one_work+0x3bd/0x710 worker_thread+0x89/0x610 kthread+0x181/0x1c0 ret_from_fork+0x29/0x50 Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2210612 Signed-off-by: Dave Wysochanski Reviewed-by: Jeff Layton Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230608214137.856006-1-dwysocha@redhat.com/ # v1 Link: https://lore.kernel.org/r/20230915185704.1082982-1-dwysocha@redhat.com/ # v2 Signed-off-by: Linus Torvalds --- fs/netfs/buffered_read.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 3404707ddbe7..2cd3ccf4c439 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -47,12 +47,14 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) xas_for_each(&xas, folio, last_page) { loff_t pg_end; bool pg_failed = false; + bool folio_started; if (xas_retry(&xas, folio)) continue; pg_end = folio_pos(folio) + folio_size(folio) - 1; + folio_started = false; for (;;) { loff_t sreq_end; @@ -60,8 +62,10 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) pg_failed = true; break; } - if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) + if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { folio_start_fscache(folio); + folio_started = true; + } pg_failed |= subreq_failed; sreq_end = subreq->start + subreq->len - 1; if (pg_end < sreq_end) From 37510dd566bdbff31a769cde2fa6654bccdb8b24 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 24 Aug 2023 17:34:21 +0200
Subject: [PATCH 300/333] xen: simplify evtchn_do_upcall() call maze There are several functions involved for performing the functionality of evtchn_do_upcall(): - __xen_evtchn_do_upcall() doing the real work - xen_hvm_evtchn_do_upcall() just being a wrapper for __xen_evtchn_do_upcall(), exposed for external callers - xen_evtchn_do_upcall() calling __xen_evtchn_do_upcall(), too, but without any user Simplify this maze by: - removing the unused xen_evtchn_do_upcall() - removing xen_hvm_evtchn_do_upcall() as the only left caller of __xen_evtchn_do_upcall(), while renaming __xen_evtchn_do_upcall() to xen_evtchn_do_upcall() Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Thomas Gleixner Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 2 +- arch/x86/entry/common.c | 2 +- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_hvm.c | 2 +- drivers/xen/events/events_base.c | 21 ++------------------- drivers/xen/platform-pci.c | 2 +- include/xen/events.h | 3 +-- 7 files changed, 8 insertions(+), 26 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 7d59765aef22..c392e18f1e43 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -207,7 +207,7 @@ static void xen_power_off(void) static irqreturn_t xen_arm_callback(int irq, void *arg) { - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); return IRQ_HANDLED; } diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6c2826417b33..93c60c0c9d4a 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -294,7 +294,7 @@ static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) inc_irq_stat(irq_hv_callback_count); - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); set_irq_regs(old_regs); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b8db2148c07d..0337392a3121 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -32,7 +32,7 @@ EXPORT_SYMBOL_GPL(hypercall_page); * 
&HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. - * The pointer is used in __xen_evtchn_do_upcall to acknowledge pending events. + * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 9a192f51f1b0..3f8c34707c50 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -136,7 +136,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback) inc_irq_stat(irq_hv_callback_count); - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); set_irq_regs(old_regs); } diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 3bdd5b59661d..0bb86e6c4d0a 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1704,7 +1704,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) generic_handle_irq(irq); } -static int __xen_evtchn_do_upcall(void) +int xen_evtchn_do_upcall(void) { struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE; @@ -1735,24 +1735,7 @@ static int __xen_evtchn_do_upcall(void) return ret; } - -void xen_evtchn_do_upcall(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - irq_enter(); - - __xen_evtchn_do_upcall(); - - irq_exit(); - set_irq_regs(old_regs); -} - -int xen_hvm_evtchn_do_upcall(void) -{ - return __xen_evtchn_do_upcall(); -} -EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); +EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall); /* Rebind a new event channel to an existing irq. 
*/ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index fcc819131572..544d3f9010b9 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -64,7 +64,7 @@ static uint64_t get_callback_via(struct pci_dev *pdev) static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id) { - return xen_hvm_evtchn_do_upcall(); + return xen_evtchn_do_upcall(); } static int xen_allocate_irq(struct pci_dev *pdev) diff --git a/include/xen/events.h b/include/xen/events.h index 95d5e28de324..23932b0673dc 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -105,8 +105,7 @@ int irq_from_virq(unsigned int cpu, unsigned int virq); evtchn_port_t evtchn_from_irq(unsigned irq); int xen_set_callback_via(uint64_t via); -void xen_evtchn_do_upcall(struct pt_regs *regs); -int xen_hvm_evtchn_do_upcall(void); +int xen_evtchn_do_upcall(void); /* Bind a pirq for a physical interrupt to an irq. */ int xen_bind_pirq_gsi_to_irq(unsigned gsi, From 361239fd1448d64faa4adba5bbf100401c0a606e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:26 +0200 Subject: [PATCH 301/333] arm/xen: remove lazy mode related definitions include/xen/arm/hypervisor.h contains definitions related to paravirt lazy mode, which are used nowhere in the code. All paravirt lazy mode related users are in x86 code, so remove the definitions on Arm side. 
Signed-off-by: Juergen Gross Acked-by: Stefano Stabellini Link: https://lore.kernel.org/r/20230913113828.18421-2-jgross@suse.com Signed-off-by: Juergen Gross --- include/xen/arm/hypervisor.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h index 43ef24dd030e..9995695204f5 100644 --- a/include/xen/arm/hypervisor.h +++ b/include/xen/arm/hypervisor.h @@ -7,18 +7,6 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - -static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) -{ - return PARAVIRT_LAZY_NONE; -} - #ifdef CONFIG_XEN void __init xen_early_init(void); #else From a4a7644c15096f57f92252dd6e1046bf269c87d8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:27 +0200 Subject: [PATCH 302/333] x86/xen: move paravirt lazy code Only Xen is using the paravirt lazy mode code, so it can be moved to Xen specific sources. This allows making some of the functions static or merging them into their only call sites. While at it, do a rename from "paravirt" to "xen" for all moved specifiers. No functional change.
Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20230913113828.18421-3-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/paravirt_types.h | 15 ------ arch/x86/include/asm/xen/hypervisor.h | 26 +++++++++++ arch/x86/kernel/paravirt.c | 67 --------------------------- arch/x86/xen/enlighten_pv.c | 39 +++++++++++++--- arch/x86/xen/mmu_pv.c | 55 ++++++++++++++-------- arch/x86/xen/multicalls.h | 4 +- include/trace/events/xen.h | 12 ++--- 7 files changed, 102 insertions(+), 116 deletions(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 4acbcddddc29..772d03487520 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -9,13 +9,6 @@ struct paravirt_patch_site { u8 type; /* type of this instruction */ u8 len; /* length of original instruction */ }; - -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; #endif #ifdef CONFIG_PARAVIRT @@ -549,14 +542,6 @@ int paravirt_disable_iospace(void); __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_start_context_switch(struct task_struct *prev); -void paravirt_end_context_switch(struct task_struct *next); - -void paravirt_enter_lazy_mmu(void); -void paravirt_leave_lazy_mmu(void); -void paravirt_flush_lazy_mmu(void); - void _paravirt_nop(void); void paravirt_BUG(void); unsigned long paravirt_ret0(void); diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 5fc35f889cd1..ed05ce3df5c7 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -36,6 +36,7 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +#include #include #define 
XEN_SIGNATURE "XenVMMXenVMM" @@ -63,4 +64,29 @@ void __init xen_pvh_init(struct boot_params *boot_params); void __init mem_map_via_hcall(struct boot_params *boot_params_p); #endif +/* Lazy mode for batching updates / context switch */ +enum xen_lazy_mode { + XEN_LAZY_NONE, + XEN_LAZY_MMU, + XEN_LAZY_CPU, +}; + +DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); + +static inline void enter_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); + + this_cpu_write(xen_lazy_mode, mode); +} + +static inline void leave_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != mode); + + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); +} + +enum xen_lazy_mode xen_get_lazy_mode(void); + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 975f98d5eee5..97f1436c1a20 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -143,66 +143,7 @@ int paravirt_disable_iospace(void) return request_resource(&ioport_resource, &reserve_ioports); } -static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; - -static inline void enter_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - - this_cpu_write(paravirt_lazy_mode, mode); -} - -static void leave_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode); - - this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); -} - -void paravirt_enter_lazy_mmu(void) -{ - enter_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_leave_lazy_mmu(void) -{ - leave_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_flush_lazy_mmu(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - #ifdef CONFIG_PARAVIRT_XXL -void paravirt_start_context_switch(struct task_struct *prev) -{ - BUG_ON(preemptible()); 
- - if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); - } - enter_lazy(PARAVIRT_LAZY_CPU); -} - -void paravirt_end_context_switch(struct task_struct *next) -{ - BUG_ON(preemptible()); - - leave_lazy(PARAVIRT_LAZY_CPU); - - if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) - arch_enter_lazy_mmu_mode(); -} - static noinstr void pv_native_write_cr2(unsigned long val) { native_write_cr2(val); @@ -229,14 +170,6 @@ static noinstr void pv_native_safe_halt(void) } #endif -enum paravirt_lazy_mode paravirt_get_lazy_mode(void) -{ - if (in_interrupt()) - return PARAVIRT_LAZY_NONE; - - return this_cpu_read(paravirt_lazy_mode); -} - struct pv_info pv_info = { .name = "bare hardware", #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 49352fad7d1d..54b83825c4b6 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -101,6 +101,16 @@ struct tls_descs { struct desc_struct desc[3]; }; +DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; + +enum xen_lazy_mode xen_get_lazy_mode(void) +{ + if (in_interrupt()) + return XEN_LAZY_NONE; + + return this_cpu_read(xen_lazy_mode); +} + /* * Updating the 3 TLS descriptors in the GDT on every task switch is * surprisingly expensive so we avoid updating them if they haven't @@ -362,10 +372,25 @@ static noinstr unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } +static void xen_start_context_switch(struct task_struct *prev) +{ + BUG_ON(preemptible()); + + if (this_cpu_read(xen_lazy_mode) == XEN_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); + } + enter_lazy(XEN_LAZY_CPU); +} + static void xen_end_context_switch(struct task_struct *next) { + BUG_ON(preemptible()); + xen_mc_flush(); - paravirt_end_context_switch(next); + 
leave_lazy(XEN_LAZY_CPU); + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) + arch_enter_lazy_mmu_mode(); } static unsigned long xen_store_tr(void) @@ -472,7 +497,7 @@ static void xen_set_ldt(const void *addr, unsigned entries) MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_load_gdt(const struct desc_ptr *dtr) @@ -568,7 +593,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * exception between the new %fs descriptor being loaded and * %fs being effectively cleared at __switch_to(). */ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (xen_get_lazy_mode() == XEN_LAZY_CPU) loadsegment(fs, 0); xen_mc_batch(); @@ -577,7 +602,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) load_TLS_descriptor(t, cpu, 1); load_TLS_descriptor(t, cpu, 2); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_load_gs_index(unsigned int idx) @@ -909,7 +934,7 @@ static void xen_load_sp0(unsigned long sp0) mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } @@ -973,7 +998,7 @@ static void xen_write_cr0(unsigned long cr0) MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_write_cr4(unsigned long cr4) @@ -1156,7 +1181,7 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = { #endif .io_delay = xen_io_delay, - .start_context_switch = paravirt_start_context_switch, + .start_context_switch = xen_start_context_switch, .end_context_switch = xen_end_context_switch, }, }; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1652c39e3dfb..b6830554ff69 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -236,7 +236,7 @@ static void xen_set_pmd_hyper(pmd_t *ptr, 
pmd_t val) u.val = pmd_val_ma(val); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -270,7 +270,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) { struct mmu_update u; - if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) + if (xen_get_lazy_mode() != XEN_LAZY_MMU) return false; xen_mc_batch(); @@ -279,7 +279,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) u.val = pte_val_ma(pteval); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); return true; } @@ -325,7 +325,7 @@ void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, u.val = pte_val_ma(pte); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } /* Assume pteval_t is equivalent to all the other *val_t types. */ @@ -419,7 +419,7 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val) u.val = pud_val_ma(val); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -499,7 +499,7 @@ static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val) __xen_set_p4d_hyper(ptr, val); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -531,7 +531,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) if (user_ptr) __xen_set_p4d_hyper((p4d_t *)user_ptr, val); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } #if CONFIG_PGTABLE_LEVELS >= 5 @@ -1245,7 +1245,7 @@ static noinline void xen_flush_tlb(void) op->cmd = MMUEXT_TLB_FLUSH_LOCAL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -1265,7 +1265,7 @@ static void xen_flush_tlb_one_user(unsigned long addr) op->arg1.linear_addr = addr & PAGE_MASK; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ 
-1302,7 +1302,7 @@ static void xen_flush_tlb_multi(const struct cpumask *cpus, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } static unsigned long xen_read_cr3(void) @@ -1361,7 +1361,7 @@ static void xen_write_cr3(unsigned long cr3) else __xen_write_cr3(false, 0); - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ } /* @@ -1396,7 +1396,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) __xen_write_cr3(true, cr3); - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ } static int xen_pgd_alloc(struct mm_struct *mm) @@ -1557,7 +1557,7 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned) __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } } @@ -1587,7 +1587,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) __set_pfn_prot(pfn, PAGE_KERNEL); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); ClearPagePinned(page); } @@ -1804,7 +1804,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) */ xen_mc_batch(); __xen_write_cr3(true, __pa(init_top_pgt)); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); /* We can't that easily rip out L3 and L2, as the Xen pagetables are * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... 
for @@ -2083,6 +2083,23 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } +static void xen_enter_lazy_mmu(void) +{ + enter_lazy(XEN_LAZY_MMU); +} + +static void xen_flush_lazy_mmu(void) +{ + preempt_disable(); + + if (xen_get_lazy_mode() == XEN_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + static void __init xen_post_allocator_init(void) { pv_ops.mmu.set_pte = xen_set_pte; @@ -2107,7 +2124,7 @@ static void xen_leave_lazy_mmu(void) { preempt_disable(); xen_mc_flush(); - paravirt_leave_lazy_mmu(); + leave_lazy(XEN_LAZY_MMU); preempt_enable(); } @@ -2166,9 +2183,9 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .exit_mmap = xen_exit_mmap, .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, + .enter = xen_enter_lazy_mmu, .leave = xen_leave_lazy_mmu, - .flush = paravirt_flush_lazy_mmu, + .flush = xen_flush_lazy_mmu, }, .set_fixmap = xen_set_fixmap, @@ -2385,7 +2402,7 @@ static noinline void xen_flush_tlb_all(void) op->cmd = MMUEXT_TLB_FLUSH_ALL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 1c51b2c87f30..c3867b585e0d 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -26,7 +26,7 @@ static inline void xen_mc_batch(void) /* need to disable interrupts until this entry is complete */ local_irq_save(flags); - trace_xen_mc_batch(paravirt_get_lazy_mode()); + trace_xen_mc_batch(xen_get_lazy_mode()); __this_cpu_write(xen_mc_irq_flags, flags); } @@ -44,7 +44,7 @@ static inline void xen_mc_issue(unsigned mode) { trace_xen_mc_issue(mode); - if ((paravirt_get_lazy_mode() & mode) == 0) + if ((xen_get_lazy_mode() & mode) == 0) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 44a3f565264d..0577f0cdd231 100644 --- 
a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -6,26 +6,26 @@ #define _TRACE_XEN_H #include -#include +#include #include struct multicall_entry; /* Multicalls */ DECLARE_EVENT_CLASS(xen_mc__batch, - TP_PROTO(enum paravirt_lazy_mode mode), + TP_PROTO(enum xen_lazy_mode mode), TP_ARGS(mode), TP_STRUCT__entry( - __field(enum paravirt_lazy_mode, mode) + __field(enum xen_lazy_mode, mode) ), TP_fast_assign(__entry->mode = mode), TP_printk("start batch LAZY_%s", - (__entry->mode == PARAVIRT_LAZY_MMU) ? "MMU" : - (__entry->mode == PARAVIRT_LAZY_CPU) ? "CPU" : "NONE") + (__entry->mode == XEN_LAZY_MMU) ? "MMU" : + (__entry->mode == XEN_LAZY_CPU) ? "CPU" : "NONE") ); #define DEFINE_XEN_MC_BATCH(name) \ DEFINE_EVENT(xen_mc__batch, name, \ - TP_PROTO(enum paravirt_lazy_mode mode), \ + TP_PROTO(enum xen_lazy_mode mode), \ TP_ARGS(mode)) DEFINE_XEN_MC_BATCH(xen_mc_batch); From 49147beb0ccbf4c5bb81a44be93ec3bc5e4a79f1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:28 +0200 Subject: [PATCH 303/333] x86/xen: allow nesting of same lazy mode When running as a paravirtualized guest under Xen, Linux is using "lazy mode" for issuing hypercalls which don't need to take immediate effect in order to improve performance (examples are e.g. multiple PTE changes). There are two different lazy modes defined: MMU and CPU lazy mode. Today it is not possible to nest multiple lazy mode sections, even if they are of the same kind. A recent change in memory management added nesting of MMU lazy mode sections, resulting in a regression when running as Xen PV guest. Technically there is no reason why nesting of multiple sections of the same kind of lazy mode shouldn't be allowed. So add support for that for fixing the regression. 
Fixes: bcc6cc832573 ("mm: add default definition of set_ptes()") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20230913113828.18421-4-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypervisor.h | 15 +++++++++++++-- arch/x86/xen/enlighten_pv.c | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index ed05ce3df5c7..7048dfacc04b 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -72,10 +72,18 @@ enum xen_lazy_mode { }; DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); +DECLARE_PER_CPU(unsigned int, xen_lazy_nesting); static inline void enter_lazy(enum xen_lazy_mode mode) { - BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); + enum xen_lazy_mode old_mode = this_cpu_read(xen_lazy_mode); + + if (mode == old_mode) { + this_cpu_inc(xen_lazy_nesting); + return; + } + + BUG_ON(old_mode != XEN_LAZY_NONE); this_cpu_write(xen_lazy_mode, mode); } @@ -84,7 +92,10 @@ static inline void leave_lazy(enum xen_lazy_mode mode) { BUG_ON(this_cpu_read(xen_lazy_mode) != mode); - this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); + if (this_cpu_read(xen_lazy_nesting) == 0) + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); + else + this_cpu_dec(xen_lazy_nesting); } enum xen_lazy_mode xen_get_lazy_mode(void); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 54b83825c4b6..bbbfdd495ebd 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -102,6 +102,7 @@ struct tls_descs { }; DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; +DEFINE_PER_CPU(unsigned int, xen_lazy_nesting); enum xen_lazy_mode xen_get_lazy_mode(void) { From 0fc6ff5a0f0488e09b496773c440ed5bb36d1f0d Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 11 Sep 2023 18:59:31 +0000 Subject: [PATCH 304/333] xen/efi: refactor deprecated strncpy `strncpy` is 
deprecated for use on NUL-terminated destination strings [1]. `efi_loader_signature` has space for 4 bytes. We are copying "Xen" (3 bytes) plus a NUL-byte which makes 4 total bytes. With that being said, there is currently not a bug with the current `strncpy()` implementation in terms of buffer overreads but we should favor a more robust string interface either way. A suitable replacement is `strscpy` [2] due to the fact that it guarantees NUL-termination on the destination buffer while being functionally the same in this case. Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1] Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2] Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Cc: Kees Cook Signed-off-by: Justin Stitt Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230911-strncpy-arch-x86-xen-efi-c-v1-1-96ab2bba2feb@google.com Signed-off-by: Juergen Gross --- arch/x86/xen/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 863d0d6b3edc..7250d0e0e1a9 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -138,7 +138,7 @@ void __init xen_efi_init(struct boot_params *boot_params) if (efi_systab_xen == NULL) return; - strncpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", + strscpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", sizeof(boot_params->efi_info.efi_loader_signature)); boot_params->efi_info.efi_systab = (__u32)__pa(efi_systab_xen); boot_params->efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); From 263cb0cc5abac7c22a6c0dfa7e50e89d8e6c6900 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Aug 2023 23:06:14 +0200 Subject: [PATCH 305/333] media: imx-mipi-csis: Remove an incorrect fwnode_handle_put() call The commit in Fixes has removed an fwnode_graph_get_endpoint_by_id() call in mipi_csis_subdev_init(). 
So the reference that was taken should not be released anymore in the error handling path of the probe and in the remove function. Remove the now incorrect fwnode_handle_put() calls. Fixes: 1029939b3782 ("media: v4l: async: Simplify async sub-device fwnode matching") Signed-off-by: Christophe JAILLET Reviewed-by: Laurent Pinchart Reviewed-by: Rui Miguel Silva Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/platform/nxp/imx-mipi-csis.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/media/platform/nxp/imx-mipi-csis.c b/drivers/media/platform/nxp/imx-mipi-csis.c index 16f19a640130..5f93712bf485 100644 --- a/drivers/media/platform/nxp/imx-mipi-csis.c +++ b/drivers/media/platform/nxp/imx-mipi-csis.c @@ -1490,7 +1490,6 @@ err_cleanup: v4l2_async_unregister_subdev(&csis->sd); err_disable_clock: mipi_csis_clk_disable(csis); - fwnode_handle_put(csis->sd.fwnode); return ret; } @@ -1510,7 +1509,6 @@ static void mipi_csis_remove(struct platform_device *pdev) mipi_csis_clk_disable(csis); v4l2_subdev_cleanup(&csis->sd); media_entity_cleanup(&csis->sd.entity); - fwnode_handle_put(csis->sd.fwnode); pm_runtime_set_suspended(&pdev->dev); } From aadb0330cfb60829318ef02ccfb9dd09cd14d920 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 19 Sep 2023 10:12:05 +0300 Subject: [PATCH 306/333] ALSA: usb-audio: scarlett_gen2: Fix another -Wformat-truncation warning The recent enablement of -Wformat-truncation leads to a false-positive warning for mixer_scarlett_gen2.c. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. 
Fixes: 78bd8f5126f8 ("ALSA: usb-audio: scarlett_gen2: Fix -Wformat-truncation warning") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230919071205.10684-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 5c6f50f38840..d260be8cb6bc 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -3205,8 +3205,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) /* Add input phantom controls */ if (info->inputs_per_phantom == 1) { for (i = 0; i < info->phantom_count; i++) { - snprintf(s, sizeof(s), fmt, i + 1, - "Phantom Power", "Switch"); + scnprintf(s, sizeof(s), fmt, i + 1, + "Phantom Power", "Switch"); err = scarlett2_add_new_ctl( mixer, &scarlett2_phantom_ctl, i, 1, s, &private->phantom_ctls[i]); From 8070274b472e2e9f5f67a990f5e697634c415708 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 18 Sep 2023 00:53:28 +0800 Subject: [PATCH 307/333] net: stmmac: fix incorrect rxq|txq_stats reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics where necessary") caused one regression as found by Uwe, the backtrace looks like: INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. 
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-rc1-00449-g133466c3bbe1-dirty #21 Hardware name: STM32 (Device Tree Support) unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x60/0x90 dump_stack_lvl from register_lock_class+0x98c/0x99c register_lock_class from __lock_acquire+0x74/0x293c __lock_acquire from lock_acquire+0x134/0x398 lock_acquire from stmmac_get_stats64+0x2ac/0x2fc stmmac_get_stats64 from dev_get_stats+0x44/0x130 dev_get_stats from rtnl_fill_stats+0x38/0x120 rtnl_fill_stats from rtnl_fill_ifinfo+0x834/0x17f4 rtnl_fill_ifinfo from rtmsg_ifinfo_build_skb+0xc0/0x144 rtmsg_ifinfo_build_skb from rtmsg_ifinfo+0x50/0x88 rtmsg_ifinfo from __dev_notify_flags+0xc0/0xec __dev_notify_flags from dev_change_flags+0x50/0x5c dev_change_flags from ip_auto_config+0x2f4/0x1260 ip_auto_config from do_one_initcall+0x70/0x35c do_one_initcall from kernel_init_freeable+0x2ac/0x308 kernel_init_freeable from kernel_init+0x1c/0x138 kernel_init from ret_from_fork+0x14/0x2c The reason is that the rxq|txq_stats structures are not what is expected: in stmmac_open() -> __stmmac_open() the structure is overwritten by "memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));". This causes the well-initialized syncp member of rxq|txq_stats to be overwritten unexpectedly, as pointed out by Johannes and Uwe. Fix this issue by moving rxq|txq_stats back to stmmac_extra_stats. For SMP cache friendliness, we also mark stmmac_txq_stats and stmmac_rxq_stats as ____cacheline_aligned_in_smp.
Fixes: 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics where necessary") Signed-off-by: Jisheng Zhang Reported-by: Uwe Kleine-König Tested-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230917165328.3403-1-jszhang@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/common.h | 7 +- .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 16 +-- .../net/ethernet/stmicro/stmmac/dwmac4_lib.c | 16 +-- .../net/ethernet/stmicro/stmmac/dwmac_lib.c | 16 +-- .../ethernet/stmicro/stmmac/dwxgmac2_dma.c | 16 +-- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 - .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 32 ++--- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 125 ++++++++++-------- 8 files changed, 120 insertions(+), 110 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 403cb397d4d3..1e996c29043d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -70,7 +70,7 @@ struct stmmac_txq_stats { u64 tx_tso_frames; u64 tx_tso_nfrags; struct u64_stats_sync syncp; -}; +} ____cacheline_aligned_in_smp; struct stmmac_rxq_stats { u64 rx_bytes; @@ -79,7 +79,7 @@ struct stmmac_rxq_stats { u64 rx_normal_irq_n; u64 napi_poll; struct u64_stats_sync syncp; -}; +} ____cacheline_aligned_in_smp; /* Extra statistic and debug information exposed by ethtool */ struct stmmac_extra_stats { @@ -202,6 +202,9 @@ struct stmmac_extra_stats { unsigned long mtl_est_hlbf; unsigned long mtl_est_btre; unsigned long mtl_est_btrlm; + /* per queue statistics */ + struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES]; + struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES]; unsigned long rx_dropped; unsigned long rx_errors; unsigned long tx_dropped; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 01e77368eef1..465ff1fd4785 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -441,8 +441,8 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; int ret = 0; u32 v; @@ -455,9 +455,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, if (v & EMAC_TX_INT) { ret |= handle_tx; - u64_stats_update_begin(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_normal_irq_n++; - u64_stats_update_end(&tx_q->txq_stats.syncp); + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); } if (v & EMAC_TX_DMA_STOP_INT) @@ -479,9 +479,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, if (v & EMAC_RX_INT) { ret |= handle_rx; - u64_stats_update_begin(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_normal_irq_n++; - u64_stats_update_end(&rx_q->rxq_stats.syncp); + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); } if (v & EMAC_RX_BUF_UA_INT) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 980e5f8a37ec..9470d3fd2ded 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -171,8 +171,8 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs; u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan)); u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - struct 
stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; int ret = 0; if (dir == DMA_DIR_RX) @@ -201,15 +201,15 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, } /* TX/RX NORMAL interrupts */ if (likely(intr_status & DMA_CHAN_STATUS_RI)) { - u64_stats_update_begin(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_normal_irq_n++; - u64_stats_update_end(&rx_q->rxq_stats.syncp); + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); ret |= handle_rx; } if (likely(intr_status & DMA_CHAN_STATUS_TI)) { - u64_stats_update_begin(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_normal_irq_n++; - u64_stats_update_end(&tx_q->txq_stats.syncp); + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); ret |= handle_tx; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index aaa09b16b016..7907d62d3437 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -162,8 +162,8 @@ static void show_rx_process_state(unsigned int status) int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; int ret = 0; /* read the status register (CSR5) */ u32 intr_status = readl(ioaddr + DMA_STATUS); @@ -215,16 +215,16 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, u32 value = readl(ioaddr + DMA_INTR_ENA); /* to schedule NAPI on real RIE 
event. */ if (likely(value & DMA_INTR_ENA_RIE)) { - u64_stats_update_begin(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_normal_irq_n++; - u64_stats_update_end(&rx_q->rxq_stats.syncp); + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); ret |= handle_rx; } } if (likely(intr_status & DMA_STATUS_TI)) { - u64_stats_update_begin(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_normal_irq_n++; - u64_stats_update_end(&tx_q->txq_stats.syncp); + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); ret |= handle_tx; } if (unlikely(intr_status & DMA_STATUS_ERI)) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index fa69d64a8694..3cde695fec91 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -337,8 +337,8 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan)); u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); int ret = 0; @@ -367,15 +367,15 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, /* TX/RX NORMAL interrupts */ if (likely(intr_status & XGMAC_NIS)) { if (likely(intr_status & XGMAC_RI)) { - u64_stats_update_begin(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_normal_irq_n++; - u64_stats_update_end(&rx_q->rxq_stats.syncp); + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); ret |= handle_rx; } if (likely(intr_status & (XGMAC_TI | 
XGMAC_TBU))) { - u64_stats_update_begin(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_normal_irq_n++; - u64_stats_update_end(&tx_q->txq_stats.syncp); + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); ret |= handle_tx; } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 3401e888a9f6..cd7a9768de5f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -78,7 +78,6 @@ struct stmmac_tx_queue { dma_addr_t dma_tx_phy; dma_addr_t tx_tail_addr; u32 mss; - struct stmmac_txq_stats txq_stats; }; struct stmmac_rx_buffer { @@ -123,7 +122,6 @@ struct stmmac_rx_queue { unsigned int len; unsigned int error; } state; - struct stmmac_rxq_stats rxq_stats; }; struct stmmac_channel { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index b7ac7abecdd3..6aa5c0556d22 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -548,14 +548,14 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data) pos = data; for (q = 0; q < tx_cnt; q++) { - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[q]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q]; struct stmmac_txq_stats snapshot; data = pos; do { - start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp); - snapshot = tx_q->txq_stats; - } while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start)); + start = u64_stats_fetch_begin(&txq_stats->syncp); + snapshot = *txq_stats; + } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n); for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) { @@ -566,14 +566,14 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data) pos = data; for (q = 0; q < rx_cnt; q++) 
{ - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[q]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q]; struct stmmac_rxq_stats snapshot; data = pos; do { - start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp); - snapshot = rx_q->rxq_stats; - } while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start)); + start = u64_stats_fetch_begin(&rxq_stats->syncp); + snapshot = *rxq_stats; + } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n); for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) { @@ -637,14 +637,14 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, pos = j; for (i = 0; i < rx_queues_count; i++) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[i]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i]; struct stmmac_rxq_stats snapshot; j = pos; do { - start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp); - snapshot = rx_q->rxq_stats; - } while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start)); + start = u64_stats_fetch_begin(&rxq_stats->syncp); + snapshot = *rxq_stats; + } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); data[j++] += snapshot.rx_pkt_n; data[j++] += snapshot.rx_normal_irq_n; @@ -654,14 +654,14 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, pos = j; for (i = 0; i < tx_queues_count; i++) { - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[i]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i]; struct stmmac_txq_stats snapshot; j = pos; do { - start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp); - snapshot = tx_q->txq_stats; - } while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start)); + start = u64_stats_fetch_begin(&txq_stats->syncp); + snapshot = *txq_stats; + } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); data[j++] += snapshot.tx_pkt_n; data[j++] += snapshot.tx_normal_irq_n; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2206789802bf..83c567a89a46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2426,6 +2426,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) { struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue); struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; struct xsk_buff_pool *pool = tx_q->xsk_pool; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc = NULL; @@ -2505,9 +2506,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); entry = tx_q->cur_tx; } - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_set_ic_bit += tx_set_ic_bit; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_set_ic_bit += tx_set_ic_bit; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); if (tx_desc) { stmmac_flush_tx_descriptors(priv, queue); @@ -2547,6 +2548,7 @@ static void stmmac_bump_dma_threshold(struct stmmac_priv *priv, u32 chan) static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; unsigned int bytes_compl = 0, pkts_compl = 0; unsigned int entry, xmits = 0, count = 0; u32 tx_packets = 0, tx_errors = 0; @@ -2706,11 +2708,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if (tx_q->dirty_tx != tx_q->cur_tx) stmmac_tx_timer_arm(priv, queue); - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_packets += tx_packets; - tx_q->txq_stats.tx_pkt_n += tx_packets; - 
tx_q->txq_stats.tx_clean++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_packets += tx_packets; + txq_stats->tx_pkt_n += tx_packets; + txq_stats->tx_clean++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); priv->xstats.tx_errors += tx_errors; @@ -4114,6 +4116,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) int nfrags = skb_shinfo(skb)->nr_frags; u32 queue = skb_get_queue_mapping(skb); unsigned int first_entry, tx_packets; + struct stmmac_txq_stats *txq_stats; int tmp_pay_len = 0, first_tx; struct stmmac_tx_queue *tx_q; bool has_vlan, set_ic; @@ -4124,6 +4127,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) int i; tx_q = &priv->dma_conf.tx_queue[queue]; + txq_stats = &priv->xstats.txq_stats[queue]; first_tx = tx_q->cur_tx; /* Compute header lengths */ @@ -4282,13 +4286,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); } - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_bytes += skb->len; - tx_q->txq_stats.tx_tso_frames++; - tx_q->txq_stats.tx_tso_nfrags += nfrags; + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_bytes += skb->len; + txq_stats->tx_tso_frames++; + txq_stats->tx_tso_nfrags += nfrags; if (set_ic) - tx_q->txq_stats.tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + txq_stats->tx_set_ic_bit++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); @@ -4359,6 +4363,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) u32 queue = skb_get_queue_mapping(skb); int nfrags = skb_shinfo(skb)->nr_frags; int gso = skb_shinfo(skb)->gso_type; + struct stmmac_txq_stats *txq_stats; struct 
dma_edesc *tbs_desc = NULL; struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; @@ -4368,6 +4373,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dma_addr_t des; tx_q = &priv->dma_conf.tx_queue[queue]; + txq_stats = &priv->xstats.txq_stats[queue]; first_tx = tx_q->cur_tx; if (priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) @@ -4519,11 +4525,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); } - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_bytes += skb->len; + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_bytes += skb->len; if (set_ic) - tx_q->txq_stats.tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + txq_stats->tx_set_ic_bit++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); @@ -4730,6 +4736,7 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv, static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, struct xdp_frame *xdpf, bool dma_map) { + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc; @@ -4789,9 +4796,9 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, unsigned long flags; tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, tx_desc); - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_set_ic_bit++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); } stmmac_enable_dma_transmission(priv, priv->ioaddr); @@ -4936,7 +4943,7 @@ static void 
stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, struct dma_desc *p, struct dma_desc *np, struct xdp_buff *xdp) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue]; struct stmmac_channel *ch = &priv->channel[queue]; unsigned int len = xdp->data_end - xdp->data; enum pkt_hash_types hash_type; @@ -4966,10 +4973,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, skb_record_rx_queue(skb, queue); napi_gro_receive(&ch->rxtx_napi, skb); - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_pkt_n++; - rx_q->rxq_stats.rx_bytes += len; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->rx_pkt_n++; + rxq_stats->rx_bytes += len; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); } static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) @@ -5042,6 +5049,7 @@ static struct stmmac_xdp_buff *xsk_buff_to_stmmac_ctx(struct xdp_buff *xdp) static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) { + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue]; struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; unsigned int count = 0, error = 0, len = 0; int dirty = stmmac_rx_dirty(priv, queue); @@ -5205,9 +5213,9 @@ read_again: stmmac_finalize_xdp_rx(priv, xdp_status); - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_pkt_n += count; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->rx_pkt_n += count; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); priv->xstats.rx_dropped += rx_dropped; priv->xstats.rx_errors += rx_errors; @@ -5235,6 +5243,7 @@ read_again: static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { u32 
rx_errors = 0, rx_dropped = 0, rx_bytes = 0, rx_packets = 0; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue]; struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; struct stmmac_channel *ch = &priv->channel[queue]; unsigned int count = 0, error = 0, len = 0; @@ -5496,11 +5505,11 @@ drain_data: stmmac_rx_refill(priv, queue); - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_packets += rx_packets; - rx_q->rxq_stats.rx_bytes += rx_bytes; - rx_q->rxq_stats.rx_pkt_n += count; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->rx_packets += rx_packets; + rxq_stats->rx_bytes += rx_bytes; + rxq_stats->rx_pkt_n += count; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); priv->xstats.rx_dropped += rx_dropped; priv->xstats.rx_errors += rx_errors; @@ -5513,15 +5522,15 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, rx_napi); struct stmmac_priv *priv = ch->priv_data; - struct stmmac_rx_queue *rx_q; + struct stmmac_rxq_stats *rxq_stats; u32 chan = ch->index; unsigned long flags; int work_done; - rx_q = &priv->dma_conf.rx_queue[chan]; - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.napi_poll++; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + rxq_stats = &priv->xstats.rxq_stats[chan]; + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->napi_poll++; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); work_done = stmmac_rx(priv, budget, chan); if (work_done < budget && napi_complete_done(napi, work_done)) { @@ -5540,15 +5549,15 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, tx_napi); struct stmmac_priv *priv = ch->priv_data; - struct 
stmmac_tx_queue *tx_q; + struct stmmac_txq_stats *txq_stats; u32 chan = ch->index; unsigned long flags; int work_done; - tx_q = &priv->dma_conf.tx_queue[chan]; - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.napi_poll++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + txq_stats = &priv->xstats.txq_stats[chan]; + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->napi_poll++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); work_done = stmmac_tx_clean(priv, budget, chan); work_done = min(work_done, budget); @@ -5570,20 +5579,20 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget) container_of(napi, struct stmmac_channel, rxtx_napi); struct stmmac_priv *priv = ch->priv_data; int rx_done, tx_done, rxtx_done; - struct stmmac_rx_queue *rx_q; - struct stmmac_tx_queue *tx_q; + struct stmmac_rxq_stats *rxq_stats; + struct stmmac_txq_stats *txq_stats; u32 chan = ch->index; unsigned long flags; - rx_q = &priv->dma_conf.rx_queue[chan]; - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.napi_poll++; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + rxq_stats = &priv->xstats.rxq_stats[chan]; + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->napi_poll++; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); - tx_q = &priv->dma_conf.tx_queue[chan]; - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.napi_poll++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + txq_stats = &priv->xstats.txq_stats[chan]; + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->napi_poll++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); tx_done = stmmac_tx_clean(priv, budget, chan); tx_done = min(tx_done, budget); @@ -6926,7 +6935,7 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 int q; for (q = 
0; q < tx_cnt; q++) { - struct stmmac_txq_stats *txq_stats = &priv->dma_conf.tx_queue[q].txq_stats; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q]; u64 tx_packets; u64 tx_bytes; @@ -6941,7 +6950,7 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 } for (q = 0; q < rx_cnt; q++) { - struct stmmac_rxq_stats *rxq_stats = &priv->dma_conf.rx_queue[q].rxq_stats; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q]; u64 rx_packets; u64 rx_bytes; @@ -7342,9 +7351,9 @@ int stmmac_dvr_probe(struct device *device, priv->dev = ndev; for (i = 0; i < MTL_MAX_RX_QUEUES; i++) - u64_stats_init(&priv->dma_conf.rx_queue[i].rxq_stats.syncp); + u64_stats_init(&priv->xstats.rxq_stats[i].syncp); for (i = 0; i < MTL_MAX_TX_QUEUES; i++) - u64_stats_init(&priv->dma_conf.tx_queue[i].txq_stats.syncp); + u64_stats_init(&priv->xstats.txq_stats[i].syncp); stmmac_set_ethtool_ops(ndev); priv->pause = pause; From bd3caddf299a640efb66c6022efed7fe744db626 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 18 Sep 2023 15:48:36 +0800 Subject: [PATCH 308/333] net: hns3: add cmdq check for vf periodic service task When the vf cmdq is disabled, there is no need to keep these tasks running. So skip these tasks when the cmdq is disabled.
Fixes: ff200099d271 ("net: hns3: remove unnecessary work in hclgevf_main") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 7a2f9233d695..a4d68fb216fb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1855,7 +1855,8 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) unsigned long delta = round_jiffies_relative(HZ); struct hnae3_handle *handle = &hdev->nic; - if (test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) + if (test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state) || + test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state)) return; if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { From f9f651261130cdcb7adc9a3e365b356bc2749ab3 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 18 Sep 2023 15:48:37 +0800 Subject: [PATCH 309/333] net: hns3: fix GRE checksum offload issue The device_version V3 hardware can't offload the checksum for IP in GRE packets, but can do it for NvGRE. So default to disable the checksum and GSO offload for GRE, but keep the ability to enable it when only using NvGRE. 
Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b4895c7b3efd..cf50368441b7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3353,6 +3353,15 @@ static void hns3_set_default_feature(struct net_device *netdev) NETIF_F_HW_TC); netdev->hw_enc_features |= netdev->vlan_features | NETIF_F_TSO_MANGLEID; + + /* The device_version V3 hardware can't offload the checksum for IP in + * GRE packets, but can do it for NvGRE. So default to disable the + * checksum and GSO offload for GRE. + */ + if (ae_dev->dev_version > HNAE3_DEVICE_VERSION_V2) { + netdev->features &= ~NETIF_F_GSO_GRE; + netdev->features &= ~NETIF_F_GSO_GRE_CSUM; + } } static int hns3_alloc_buffer(struct hns3_enet_ring *ring, From f2ed304922a55690529bcca59678dd92d7466ce8 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 18 Sep 2023 15:48:38 +0800 Subject: [PATCH 310/333] net: hns3: only enable unicast promisc when mac table full Currently, the driver enables unicast promisc for the function whenever configuring a mac address fails. That is unreasonable when the failure is caused by using the same mac address as another function. So only enable unicast promisc when the mac table is full.
Fixes: c631c696823c ("net: hns3: refactor the promisc mode setting") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 8ca368424436..c0d03283775f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8824,7 +8824,7 @@ static void hclge_update_overflow_flags(struct hclge_vport *vport, if (mac_type == HCLGE_MAC_ADDR_UC) { if (is_all_added) vport->overflow_promisc_flags &= ~HNAE3_OVERFLOW_UPE; - else + else if (hclge_is_umv_space_full(vport, true)) vport->overflow_promisc_flags |= HNAE3_OVERFLOW_UPE; } else { if (is_all_added) From 1a7be66e4685b8541546222c305cce9710718a88 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 18 Sep 2023 15:48:39 +0800 Subject: [PATCH 311/333] net: hns3: fix fail to delete tc flower rules during reset issue Firmware does not respond to driver commands during reset. Therefore, a rule will fail to be deleted while the firmware is resetting. So, if deleting a rule fails, set the rule state to TO_DEL, and the rule will be deleted when the periodic task is scheduled.
Fixes: 0205ec041ec6 ("net: hns3: add support for hw tc offload of tc flower") Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c0d03283775f..2bd77871f3bf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -7348,6 +7348,12 @@ static int hclge_del_cls_flower(struct hnae3_handle *handle, ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location, NULL, false); if (ret) { + /* if tcam config fail, set rule state to TO_DEL, + * so the rule will be deleted when periodic + * task being scheduled. + */ + hclge_update_fd_list(hdev, HCLGE_FD_TO_DEL, rule->location, NULL); + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); spin_unlock_bh(&hdev->fd_rule_lock); return ret; } From 0770063096d5da4a8e467b6e73c1646a75589628 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 18 Sep 2023 15:48:40 +0800 Subject: [PATCH 312/333] net: hns3: add 5ms delay before clear firmware reset irq source Currently the reset process in hns3 and the firmware watchdog init process are asynchronous. We assumed that firmware watchdog initialization is completed before hns3 clears the firmware interrupt source. However, firmware initialization may not complete that early. So add a delay before hns3 clears the firmware interrupt source; a 5 ms delay is enough to avoid a second firmware reset interrupt.
Fixes: c1a81619d73a ("net: hns3: Add mailbox interrupt handling to PF driver") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2bd77871f3bf..c42574e29747 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3564,9 +3564,14 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) static void hclge_clear_event_cause(struct hclge_dev *hdev, u32 event_type, u32 regclr) { +#define HCLGE_IMP_RESET_DELAY 5 + switch (event_type) { case HCLGE_VECTOR0_EVENT_PTP: case HCLGE_VECTOR0_EVENT_RST: + if (regclr == BIT(HCLGE_VECTOR0_IMPRESET_INT_B)) + mdelay(HCLGE_IMP_RESET_DELAY); + hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, regclr); break; case HCLGE_VECTOR0_EVENT_MBX: From 44bdb313da57322c9b3c108eb66981c6ec6509f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2023 09:13:51 +0000 Subject: [PATCH 313/333] net: bridge: use DEV_STATS_INC() syzbot/KCSAN reported data-races in br_handle_frame_finish() [1] This function can run from multiple cpus without mutual exclusion. Adopt SMP safe DEV_STATS_INC() to update dev->stats fields. Handles updates to dev->stats.tx_dropped while we are at it. 
[1] BUG: KCSAN: data-race in br_handle_frame_finish / br_handle_frame_finish read-write to 0xffff8881374b2178 of 8 bytes by interrupt on cpu 1: br_handle_frame_finish+0xd4f/0xef0 net/bridge/br_input.c:189 br_nf_hook_thresh+0x1ed/0x220 br_nf_pre_routing_finish_ipv6+0x50f/0x540 NF_HOOK include/linux/netfilter.h:304 [inline] br_nf_pre_routing_ipv6+0x1e3/0x2a0 net/bridge/br_netfilter_ipv6.c:178 br_nf_pre_routing+0x526/0xba0 net/bridge/br_netfilter_hooks.c:508 nf_hook_entry_hookfn include/linux/netfilter.h:144 [inline] nf_hook_bridge_pre net/bridge/br_input.c:272 [inline] br_handle_frame+0x4c9/0x940 net/bridge/br_input.c:417 __netif_receive_skb_core+0xa8a/0x21e0 net/core/dev.c:5417 __netif_receive_skb_one_core net/core/dev.c:5521 [inline] __netif_receive_skb+0x57/0x1b0 net/core/dev.c:5637 process_backlog+0x21f/0x380 net/core/dev.c:5965 __napi_poll+0x60/0x3b0 net/core/dev.c:6527 napi_poll net/core/dev.c:6594 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6727 __do_softirq+0xc1/0x265 kernel/softirq.c:553 run_ksoftirqd+0x17/0x20 kernel/softirq.c:921 smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 read-write to 0xffff8881374b2178 of 8 bytes by interrupt on cpu 0: br_handle_frame_finish+0xd4f/0xef0 net/bridge/br_input.c:189 br_nf_hook_thresh+0x1ed/0x220 br_nf_pre_routing_finish_ipv6+0x50f/0x540 NF_HOOK include/linux/netfilter.h:304 [inline] br_nf_pre_routing_ipv6+0x1e3/0x2a0 net/bridge/br_netfilter_ipv6.c:178 br_nf_pre_routing+0x526/0xba0 net/bridge/br_netfilter_hooks.c:508 nf_hook_entry_hookfn include/linux/netfilter.h:144 [inline] nf_hook_bridge_pre net/bridge/br_input.c:272 [inline] br_handle_frame+0x4c9/0x940 net/bridge/br_input.c:417 __netif_receive_skb_core+0xa8a/0x21e0 net/core/dev.c:5417 __netif_receive_skb_one_core net/core/dev.c:5521 [inline] __netif_receive_skb+0x57/0x1b0 net/core/dev.c:5637 
process_backlog+0x21f/0x380 net/core/dev.c:5965 __napi_poll+0x60/0x3b0 net/core/dev.c:6527 napi_poll net/core/dev.c:6594 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6727 __do_softirq+0xc1/0x265 kernel/softirq.c:553 do_softirq+0x5e/0x90 kernel/softirq.c:454 __local_bh_enable_ip+0x64/0x70 kernel/softirq.c:381 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:167 [inline] _raw_spin_unlock_bh+0x36/0x40 kernel/locking/spinlock.c:210 spin_unlock_bh include/linux/spinlock.h:396 [inline] batadv_tt_local_purge+0x1a8/0x1f0 net/batman-adv/translation-table.c:1356 batadv_tt_purge+0x2b/0x630 net/batman-adv/translation-table.c:3560 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 value changed: 0x00000000000d7190 -> 0x00000000000d7191 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 14848 Comm: kworker/u4:11 Not tainted 6.6.0-rc1-syzkaller-00236-gad8a69f361b9 #0 Fixes: 1c29fc4989bc ("[BRIDGE]: keep track of received multicast packets") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Roopa Prabhu Cc: Nikolay Aleksandrov Cc: bridge@lists.linux-foundation.org Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20230918091351.1356153-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/bridge/br_forward.c | 4 ++-- net/bridge/br_input.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 9d7bc8b96b53..7431f89e897b 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -124,7 +124,7 @@ static int deliver_clone(const struct net_bridge_port *prev, skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return -ENOMEM; } @@ -268,7 +268,7 @@ static void 
maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, skb = skb_copy(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index c34a0b0901b0..c729528b5e85 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -181,12 +181,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if ((mdst && mdst->host_joined) || br_multicast_is_router(brmctx, skb)) { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } mcast_hit = true; } else { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } break; case BR_PKT_UNICAST: From deff8486a40e75813f2841f533c7572489981bae Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 19 Sep 2023 09:11:53 +0100 Subject: [PATCH 314/333] ALSA: hda: cs35l56: Use the new RUNTIME_PM_OPS() macro Use RUNTIME_PM_OPS() instead of the old SET_RUNTIME_PM_OPS(). This means we don't need __maybe_unused on the functions. 
Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230919081153.19793-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 87ffe8fbff99..7adc1d373d65 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -105,7 +105,7 @@ static void cs35l56_hda_playback_hook(struct device *dev, int action) } } -static int __maybe_unused cs35l56_hda_runtime_suspend(struct device *dev) +static int cs35l56_hda_runtime_suspend(struct device *dev) { struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); @@ -115,7 +115,7 @@ static int __maybe_unused cs35l56_hda_runtime_suspend(struct device *dev) return cs35l56_runtime_suspend_common(&cs35l56->base); } -static int __maybe_unused cs35l56_hda_runtime_resume(struct device *dev) +static int cs35l56_hda_runtime_resume(struct device *dev) { struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); int ret; @@ -1015,7 +1015,7 @@ void cs35l56_hda_remove(struct device *dev) EXPORT_SYMBOL_NS_GPL(cs35l56_hda_remove, SND_HDA_SCODEC_CS35L56); const struct dev_pm_ops cs35l56_hda_pm_ops = { - SET_RUNTIME_PM_OPS(cs35l56_hda_runtime_suspend, cs35l56_hda_runtime_resume, NULL) + RUNTIME_PM_OPS(cs35l56_hda_runtime_suspend, cs35l56_hda_runtime_resume, NULL) SYSTEM_SLEEP_PM_OPS(cs35l56_hda_system_suspend, cs35l56_hda_system_resume) LATE_SYSTEM_SLEEP_PM_OPS(cs35l56_hda_system_suspend_late, cs35l56_hda_system_resume_early) From 41b07476da38ac2878a14e5b8fe0312c41ea36e3 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 19 Sep 2023 16:27:16 +0800 Subject: [PATCH 315/333] ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support New platform SSID:0x231f. 0x17 was only speaker pin, DAC assigned will be 0x03. Headphone assigned to 0x02. 
Playback via headphone will get EQ filter processing. So, it needs to swap DAC. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/8d63c6e360124e3ea2523753050e6f05@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 883a7e865bc5..751783f3a15c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10577,6 +10577,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x90170110}, {0x19, 0x03a11030}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK, + {0x17, 0x90170110}, /* 0x231f with RTK I2S AMP */ + {0x19, 0x04a11040}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, {0x12, 0x90a60130}, {0x17, 0x90170110}, From 492032760127251e5540a5716a70996bacf2a3fd Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 18 Sep 2023 20:30:11 +0800 Subject: [PATCH 316/333] team: fix null-ptr-deref when team device type is changed Get a null-ptr-deref bug as follows with reproducer [1]. BUG: kernel NULL pointer dereference, address: 0000000000000228 ... RIP: 0010:vlan_dev_hard_header+0x35/0x140 [8021q] ... Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x82/0x150 ? exc_page_fault+0x69/0x150 ? asm_exc_page_fault+0x26/0x30 ? vlan_dev_hard_header+0x35/0x140 [8021q] ? vlan_dev_hard_header+0x8e/0x140 [8021q] neigh_connected_output+0xb2/0x100 ip6_finish_output2+0x1cb/0x520 ? nf_hook_slow+0x43/0xc0 ? ip6_mtu+0x46/0x80 ip6_finish_output+0x2a/0xb0 mld_sendpack+0x18f/0x250 mld_ifc_work+0x39/0x160 process_one_work+0x1e6/0x3f0 worker_thread+0x4d/0x2f0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? 
__pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 [1] $ teamd -t team0 -d -c '{"runner": {"name": "loadbalance"}}' $ ip link add name t-dummy type dummy $ ip link add link t-dummy name t-dummy.100 type vlan id 100 $ ip link add name t-nlmon type nlmon $ ip link set t-nlmon master team0 $ ip link set t-nlmon nomaster $ ip link set t-dummy up $ ip link set team0 up $ ip link set t-dummy.100 down $ ip link set t-dummy.100 master team0 When enslave a vlan device to team device and team device type is changed from non-ether to ether, header_ops of team device is changed to vlan_header_ops. That is incorrect and will trigger null-ptr-deref for vlan->real_dev in vlan_dev_hard_header() because team device is not a vlan device. Cache eth_header_ops in team_setup(), then assign cached header_ops to header_ops of team net device when its type is changed from non-ether to ether to fix the bug. Fixes: 1d76efe1577b ("team: add support for non-ethernet devices") Suggested-by: Hangbin Liu Reviewed-by: Hangbin Liu Signed-off-by: Ziyang Xuan Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230918123011.1884401-1-william.xuanziyang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/team/team.c | 10 +++++++++- include/linux/if_team.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e8b94580194e..508d9a392ab1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2115,7 +2115,12 @@ static const struct ethtool_ops team_ethtool_ops = { static void team_setup_by_port(struct net_device *dev, struct net_device *port_dev) { - dev->header_ops = port_dev->header_ops; + struct team *team = netdev_priv(dev); + + if (port_dev->type == ARPHRD_ETHER) + dev->header_ops = team->header_ops_cache; + else + dev->header_ops = port_dev->header_ops; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; dev->needed_headroom = port_dev->needed_headroom; 
@@ -2162,8 +2167,11 @@ static int team_dev_type_check_change(struct net_device *dev, static void team_setup(struct net_device *dev) { + struct team *team = netdev_priv(dev); + ether_setup(dev); dev->max_mtu = ETH_MAX_MTU; + team->header_ops_cache = dev->header_ops; dev->netdev_ops = &team_netdev_ops; dev->ethtool_ops = &team_ethtool_ops; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 1b9b15a492fa..cdc684e04a2f 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -189,6 +189,8 @@ struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; + const struct header_ops *header_ops_cache; + struct mutex lock; /* used for overall locking, e.g. port lists write */ /* From f1d95df0f31048f1c59092648997686e3f7d9478 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Mon, 18 Sep 2023 16:56:23 +0300 Subject: [PATCH 317/333] net: rds: Fix possible NULL-pointer dereference In rds_rdma_cm_event_handler_cmn() check, if conn pointer exists before dereferencing it as rdma_set_service_type() argument Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: fd261ce6a30e ("rds: rdma: update rdma transport for tos") Signed-off-by: Artem Chernyshev Signed-off-by: David S. Miller --- net/rds/rdma_transport.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index d36f3f6b4351..b15cf316b23a 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -86,11 +86,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ADDR_RESOLVED: - rdma_set_service_type(cm_id, conn->c_tos); - rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32); - /* XXX do we need to clean up if this fails? 
*/ - ret = rdma_resolve_route(cm_id, - RDS_RDMA_RESOLVE_TIMEOUT_MS); + if (conn) { + rdma_set_service_type(cm_id, conn->c_tos); + rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32); + /* XXX do we need to clean up if this fails? */ + ret = rdma_resolve_route(cm_id, + RDS_RDMA_RESOLVE_TIMEOUT_MS); + } break; case RDMA_CM_EVENT_ROUTE_RESOLVED: From 4e4b1798cc90e376b8b61d0098b4093898a32227 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 18 Sep 2023 11:40:15 -0400 Subject: [PATCH 318/333] vxlan: Add missing entries to vxlan_get_size() There are some attributes added by vxlan_fill_info() which are not accounted for in vxlan_get_size(). Add them. I didn't find a way to trigger an actual problem from this miscalculation since there is usually extra space in netlink size calculations like if_nlmsg_size(); but maybe I just didn't search long enough. Fixes: 3511494ce2f3 ("vxlan: Group Policy extension") Fixes: e1e5314de08b ("vxlan: implement GPE") Fixes: 0ace2ca89cbd ("vxlan: Use checksum partial with remote checksum offload") Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Benjamin Poirier Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/vxlan/vxlan_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index e463f59e95c2..5b5597073b00 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -4331,6 +4331,10 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */ + nla_total_size(0) + /* IFLA_VXLAN_GBP */ + nla_total_size(0) + /* IFLA_VXLAN_GPE */ + nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */ 0; } From c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 15 Sep 2023 15:18:11 +0200 Subject: [PATCH 319/333] netfilter: nf_tables: disable toggling dormant table state more than once nft -f -< Cc: Bing-Jhong Billy Jheng Cc: info@starlabs.sg Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d819b4d42962..a3680638ec60 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1219,6 +1219,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx) flags & NFT_TABLE_F_OWNER)) return -EOPNOTSUPP; + /* No dormant off/on/off/on games in single transaction */ + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) From cf5000a7787cbc10341091d37245a42c119d26c5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 19 Sep 2023 15:36:13 +0200 Subject: [PATCH 320/333] netfilter: nf_tables: fix memleak when more than 255 elements expired When more than 255 elements expired we're supposed to switch to a new gc container structure. 
This never happens: u8 type will wrap before reaching the boundary and nft_trans_gc_space() always returns true. This means we recycle the initial gc container structure and lose track of the elements that came before. While at it, don't deref 'gc' after we've passed it to call_rcu. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a4455f4995ab..7c816359d5a9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1682,7 +1682,7 @@ struct nft_trans_gc { struct net *net; struct nft_set *set; u32 seq; - u8 count; + u16 count; void *priv[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a3680638ec60..4356189360fb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9579,12 +9579,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans) struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp) { + struct nft_set *set; + if (nft_trans_gc_space(gc)) return gc; + set = gc->set; nft_trans_gc_queue_work(gc); - return nft_trans_gc_alloc(gc->set, gc_seq, gfp); + return nft_trans_gc_alloc(set, gc_seq, gfp); } void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) @@ -9599,15 +9602,18 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) { + struct nft_set *set; + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) return NULL; if (nft_trans_gc_space(gc)) return gc; + set = gc->set; call_rcu(&gc->rcu, nft_trans_gc_trans_free); - return 
nft_trans_gc_alloc(gc->set, 0, gfp); + return nft_trans_gc_alloc(set, 0, gfp); } void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) From 7433b6d2afd512d04398c73aa984d1e285be125b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 19 Sep 2023 20:04:45 +0200 Subject: [PATCH 321/333] netfilter: ipset: Fix race between IPSET_CMD_CREATE and IPSET_CMD_SWAP Kyle Zeng reported that there is a race between IPSET_CMD_ADD and IPSET_CMD_SWAP in netfilter/ip_set, which can lead to the invocation of `__ip_set_put` on a wrong `set`, triggering the `BUG_ON(set->ref == 0);` check in it. The race is caused by using the wrong reference counter, i.e. the ref counter instead of ref_netlink. Fixes: 24e227896bbf ("netfilter: ipset: Add schedule point in call_ad().") Reported-by: Kyle Zeng Closes: https://lore.kernel.org/netfilter-devel/ZPZqetxOmH+w%2Fmyc@westworld/#r Tested-by: Kyle Zeng Signed-off-by: Jozsef Kadlecsik Signed-off-by: Florian Westphal --- net/netfilter/ipset/ip_set_core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e564b5174261..35d2f9c9ada0 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -682,6 +682,14 @@ __ip_set_put(struct ip_set *set) /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need * a separate reference counter */ +static void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + static void __ip_set_put_netlink(struct ip_set *set) { @@ -1693,11 +1701,11 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, do { if (retried) { - __ip_set_get(set); + __ip_set_get_netlink(set); nfnl_unlock(NFNL_SUBSYS_IPSET); cond_resched(); nfnl_lock(NFNL_SUBSYS_IPSET); - __ip_set_put(set); + __ip_set_put_netlink(set); } ip_set_lock(set); From 22b6e7f3d6d51ff2716480f3d8f3098d90d69165 
Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Tue, 19 Sep 2023 10:27:15 +0800 Subject: [PATCH 322/333] net: hinic: Fix warning-hinic_set_vlan_fliter() warn: variable dereferenced before check 'hwdev' 'hwdev' is checked too late and hwdev will not be NULL, so remove the check. Fixes: 2acf960e3be6 ("net: hinic: Add support for configuration of rx-vlan-filter by ethtool") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202309112354.pikZCmyk-lkp@intel.com/ Signed-off-by: Cai Huoqing Reviewed-by: Vadim Fedorenko Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_port.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index 9406237c461e..f81a43d2cdfc 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -456,9 +456,6 @@ int hinic_set_vlan_fliter(struct hinic_dev *nic_dev, u32 en) u16 out_size = sizeof(vlan_filter); int err; - if (!hwdev) - return -EINVAL; - vlan_filter.func_idx = HINIC_HWIF_FUNC_IDX(hwif); vlan_filter.enable = en; From 4a0f07d71b0483cc08c03cefa7c85749e187c214 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 19 Sep 2023 18:44:06 +0800 Subject: [PATCH 323/333] net/handshake: Fix memory leak in __sock_create() and sock_alloc_file() When making CONFIG_DEBUG_KMEMLEAK=y and CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y, modprobe handshake-test and then rmmod handshake-test, the below memory leak is detected. The struct socket_alloc which is allocated by alloc_inode_sb() in __sock_create() is not freed. And the struct dentry which is allocated by __d_alloc() in sock_alloc_file() is not freed. Since fput() will call file->f_op->release() which is sock_close() here and it will call __sock_release(), and fput() will call dput(dentry) to free the struct dentry. So replace sock_release() with fput() to fix the below memory leak.
After applying this patch, the following memory leak is never detected. unreferenced object 0xffff888109165840 (size 768): comm "kunit_try_catch", pid 1852, jiffies 4294685807 (age 976.262s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa0209ba2 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810f472008 (size 192): comm "kunit_try_catch", pid 1852, jiffies 4294685808 (age 976.261s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 08 20 47 0f 81 88 ff ff ......... G..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0209bbb [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810958e580 (size 224): comm "kunit_try_catch", pid 1852, jiffies 4294685808 (age 976.261s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0209bbb [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926dc88 (size 192): comm "kunit_try_catch", pid 1854, jiffies 4294685809 (age 976.271s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 
00 00 00 00 00 00 00 00 88 dc 26 09 81 88 ff ff ..........&..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208fdc [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810a241380 (size 224): comm "kunit_try_catch", pid 1854, jiffies 4294685809 (age 976.271s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208fdc [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109165040 (size 768): comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.269s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa0208860 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926d568 (size 192): comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.269s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 68 d5 26 09 81 88 ff ff ........h.&..... 
backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208879 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810a240580 (size 224): comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.347s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208879 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109164c40 (size 768): comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa0208541 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926cd18 (size 192): comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 18 cd 26 09 81 88 ff ff ..........&..... 
backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa020855a [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810a240200 (size 224): comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa020855a [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109164840 (size 768): comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa02093e2 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926cab8 (size 192): comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 b8 ca 26 09 81 88 ff ff ..........&..... 
backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa02093fb [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810a240040 (size 224): comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa02093fb [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109166440 (size 768): comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa02097c1 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926c398 (size 192): comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 98 c3 26 09 81 88 ff ff ..........&..... 
backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa02097da [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888107e0b8c0 (size 224): comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa02097da [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109164440 (size 768): comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.487s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa020824e [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810f4cf698 (size 192): comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.501s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 98 f6 4c 0f 81 88 ff ff ..........L..... 
backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208267 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888107e0b000 (size 224): comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.501s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208267 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 Fixes: 88232ec1ec5e ("net/handshake: Add Kunit tests for the handshake consumer API") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- net/handshake/handshake-test.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index 6d37bab35c8f..16ed7bfd29e4 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -235,7 +235,7 @@ static void handshake_req_submit_test4(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, req, result); handshake_req_cancel(sock->sk); - sock_release(sock); + fput(filp); } static void handshake_req_submit_test5(struct kunit *test) @@ -272,7 +272,7 @@ static void handshake_req_submit_test5(struct kunit *test) /* Assert */ KUNIT_EXPECT_EQ(test, err, -EAGAIN); - sock_release(sock); + fput(filp); hn->hn_pending = saved; } @@ -306,7 +306,7 @@ static void handshake_req_submit_test6(struct kunit *test) KUNIT_EXPECT_EQ(test, err, -EBUSY); handshake_req_cancel(sock->sk); - sock_release(sock); + fput(filp); } static void handshake_req_cancel_test1(struct kunit *test) @@ -340,7 +340,7 @@ static void 
handshake_req_cancel_test1(struct kunit *test) /* Assert */ KUNIT_EXPECT_TRUE(test, result); - sock_release(sock); + fput(filp); } static void handshake_req_cancel_test2(struct kunit *test) @@ -382,7 +382,7 @@ static void handshake_req_cancel_test2(struct kunit *test) /* Assert */ KUNIT_EXPECT_TRUE(test, result); - sock_release(sock); + fput(filp); } static void handshake_req_cancel_test3(struct kunit *test) @@ -427,7 +427,7 @@ static void handshake_req_cancel_test3(struct kunit *test) /* Assert */ KUNIT_EXPECT_FALSE(test, result); - sock_release(sock); + fput(filp); } static struct handshake_req *handshake_req_destroy_test; @@ -471,7 +471,7 @@ static void handshake_req_destroy_test1(struct kunit *test) handshake_req_cancel(sock->sk); /* Act */ - sock_release(sock); + fput(filp); /* Assert */ KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req); From db58b5eea8a47da3bed6b128dfcbdaf336c6a244 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:40:22 +0200 Subject: [PATCH 324/333] Revert "tmpfs: add support for multigrain timestamps" This reverts commit d48c3397291690c3576d6c983b0a86ecbc203cac. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. 
Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 02e62fccc80d..69595d341882 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4586,7 +4586,7 @@ static struct file_system_type shmem_fs_type = { #endif .kill_sb = kill_litter_super, #ifdef CONFIG_SHMEM - .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, #else .fs_flags = FS_USERNS_MOUNT, #endif From f798accd5987dc2280e0ba9055edf1124af46a5f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:41:18 +0200 Subject: [PATCH 325/333] Revert "xfs: switch to multigrain timestamps" This reverts commit e44df2664746aed8b6dd5245eb711a0ce33c5cf5. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board, revert the infrastructure changes for multigrain timestamps. If it isn't code that's here to stay, make it go away.
Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/xfs/libxfs/xfs_trans_inode.c | 6 +++--- fs/xfs/xfs_iops.c | 6 +++--- fs/xfs/xfs_super.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index ad22656376d3..6b2296ff248a 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -62,12 +62,12 @@ xfs_trans_ichgtime( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - /* If the mtime changes, then ctime must also change */ - ASSERT(flags & XFS_ICHGTIME_CHG); + tv = current_time(inode); - tv = inode_set_ctime_current(inode); if (flags & XFS_ICHGTIME_MOD) inode->i_mtime = tv; + if (flags & XFS_ICHGTIME_CHG) + inode_set_ctime_to_ts(inode, tv); if (flags & XFS_ICHGTIME_CREATE) ip->i_crtime = tv; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2ededd3f6b8c..1c1e6171209d 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -573,10 +573,10 @@ xfs_vn_getattr( stat->gid = vfsgid_into_kgid(vfsgid); stat->ino = ip->i_ino; stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode_get_ctime(inode); stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); - fill_mg_cmtime(stat, request_mask, inode); - if (xfs_has_v3inodes(mp)) { if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; @@ -917,7 +917,7 @@ xfs_setattr_size( if (newsize != oldsize && !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = - current_mgtime(inode); + current_time(inode); iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1f77014c6e1a..b5c202f5d96c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2065,7 +2065,7 @@ static struct file_system_type xfs_fs_type = { .init_fs_context = xfs_init_fs_context, .parameters = xfs_fs_parameters, .kill_sb = 
xfs_kill_sb, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("xfs"); From 50ec1d721e117496df0582e34f1f2f946a03e1be Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:41:45 +0200 Subject: [PATCH 326/333] Revert "ext4: switch to multigrain timestamps" This reverts commit 0269b585868e59b6a2ecc6ea685d39310e4fc18b. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board, revert the infrastructure changes for multigrain timestamps. If it isn't code that's here to stay, make it go away. Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 38217422f938..dbebd8b3127e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -7314,7 +7314,7 @@ static struct file_system_type ext4_fs_type = { .init_fs_context = ext4_init_fs_context, .parameters = ext4_param_specs, .kill_sb = ext4_kill_sb, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("ext4"); From efd34f0316169bf182c40d3b63068ca95df6bb99 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:41:47 +0200 Subject: [PATCH 327/333] Revert "btrfs: convert to multigrain timestamps" This reverts commit 50e9ceef1d4f644ee0049e82e360058a64ec284c. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board, revert the infrastructure changes for multigrain timestamps. If it isn't code that's here to stay, make it go away.
Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/btrfs/file.c | 24 ++++++++++++++++++++---- fs/btrfs/super.c | 5 ++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ca46a529d56b..ad6f401ac0e1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1106,6 +1106,25 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode) btrfs_drew_write_unlock(&inode->root->snapshot_lock); } +static void update_time_for_write(struct inode *inode) +{ + struct timespec64 now, ctime; + + if (IS_NOCMTIME(inode)) + return; + + now = current_time(inode); + if (!timespec64_equal(&inode->i_mtime, &now)) + inode->i_mtime = now; + + ctime = inode_get_ctime(inode); + if (!timespec64_equal(&ctime, &now)) + inode_set_ctime_to_ts(inode, now); + + if (IS_I_VERSION(inode)) + inode_inc_iversion(inode); +} + static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count) { @@ -1137,10 +1156,7 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, * need to start yet another transaction to update the inode as we will * update the inode when we finish writing whatever data we write. 
*/ - if (!IS_NOCMTIME(inode)) { - inode->i_mtime = inode_set_ctime_current(inode); - inode_inc_iversion(inode); - } + update_time_for_write(inode); start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 09bfe68d2ea3..cffdd6f7f8e8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2150,7 +2150,7 @@ static struct file_system_type btrfs_fs_type = { .name = "btrfs", .mount = btrfs_mount, .kill_sb = btrfs_kill_super, - .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, }; static struct file_system_type btrfs_root_fs_type = { @@ -2158,8 +2158,7 @@ static struct file_system_type btrfs_root_fs_type = { .name = "btrfs", .mount = btrfs_mount_root, .kill_sb = btrfs_kill_super, - .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | - FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("btrfs"); From 647aa768281f38cb1002edb3a1f673c3d66a8d81 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:40:13 +0200 Subject: [PATCH 328/333] Revert "fs: add infrastructure for multigrain timestamps" This reverts commit ffb6cf19e06334062744b7e3493f71e500964f8e. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. 
Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/inode.c | 82 ++-------------------------------------------- fs/stat.c | 41 ++--------------------- include/linux/fs.h | 46 ++------------------------ 3 files changed, 7 insertions(+), 162 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 35fd688168c5..84bc3c76e5cc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2102,52 +2102,10 @@ int file_remove_privs(struct file *file) } EXPORT_SYMBOL(file_remove_privs); -/** - * current_mgtime - Return FS time (possibly fine-grained) - * @inode: inode. - * - * Return the current time truncated to the time granularity supported by - * the fs, as suitable for a ctime/mtime change. If the ctime is flagged - * as having been QUERIED, get a fine-grained timestamp. - */ -struct timespec64 current_mgtime(struct inode *inode) -{ - struct timespec64 now, ctime; - atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; - long nsec = atomic_long_read(pnsec); - - if (nsec & I_CTIME_QUERIED) { - ktime_get_real_ts64(&now); - return timestamp_truncate(now, inode); - } - - ktime_get_coarse_real_ts64(&now); - now = timestamp_truncate(now, inode); - - /* - * If we've recently fetched a fine-grained timestamp - * then the coarse-grained one may still be earlier than the - * existing ctime. Just keep the existing value if so. 
- */ - ctime = inode_get_ctime(inode); - if (timespec64_compare(&ctime, &now) > 0) - now = ctime; - - return now; -} -EXPORT_SYMBOL(current_mgtime); - -static struct timespec64 current_ctime(struct inode *inode) -{ - if (is_mgtime(inode)) - return current_mgtime(inode); - return current_time(inode); -} - static int inode_needs_update_time(struct inode *inode) { int sync_it = 0; - struct timespec64 now = current_ctime(inode); + struct timespec64 now = current_time(inode); struct timespec64 ctime; /* First try to exhaust all avenues to not sync */ @@ -2578,43 +2536,9 @@ EXPORT_SYMBOL(current_time); */ struct timespec64 inode_set_ctime_current(struct inode *inode) { - struct timespec64 now; - struct timespec64 ctime; + struct timespec64 now = current_time(inode); - ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec); - if (!(ctime.tv_nsec & I_CTIME_QUERIED)) { - now = current_time(inode); - - /* Just copy it into place if it's not multigrain */ - if (!is_mgtime(inode)) { - inode_set_ctime_to_ts(inode, now); - return now; - } - - /* - * If we've recently updated with a fine-grained timestamp, - * then the coarse-grained one may still be earlier than the - * existing ctime. Just keep the existing value if so. - */ - ctime.tv_sec = inode->__i_ctime.tv_sec; - if (timespec64_compare(&ctime, &now) > 0) - return ctime; - - /* - * Ctime updates are usually protected by the inode_lock, but - * we can still race with someone setting the QUERIED flag. - * Try to swap the new nsec value into place. If it's changed - * in the interim, then just go with a fine-grained timestamp. 
- */ - if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec, - now.tv_nsec) != ctime.tv_nsec) - goto fine_grained; - inode->__i_ctime.tv_sec = now.tv_sec; - return now; - } -fine_grained: - ktime_get_real_ts64(&now); - inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode)); + inode_set_ctime(inode, now.tv_sec, now.tv_nsec); return now; } EXPORT_SYMBOL(inode_set_ctime_current); diff --git a/fs/stat.c b/fs/stat.c index 6e60389d6a15..d43a5cc1bfa4 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -26,37 +26,6 @@ #include "internal.h" #include "mount.h" -/** - * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED - * @stat: where to store the resulting values - * @request_mask: STATX_* values requested - * @inode: inode from which to grab the c/mtime - * - * Given @inode, grab the ctime and mtime out if it and store the result - * in @stat. When fetching the value, flag it as queried so the next write - * will use a fine-grained timestamp. - */ -void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) -{ - atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; - - /* If neither time was requested, then don't report them */ - if (!(request_mask & (STATX_CTIME|STATX_MTIME))) { - stat->result_mask &= ~(STATX_CTIME|STATX_MTIME); - return; - } - - stat->mtime = inode->i_mtime; - stat->ctime.tv_sec = inode->__i_ctime.tv_sec; - /* - * Atomically set the QUERIED flag and fetch the new value with - * the flag masked off. 
- */ - stat->ctime.tv_nsec = atomic_long_fetch_or(I_CTIME_QUERIED, pnsec) & - ~I_CTIME_QUERIED; -} -EXPORT_SYMBOL(fill_mg_cmtime); - /** * generic_fillattr - Fill in the basic attributes from the inode struct * @idmap: idmap of the mount the inode was found from @@ -89,14 +58,8 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; - - if (is_mgtime(inode)) { - fill_mg_cmtime(stat, request_mask, inode); - } else { - stat->mtime = inode->i_mtime; - stat->ctime = inode_get_ctime(inode); - } - + stat->mtime = inode->i_mtime; + stat->ctime = inode_get_ctime(inode); stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4aeb3fa11927..b528f063e8ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1508,47 +1508,18 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -struct timespec64 current_mgtime(struct inode *inode); struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); -/* - * Multigrain timestamps - * - * Conditionally use fine-grained ctime and mtime timestamps when there - * are users actively observing them via getattr. The primary use-case - * for this is NFS clients that use the ctime to distinguish between - * different states of the file, and that are often fooled by multiple - * operations that occur in the same coarse-grained timer tick. - * - * The kernel always keeps normalized struct timespec64 values in the ctime, - * which means that only the first 30 bits of the value are used. Use the - * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value - * has been queried since it was last updated. 
- */ -#define I_CTIME_QUERIED (1L<<30) - /** * inode_get_ctime - fetch the current ctime from the inode * @inode: inode from which to fetch ctime * - * Grab the current ctime tv_nsec field from the inode, mask off the - * I_CTIME_QUERIED flag and return it. This is mostly intended for use by - * internal consumers of the ctime that aren't concerned with ensuring a - * fine-grained update on the next change (e.g. when preparing to store - * the value in the backing store for later retrieval). - * - * This is safe to call regardless of whether the underlying filesystem - * is using multigrain timestamps. + * Grab the current ctime from the inode and return it. */ static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - struct timespec64 ctime; - - ctime.tv_sec = inode->__i_ctime.tv_sec; - ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED; - - return ctime; + return inode->__i_ctime; } /** @@ -2334,7 +2305,6 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ -#define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2358,17 +2328,6 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -/** - * is_mgtime: is this inode using multigrain timestamps - * @inode: inode to test for multigrain timestamps - * - * Return true if the inode uses multigrain timestamps, false otherwise. 
- */ -static inline bool is_mgtime(const struct inode *inode) -{ - return inode->i_sb->s_type->fs_flags & FS_MGTIME; -} - extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -3054,7 +3013,6 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); From 6f411fb5ca9419090bee6a0a46425e0a5060b734 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 18 Sep 2023 17:36:09 +0200 Subject: [PATCH 329/333] net: ena: Flush XDP packets on error. xdp_do_flush() should be invoked before leaving the NAPI poll function after a XDP-redirect. This is not the case if the driver leaves via the error path (after having a redirect in one of its previous iterations). Invoke xdp_do_flush() also in the error path. 
Cc: Arthur Kiyanovski Cc: David Arinzon Cc: Noam Dagan Cc: Saeed Bishara Cc: Shay Agroskin Fixes: a318c70ad152b ("net: ena: introduce XDP redirect implementation") Acked-by: Arthur Kiyanovski Signed-off-by: Sebastian Andrzej Siewior Acked-by: Jesper Dangaard Brouer Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index ad32ca81f7ef..f955bde10cf9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1833,6 +1833,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, return work_done; error: + if (xdp_flags & ENA_XDP_REDIRECT) + xdp_do_flush(); + adapter = netdev_priv(rx_ring->netdev); if (rc == -ENOSPC) { From edc0140cc3b7b91874ebe70eb7d2a851e8817ccc Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 18 Sep 2023 17:36:10 +0200 Subject: [PATCH 330/333] bnxt_en: Flush XDP for bnxt_poll_nitroa0()'s NAPI bnxt_poll_nitroa0() invokes bnxt_rx_pkt() which can run an XDP program, which in turn can return XDP_REDIRECT. bnxt_rx_pkt() is also used by __bnxt_poll_work() which flushes (xdp_do_flush()) the packets after each round. bnxt_poll_nitroa0() lacks this feature. xdp_do_flush() should be invoked before leaving the NAPI callback. Invoke xdp_do_flush() after a redirect in bnxt_poll_nitroa0() NAPI.
Cc: Michael Chan Fixes: f18c2b77b2e4e ("bnxt_en: optimized XDP_REDIRECT support") Reviewed-by: Andy Gospodarek Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Michael Chan Acked-by: Jesper Dangaard Brouer Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5cc0dbe12132..7551aa8068f8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2614,6 +2614,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) struct rx_cmp_ext *rxcmp1; u32 cp_cons, tmp_raw_cons; u32 raw_cons = cpr->cp_raw_cons; + bool flush_xdp = false; u32 rx_pkts = 0; u8 event = 0; @@ -2648,6 +2649,8 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) rx_pkts++; else if (rc == -EBUSY) /* partial completion */ break; + if (event & BNXT_REDIRECT_EVENT) + flush_xdp = true; } else if (unlikely(TX_CMP_TYPE(txcmp) == CMPL_BASE_TYPE_HWRM_DONE)) { bnxt_hwrm_handler(bp, txcmp); @@ -2667,6 +2670,8 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) if (event & BNXT_AGG_EVENT) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + if (flush_xdp) + xdp_do_flush(); if (!bnxt_has_work(bp, cpr) && rx_pkts < budget) { napi_complete_done(napi, rx_pkts); From 70b2b6892645e58ed6f051dad7f8d1083f0ad553 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 18 Sep 2023 17:36:11 +0200 Subject: [PATCH 331/333] octeontx2-pf: Do xdp_do_flush() after redirects. xdp_do_flush() should be invoked before leaving the NAPI poll function if XDP-redirect has been performed. Invoke xdp_do_flush() before leaving NAPI. 
Cc: Geetha sowjanya Cc: Subbaraya Sundeep Cc: Sunil Goutham Cc: hariprasad Fixes: 06059a1a9a4a5 ("octeontx2-pf: Add XDP support to netdev PF") Signed-off-by: Sebastian Andrzej Siewior Acked-by: Geethasowjanya Akula Acked-by: Jesper Dangaard Brouer Signed-off-by: Paolo Abeni --- .../marvell/octeontx2/nic/otx2_txrx.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index e77d43848955..53b2a4ef5298 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -29,7 +29,8 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct bpf_prog *prog, struct nix_cqe_rx_s *cqe, - struct otx2_cq_queue *cq); + struct otx2_cq_queue *cq, + bool *need_xdp_flush); static int otx2_nix_cq_op_status(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) @@ -337,7 +338,7 @@ static bool otx2_check_rcv_errors(struct otx2_nic *pfvf, static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, struct napi_struct *napi, struct otx2_cq_queue *cq, - struct nix_cqe_rx_s *cqe) + struct nix_cqe_rx_s *cqe, bool *need_xdp_flush) { struct nix_rx_parse_s *parse = &cqe->parse; struct nix_rx_sg_s *sg = &cqe->sg; @@ -353,7 +354,7 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, } if (pfvf->xdp_prog) - if (otx2_xdp_rcv_pkt_handler(pfvf, pfvf->xdp_prog, cqe, cq)) + if (otx2_xdp_rcv_pkt_handler(pfvf, pfvf->xdp_prog, cqe, cq, need_xdp_flush)) return; skb = napi_get_frags(napi); @@ -388,6 +389,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf, struct napi_struct *napi, struct otx2_cq_queue *cq, int budget) { + bool need_xdp_flush = false; struct nix_cqe_rx_s *cqe; int processed_cqe = 0; @@ -409,13 +411,15 @@ process_cqe: cq->cq_head++; cq->cq_head &= (cq->cqe_cnt - 1); - otx2_rcv_pkt_handler(pfvf, napi, cq, cqe); + otx2_rcv_pkt_handler(pfvf, napi, cq, cqe, 
&need_xdp_flush); cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; cqe->sg.seg_addr = 0x00; processed_cqe++; cq->pend_cqe--; } + if (need_xdp_flush) + xdp_do_flush(); /* Free CQEs to HW */ otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, @@ -1354,7 +1358,8 @@ bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx) static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct bpf_prog *prog, struct nix_cqe_rx_s *cqe, - struct otx2_cq_queue *cq) + struct otx2_cq_queue *cq, + bool *need_xdp_flush) { unsigned char *hard_start, *data; int qidx = cq->cq_idx; @@ -1391,8 +1396,10 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE); - if (!err) + if (!err) { + *need_xdp_flush = true; return true; + } put_page(page); break; default: From 1703b2e0de653b459ca6230be32ce7f2ea0ae7ee Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Tue, 19 Sep 2023 10:03:31 -0700 Subject: [PATCH 332/333] igc: Expose tx-usecs coalesce setting to user When users attempt to obtain the coalesce setting using the ethtool command, current code always returns 0 for tx-usecs. This is because I225/6 always uses a queue pair setting, hence tx_coalesce_usecs does not return a value during the igc_ethtool_get_coalesce() callback process. The pair queue condition checking in igc_ethtool_get_coalesce() is removed by this patch so that the user gets information of the value of tx-usecs. Even if i225/6 is using queue pair setting, there is no harm in notifying the user of the tx-usecs. The implementation of the current code may have previously been a copy of the legacy code i210. Since I225 has the queue pair setting enabled, tx-usecs will always adhere to the user-set rx-usecs value. An error message will appear when the user attempts to set the tx-usecs value for the input parameters because, by default, they should only set the rx-usecs value. 
This patch also adds the helper function to get the previous rx coalesce value similar to tx coalesce. How to test: User can get the coalesce value using ethtool command. Example command: Get: ethtool -c Previous output: rx-usecs: 3 rx-frames: n/a rx-usecs-irq: n/a rx-frames-irq: n/a tx-usecs: 0 tx-frames: n/a tx-usecs-irq: n/a tx-frames-irq: n/a New output: rx-usecs: 3 rx-frames: n/a rx-usecs-irq: n/a rx-frames-irq: n/a tx-usecs: 3 tx-frames: n/a tx-usecs-irq: n/a tx-frames-irq: n/a Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230919170331.1581031-1-anthony.l.nguyen@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 31 ++++++++++++-------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 93bce729be76..7ab6dd58e400 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -868,6 +868,18 @@ static void igc_ethtool_get_stats(struct net_device *netdev, spin_unlock(&adapter->stats64_lock); } +static int igc_ethtool_get_previous_rx_coalesce(struct igc_adapter *adapter) +{ + return (adapter->rx_itr_setting <= 3) ? + adapter->rx_itr_setting : adapter->rx_itr_setting >> 2; +} + +static int igc_ethtool_get_previous_tx_coalesce(struct igc_adapter *adapter) +{ + return (adapter->tx_itr_setting <= 3) ? 
+ adapter->tx_itr_setting : adapter->tx_itr_setting >> 2; +} + static int igc_ethtool_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, @@ -875,17 +887,8 @@ static int igc_ethtool_get_coalesce(struct net_device *netdev, { struct igc_adapter *adapter = netdev_priv(netdev); - if (adapter->rx_itr_setting <= 3) - ec->rx_coalesce_usecs = adapter->rx_itr_setting; - else - ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; - - if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) { - if (adapter->tx_itr_setting <= 3) - ec->tx_coalesce_usecs = adapter->tx_itr_setting; - else - ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; - } + ec->rx_coalesce_usecs = igc_ethtool_get_previous_rx_coalesce(adapter); + ec->tx_coalesce_usecs = igc_ethtool_get_previous_tx_coalesce(adapter); return 0; } @@ -910,8 +913,12 @@ static int igc_ethtool_set_coalesce(struct net_device *netdev, ec->tx_coalesce_usecs == 2) return -EINVAL; - if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs) + if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && + ec->tx_coalesce_usecs != igc_ethtool_get_previous_tx_coalesce(adapter)) { + NL_SET_ERR_MSG_MOD(extack, + "Queue Pair mode enabled, both Rx and Tx coalescing controlled by rx-usecs"); return -EINVAL; + } /* If ITR is disabled, disable DMAC */ if (ec->rx_coalesce_usecs == 0) { From fc21f08375dbf654bd1fda748261955de580ac14 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 19 Sep 2023 19:39:49 +0100 Subject: [PATCH 333/333] sfc: handle error pointers returned by rhashtable_lookup_get_insert_fast() Several places in TC offload code assumed that the return from rhashtable_lookup_get_insert_fast() was always either NULL or a valid pointer to an existing entry, but in fact that function can return an error pointer. In that case, perform the usual cleanup of the newly created entry, then pass up the error, rather than attempting to take a reference on the old entry. 
Fixes: d902e1a737d4 ("sfc: bare bones TC offload on EF100") Reported-by: Dan Carpenter Signed-off-by: Edward Cree Link: https://lore.kernel.org/r/20230919183949.59392-1-edward.cree@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/sfc/tc.c | 21 ++++++++++++++++++--- drivers/net/ethernet/sfc/tc_conntrack.c | 7 ++++++- drivers/net/ethernet/sfc/tc_counters.c | 2 ++ drivers/net/ethernet/sfc/tc_encap_actions.c | 4 ++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 047322b04d4f..834f000ba1c4 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -136,6 +136,8 @@ static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx if (old) { /* don't need our new entry */ kfree(ped); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found, ref taken */ @@ -602,6 +604,8 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, kfree(encap); if (pseudo) /* don't need our new pseudo either */ efx_tc_flower_release_encap_match(efx, pseudo); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return PTR_ERR(old); /* check old and new em_types are compatible */ switch (old->type) { case EFX_TC_EM_DIRECT: @@ -700,6 +704,8 @@ static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx, if (old) { /* don't need our new entry */ kfree(rid); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found */ @@ -1482,7 +1488,10 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht, &rule->linkage, efx_tc_match_action_ht_params); - if (old) { + if (IS_ERR(old)) { + rc = PTR_ERR(old); + goto release; + } else if (old) { netif_dbg(efx, 
drv, efx->net_dev, "Ignoring already-offloaded rule (cookie %lx)\n", tc->cookie); @@ -1697,7 +1706,10 @@ static int efx_tc_flower_replace_lhs(struct efx_nic *efx, old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht, &rule->linkage, efx_tc_lhs_rule_ht_params); - if (old) { + if (IS_ERR(old)) { + rc = PTR_ERR(old); + goto release; + } else if (old) { netif_dbg(efx, drv, efx->net_dev, "Already offloaded rule (cookie %lx)\n", tc->cookie); rc = -EEXIST; @@ -1858,7 +1870,10 @@ static int efx_tc_flower_replace(struct efx_nic *efx, old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht, &rule->linkage, efx_tc_match_action_ht_params); - if (old) { + if (IS_ERR(old)) { + rc = PTR_ERR(old); + goto release; + } else if (old) { netif_dbg(efx, drv, efx->net_dev, "Already offloaded rule (cookie %lx)\n", tc->cookie); NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded"); diff --git a/drivers/net/ethernet/sfc/tc_conntrack.c b/drivers/net/ethernet/sfc/tc_conntrack.c index 8e06bfbcbea1..44bb57670340 100644 --- a/drivers/net/ethernet/sfc/tc_conntrack.c +++ b/drivers/net/ethernet/sfc/tc_conntrack.c @@ -298,7 +298,10 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht, &conn->linkage, efx_tc_ct_ht_params); - if (old) { + if (IS_ERR(old)) { + rc = PTR_ERR(old); + goto release; + } else if (old) { netif_dbg(efx, drv, efx->net_dev, "Already offloaded conntrack (cookie %lx)\n", tc->cookie); rc = -EEXIST; @@ -482,6 +485,8 @@ struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone, if (old) { /* don't need our new entry */ kfree(ct_zone); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found */ diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c index 0fafb47ea082..c44088424323 100644 --- a/drivers/net/ethernet/sfc/tc_counters.c +++ 
b/drivers/net/ethernet/sfc/tc_counters.c @@ -236,6 +236,8 @@ struct efx_tc_counter_index *efx_tc_flower_get_counter_index( if (old) { /* don't need our new entry */ kfree(ctr); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found */ diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index 7e8bcdb222ad..87443f9dfd22 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -132,6 +132,8 @@ static int efx_bind_neigh(struct efx_nic *efx, /* don't need our new entry */ put_net_track(neigh->net, &neigh->ns_tracker); kfree(neigh); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return PTR_ERR(old); if (!refcount_inc_not_zero(&old->ref)) return -EAGAIN; /* existing entry found, ref taken */ @@ -640,6 +642,8 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md( if (old) { /* don't need our new entry */ kfree(encap); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found, ref taken */