From 879cf8006475642b747aaaa4d06f7044ab2de794 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jan 2022 10:26:58 +0300 Subject: [PATCH 001/367] regulator: max20086: fix error code in max20086_parse_regulators_dt() This code accidentally returns PTR_ERR(NULL) which is success. It should return a negative error code. Fixes: bfff546aae50 ("regulator: Add MAX20086-MAX20089 driver") Signed-off-by: Dan Carpenter Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20220111072657.GK11243@kili Signed-off-by: Mark Brown --- drivers/regulator/max20086-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index fbc56b043071..63aa6ec3254a 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -140,7 +140,7 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) node = of_get_child_by_name(chip->dev->of_node, "regulators"); if (!node) { dev_err(chip->dev, "regulators node not found\n"); - return PTR_ERR(node); + return -ENODEV; } for (i = 0; i < chip->info->num_outputs; ++i) From d068eebbd4822b6c14a7ea375dfe53ca5c69c776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 17 Dec 2021 16:48:54 +0100 Subject: [PATCH 002/367] cgroup/cpuset: Make child cpusets restrict parents on v1 hierarchy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 1f1562fcd04a ("cgroup/cpuset: Don't let child cpusets restrict parent in default hierarchy") inteded to relax the check only on the default hierarchy (or v2 mode) but it dropped the check in v1 too. This patch returns and separates the legacy-only validations so that they can be considered only in the v1 mode, which should enforce the old constraints for the sake of compatibility. Fixes: 1f1562fcd04a ("cgroup/cpuset: Don't let child cpusets restrict parent in default hierarchy") Suggested-by: Waiman Long Signed-off-by: Michal Koutný Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 52 ++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index dc653ab26e50..bb3531e7fda7 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -590,6 +590,35 @@ static inline void free_cpuset(struct cpuset *cs) kfree(cs); } +/* + * validate_change_legacy() - Validate conditions specific to legacy (v1) + * behavior. + */ +static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial) +{ + struct cgroup_subsys_state *css; + struct cpuset *c, *par; + int ret; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + /* Each of our child cpusets must be a subset of us */ + ret = -EBUSY; + cpuset_for_each_child(c, css, cur) + if (!is_cpuset_subset(c, trial)) + goto out; + + /* On legacy hierarchy, we must be a subset of our parent cpuset. */ + ret = -EACCES; + par = parent_cs(cur); + if (par && !is_cpuset_subset(trial, par)) + goto out; + + ret = 0; +out: + return ret; +} + /* * validate_change() - Used to validate that any proposed cpuset change * follows the structural rules for cpusets. @@ -614,20 +643,21 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) { struct cgroup_subsys_state *css; struct cpuset *c, *par; - int ret; - - /* The checks don't apply to root cpuset */ - if (cur == &top_cpuset) - return 0; + int ret = 0; rcu_read_lock(); - par = parent_cs(cur); - /* On legacy hierarchy, we must be a subset of our parent cpuset. */ - ret = -EACCES; - if (!is_in_v2_mode() && !is_cpuset_subset(trial, par)) + if (!is_in_v2_mode()) + ret = validate_change_legacy(cur, trial); + if (ret) goto out; + /* Remaining checks don't apply to root cpuset */ + if (cur == &top_cpuset) + goto out; + + par = parent_cs(cur); + /* * If either I or some sibling (!= me) is exclusive, we can't * overlap @@ -1175,9 +1205,7 @@ enum subparts_cmd { * * Because of the implicit cpu exclusive nature of a partition root, * cpumask changes that violates the cpu exclusivity rule will not be - * permitted when checked by validate_change(). The validate_change() - * function will also prevent any changes to the cpu list if it is not - * a superset of children's cpu lists. + * permitted when checked by validate_change(). */ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, struct cpumask *newmask, From 094d00f8ca58c5d29b25e23b4daaed1ff1f13b41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jan 2022 08:57:58 +0000 Subject: [PATCH 003/367] KVM: arm64: pkvm: Use the mm_ops indirection for cache maintenance CMOs issued from EL2 cannot directly use the kernel helpers, as EL2 doesn't have a mapping of the guest pages. Oops. Instead, use the mm_ops indirection to use helpers that will perform a mapping at EL2 and allow the CMO to be effective. Fixes: 25aa28691bb9 ("KVM: arm64: Move guest CMOs to the fault handlers") Reviewed-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220114125038.1336965-1-maz@kernel.org --- arch/arm64/kvm/hyp/pgtable.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 844a6f003fd5..2cb3867eb7c2 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, */ stage2_put_pte(ptep, mmu, addr, level, mm_ops); - if (need_flush) { - kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); - - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); - } + if (need_flush && mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); if (childp) mm_ops->put_page(childp); @@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; - kvm_pte_t *pte_follow; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - pte_follow = kvm_pte_follow(pte, mm_ops); - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); + if (mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); return 0; } From 4ee7e4a6c9b298da44029ed9ec8ed23ae49cc209 Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Wed, 12 Jan 2022 19:33:21 +0100 Subject: [PATCH 004/367] ovl: fix NULL pointer dereference in copy up warning This patch is fixing a NULL pointer dereference to get a recently introduced warning message working. Fixes: 5b0a414d06c3 ("ovl: fix filattr copy-up failure") Signed-off-by: Christoph Fritz Cc: # v5.15 Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index b193d08a3dc3..347b06479663 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -145,7 +145,7 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, if (err == -ENOTTY || err == -EINVAL) return 0; pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", - old, err); + old->dentry, err); return err; } @@ -168,7 +168,7 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, err = ovl_real_fileattr_get(new, &newfa); if (err) { pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n", - new, err); + new->dentry, err); return err; } From 94fd19752b28aa66c98e7991734af91dfc529f8f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 14 Jan 2022 16:57:56 +0100 Subject: [PATCH 005/367] ovl: don't fail copy up if no fileattr support on upper Christoph Fritz is reporting that failure to copy up fileattr when upper doesn't support fileattr or xattr results in a regression. Return success in these failure cases; this reverts overlayfs to the old behavior. Add a pr_warn_once() in these cases to still let the user know about the copy up failures. Reported-by: Christoph Fritz Fixes: 72db82115d2b ("ovl: copy up sync/noatime fileattr flags") Cc: # v5.15 Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 347b06479663..e040970408d4 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -157,7 +157,9 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, */ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { err = ovl_set_protattr(inode, new->dentry, &oldfa); - if (err) + if (err == -EPERM) + pr_warn_once("copying fileattr: no xattr on upper\n"); + else if (err) return err; } @@ -167,6 +169,14 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, err = ovl_real_fileattr_get(new, &newfa); if (err) { + /* + * Returning an error if upper doesn't support fileattr will + * result in a regression, so revert to the old behavior. + */ + if (err == -ENOTTY || err == -EINVAL) { + pr_warn_once("copying fileattr: no support on upper\n"); + return 0; + } pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n", new->dentry, err); return err; From 45378cd33905966baf16d12ab0adbd56794ee075 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Jan 2022 14:01:18 +0000 Subject: [PATCH 006/367] irqchip/apple-aic: Drop unused ipi_hwirq field This field was never used, remove it. Signed-off-by: Marc Zyngier Acked-by: Hector Martin Link: https://lore.kernel.org/r/20220108140118.3378937-1-maz@kernel.org --- drivers/irqchip/irq-apple-aic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 3759dc36cc8f..53c4783b5b9b 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -178,7 +178,6 @@ struct aic_irq_chip { struct irq_domain *hw_domain; struct irq_domain *ipi_domain; int nr_hw; - int ipi_hwirq; }; static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked); From 291e79c7e2eb6fdc016453597b78482e06199d0f Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Sun, 9 Jan 2022 15:54:32 +0100 Subject: [PATCH 007/367] irqchip/realtek-rtl: Map control data to virq The driver assigned the irqchip and irq handler to the hardware irq, instead of the virq. This is incorrect, and only worked because these irq numbers happened to be the same on the devices used for testing the original driver. Fixes: 9f3a0f34b84a ("irqchip: Add support for Realtek RTL838x/RTL839x interrupt controller") Signed-off-by: Sander Vanheule Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/4b4936606480265db47df152f00bc2ed46340599.1641739718.git.sander@svanheule.net --- drivers/irqchip/irq-realtek-rtl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c index fd9f275592d2..d6788dd93c7b 100644 --- a/drivers/irqchip/irq-realtek-rtl.c +++ b/drivers/irqchip/irq-realtek-rtl.c @@ -62,7 +62,7 @@ static struct irq_chip realtek_ictl_irq = { static int intc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - irq_set_chip_and_handler(hw, &realtek_ictl_irq, handle_level_irq); + irq_set_chip_and_handler(irq, &realtek_ictl_irq, handle_level_irq); return 0; } From 91351b5dd0fd494eb2d85e1bb6aca77b067447e0 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Sun, 9 Jan 2022 15:54:33 +0100 Subject: [PATCH 008/367] irqchip/realtek-rtl: Fix off-by-one in routing There is an offset between routing values (1..6) and the connected MIPS CPU interrupts (2..7), but no distinction was made between these two values. This issue was previously hidden during testing, because an interrupt mapping was used where for each required interrupt another (unused) routing was configured, with an offset of +1. Offset the CPU IRQ numbers by -1 to retrieve the correct routing value. Fixes: 9f3a0f34b84a ("irqchip: Add support for Realtek RTL838x/RTL839x interrupt controller") Signed-off-by: Sander Vanheule Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/177b920aa8d8610615692d0e657e509f363c85ca.1641739718.git.sander@svanheule.net --- drivers/irqchip/irq-realtek-rtl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c index d6788dd93c7b..568614edd88f 100644 --- a/drivers/irqchip/irq-realtek-rtl.c +++ b/drivers/irqchip/irq-realtek-rtl.c @@ -95,7 +95,8 @@ out: * SoC interrupts are cascaded to MIPS CPU interrupts according to the * interrupt-map in the device tree. Each SoC interrupt gets 4 bits for * the CPU interrupt in an Interrupt Routing Register. Max 32 SoC interrupts - * thus go into 4 IRRs. + * thus go into 4 IRRs. A routing value of '0' means the interrupt is left + * disconnected. Routing values {1..15} connect to output lines {0..14}. */ static int __init map_interrupts(struct device_node *node, struct irq_domain *domain) { @@ -134,7 +135,7 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do of_node_put(cpu_ictl); cpu_int = be32_to_cpup(imap + 2); - if (cpu_int > 7) + if (cpu_int > 7 || cpu_int < 2) return -EINVAL; if (!(mips_irqs_set & BIT(cpu_int))) { @@ -143,7 +144,8 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do mips_irqs_set |= BIT(cpu_int); } - regs[(soc_int * 4) / 32] |= cpu_int << (soc_int * 4) % 32; + /* Use routing values (1..6) for CPU interrupts (2..7) */ + regs[(soc_int * 4) / 32] |= (cpu_int - 1) << (soc_int * 4) % 32; imap += 3; } From 960dd884ddf5621ae6284cd3a42724500a97ae4c Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Sun, 9 Jan 2022 15:54:34 +0100 Subject: [PATCH 009/367] irqchip/realtek-rtl: Service all pending interrupts Instead of only servicing the lowest pending interrupt line, make sure all pending SoC interrupts are serviced before exiting the chained handler. This adds a small overhead if only one interrupt is pending, but should prevent rapid re-triggering of the handler. Signed-off-by: Sander Vanheule Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/5082ad3cb8b4eedf55075561b93eff6570299fe1.1641739718.git.sander@svanheule.net --- drivers/irqchip/irq-realtek-rtl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c index 568614edd88f..50a56820c99b 100644 --- a/drivers/irqchip/irq-realtek-rtl.c +++ b/drivers/irqchip/irq-realtek-rtl.c @@ -76,16 +76,20 @@ static void realtek_irq_dispatch(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_domain *domain; - unsigned int pending; + unsigned long pending; + unsigned int soc_int; chained_irq_enter(chip, desc); pending = readl(REG(RTL_ICTL_GIMR)) & readl(REG(RTL_ICTL_GISR)); + if (unlikely(!pending)) { spurious_interrupt(); goto out; } + domain = irq_desc_get_handler_data(desc); - generic_handle_domain_irq(domain, __ffs(pending)); + for_each_set_bit(soc_int, &pending, 32) + generic_handle_domain_irq(domain, soc_int); out: chained_irq_exit(chip, desc); From c831d92890e037aafee662e66172d406804e4818 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 15:46:21 +0100 Subject: [PATCH 010/367] irqchip/loongson-pch-ms: Use bitmap_free() to free bitmap kfree() and bitmap_free() are the same. But using the latter is more consistent when freeing memory allocated with bitmap_zalloc(). Signed-off-by: Christophe JAILLET Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/0b982ab54844803049c217b2899baa59602faacd.1640529916.git.christophe.jaillet@wanadoo.fr --- drivers/irqchip/irq-loongson-pch-msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index 32562b7e681b..e3801c4a77ed 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -241,7 +241,7 @@ static int pch_msi_init(struct device_node *node, return 0; err_map: - kfree(priv->msi_map); + bitmap_free(priv->msi_map); err_priv: kfree(priv); return ret; From 6e7f90d163afa8fc2efd6ae318e7c20156a5621f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Jan 2022 17:00:16 -0500 Subject: [PATCH 011/367] lockd: fix server crash on reboot of client holding lock I thought I was iterating over the array when actually the iteration is over the values contained in the array? Ugh, keep it simple. Symptoms were a null deference in vfs_lock_file() when an NFSv3 client that previously held a lock came back up and sent a notify. Reported-by: Jonathan Woithe Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file") Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/lockd/svcsubs.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index cb3a7512c33e..54c2e42130ca 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -179,19 +179,20 @@ nlm_delete_file(struct nlm_file *file) static int nlm_unlock_files(struct nlm_file *file) { struct file_lock lock; - struct file *f; lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; - for (f = file->f_file[0]; f <= file->f_file[1]; f++) { - if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) { - pr_warn("lockd: unlock failure in %s:%d\n", - __FILE__, __LINE__); - return 1; - } - } + if (file->f_file[O_RDONLY] && + vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL)) + goto out_err; + if (file->f_file[O_WRONLY] && + vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, &lock, NULL)) + goto out_err; return 0; +out_err: + pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__); + return 1; } /* From 0a3d12ab5097b1d045e693412e6b366b7e82031b Mon Sep 17 00:00:00 2001 From: Padmanabha Srinivasaiah Date: Tue, 18 Jan 2022 01:51:26 +0100 Subject: [PATCH 012/367] drm/vc4: Fix deadlock on DSI device attach error DSI device attach to DSI host will be done with host device's lock held. Un-registering host in "device attach" error path (ex: probe retry) will result in deadlock with below call trace and non operational DSI display. Startup Call trace: [ 35.043036] rt_mutex_slowlock.constprop.21+0x184/0x1b8 [ 35.043048] mutex_lock_nested+0x7c/0xc8 [ 35.043060] device_del+0x4c/0x3e8 [ 35.043075] device_unregister+0x20/0x40 [ 35.043082] mipi_dsi_remove_device_fn+0x18/0x28 [ 35.043093] device_for_each_child+0x68/0xb0 [ 35.043105] mipi_dsi_host_unregister+0x40/0x90 [ 35.043115] vc4_dsi_host_attach+0xf0/0x120 [vc4] [ 35.043199] mipi_dsi_attach+0x30/0x48 [ 35.043209] tc358762_probe+0x128/0x164 [tc358762] [ 35.043225] mipi_dsi_drv_probe+0x28/0x38 [ 35.043234] really_probe+0xc0/0x318 [ 35.043244] __driver_probe_device+0x80/0xe8 [ 35.043254] driver_probe_device+0xb8/0x118 [ 35.043263] __device_attach_driver+0x98/0xe8 [ 35.043273] bus_for_each_drv+0x84/0xd8 [ 35.043281] __device_attach+0xf0/0x150 [ 35.043290] device_initial_probe+0x1c/0x28 [ 35.043300] bus_probe_device+0xa4/0xb0 [ 35.043308] deferred_probe_work_func+0xa0/0xe0 [ 35.043318] process_one_work+0x254/0x700 [ 35.043330] worker_thread+0x4c/0x448 [ 35.043339] kthread+0x19c/0x1a8 [ 35.043348] ret_from_fork+0x10/0x20 Shutdown Call trace: [ 365.565417] Call trace: [ 365.565423] __switch_to+0x148/0x200 [ 365.565452] __schedule+0x340/0x9c8 [ 365.565467] schedule+0x48/0x110 [ 365.565479] schedule_timeout+0x3b0/0x448 [ 365.565496] wait_for_completion+0xac/0x138 [ 365.565509] __flush_work+0x218/0x4e0 [ 365.565523] flush_work+0x1c/0x28 [ 365.565536] wait_for_device_probe+0x68/0x158 [ 365.565550] device_shutdown+0x24/0x348 [ 365.565561] kernel_restart_prepare+0x40/0x50 [ 365.565578] kernel_restart+0x20/0x70 [ 365.565591] __do_sys_reboot+0x10c/0x220 [ 365.565605] __arm64_sys_reboot+0x2c/0x38 [ 365.565619] invoke_syscall+0x4c/0x110 [ 365.565634] el0_svc_common.constprop.3+0xfc/0x120 [ 365.565648] do_el0_svc+0x2c/0x90 [ 365.565661] el0_svc+0x4c/0xf0 [ 365.565671] el0t_64_sync_handler+0x90/0xb8 [ 365.565682] el0t_64_sync+0x180/0x184 Signed-off-by: Padmanabha Srinivasaiah Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220118005127.29015-1-treasure4paddy@gmail.com --- drivers/gpu/drm/vc4/vc4_dsi.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index a229da58962a..9300d3354c51 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -1262,7 +1262,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct vc4_dsi *dsi = host_to_dsi(host); - int ret; dsi->lanes = device->lanes; dsi->channel = device->channel; @@ -1297,18 +1296,15 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, return 0; } - ret = component_add(&dsi->pdev->dev, &vc4_dsi_ops); - if (ret) { - mipi_dsi_host_unregister(&dsi->dsi_host); - return ret; - } - - return 0; + return component_add(&dsi->pdev->dev, &vc4_dsi_ops); } static int vc4_dsi_host_detach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { + struct vc4_dsi *dsi = host_to_dsi(host); + + component_del(&dsi->pdev->dev, &vc4_dsi_ops); return 0; } @@ -1686,9 +1682,7 @@ static int vc4_dsi_dev_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; struct vc4_dsi *dsi = dev_get_drvdata(dev); - component_del(&pdev->dev, &vc4_dsi_ops); mipi_dsi_host_unregister(&dsi->dsi_host); - return 0; } From d3cbc6e323c9299d10c8d2e4127c77c7d05d07b1 Mon Sep 17 00:00:00 2001 From: Raymond Jay Golo Date: Thu, 13 Jan 2022 08:06:20 +0800 Subject: [PATCH 013/367] drm: panel-orientation-quirks: Add quirk for the 1Netbook OneXPlayer The 1Netbook OneXPlayer uses a panel which has been mounted 90 degrees rotated. Add a quirk for this. Signed-off-by: Raymond Jay Golo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220113000619.90988-1-rjgolo@gmail.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 042bb80383c9..b910978d3e48 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -115,6 +115,12 @@ static const struct drm_dmi_panel_orientation_data lcd1280x1920_rightside_up = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd1600x2560_leftside_up = { + .width = 1600, + .height = 2560, + .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, +}; + static const struct dmi_system_id orientation_data[] = { { /* Acer One 10 (S1003) */ .matches = { @@ -275,6 +281,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"), }, .driver_data = (void *)&onegx1_pro, + }, { /* OneXPlayer */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), + }, + .driver_data = (void *)&lcd1600x2560_leftside_up, }, { /* Samsung GalaxyBook 10.6 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), From b4c18c18ebf7cf1e602af88c12ef9cb0d6e5ce51 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 14 Jan 2022 19:36:03 -0800 Subject: [PATCH 014/367] regulator: MAX20086: add gpio/consumer.h max20086-regulator.c needs for an enum, some macros, and a function prototype. (seen on ARCH=m68k) Adding this header file fixes multiple build errors: ../drivers/regulator/max20086-regulator.c: In function 'max20086_i2c_probe': ../drivers/regulator/max20086-regulator.c:217:26: error: storage size of 'flags' isn't known 217 | enum gpiod_flags flags; ../drivers/regulator/max20086-regulator.c:261:27: error: 'GPIOD_OUT_HIGH' undeclared (first use in this function); did you mean 'GPIOF_INIT_HIGH'? 261 | flags = boot_on ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; | ^~~~~~~~~~~~~~ ../drivers/regulator/max20086-regulator.c:261:44: error: 'GPIOD_OUT_LOW' undeclared (first use in this function); did you mean 'GPIOF_INIT_LOW'? 261 | flags = boot_on ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; ../drivers/regulator/max20086-regulator.c:262:27: error: implicit declaration of function 'devm_gpiod_get'; did you mean 'devm_gpio_free'? [-Werror=implicit-function-declaration] 262 | chip->ena_gpiod = devm_gpiod_get(chip->dev, "enable", flags); ../drivers/regulator/max20086-regulator.c:217:26: warning: unused variable 'flags' [-Wunused-variable] 217 | enum gpiod_flags flags; Fixes: bfff546aae50 ("regulator: Add MAX20086-MAX20089 driver") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Watson Chow Cc: Mark Brown Cc: Laurent Pinchart Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20220115033603.24473-1-rdunlap@infradead.org Signed-off-by: Mark Brown --- drivers/regulator/max20086-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index 63aa6ec3254a..b8bf76c170fe 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include From e4d63473d3110afd170e6e0e48494d3789d26136 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Mon, 17 Jan 2022 13:17:44 +0100 Subject: [PATCH 015/367] spi: stm32-qspi: Update spi registering Some device driver need to communicate to qspi device during the remove process, qspi controller must be functional when spi_unregister_master() is called. To ensure this, replace devm_spi_register_master() by spi_register_master() and spi_unregister_master() is called directly in .remove callback before stopping the qspi controller. This issue was put in evidence using kernel v5.11 and later with a spi-nor which supports the software reset feature introduced by commit d73ee7534cc5 ("mtd: spi-nor: core: perform a Soft Reset on shutdown") Fixes: c530cd1d9d5e ("spi: spi-mem: add stm32 qspi controller") Signed-off-by: Patrice Chotard Cc: # 5.8.x Reviewed-by: Lukas Wunner Link: https://lore.kernel.org/r/20220117121744.29729-1-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-qspi.c | 47 +++++++++++++----------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index 514337c86d2c..ffdc55f87e82 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -688,7 +688,7 @@ static int stm32_qspi_probe(struct platform_device *pdev) struct resource *res; int ret, irq; - ctrl = spi_alloc_master(dev, sizeof(*qspi)); + ctrl = devm_spi_alloc_master(dev, sizeof(*qspi)); if (!ctrl) return -ENOMEM; @@ -697,58 +697,46 @@ static int stm32_qspi_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi"); qspi->io_base = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->io_base)) { - ret = PTR_ERR(qspi->io_base); - goto err_master_put; - } + if (IS_ERR(qspi->io_base)) + return PTR_ERR(qspi->io_base); qspi->phys_base = res->start; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mm"); qspi->mm_base = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->mm_base)) { - ret = PTR_ERR(qspi->mm_base); - goto err_master_put; - } + if (IS_ERR(qspi->mm_base)) + return PTR_ERR(qspi->mm_base); qspi->mm_size = resource_size(res); - if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ) { - ret = -EINVAL; - goto err_master_put; - } + if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ) + return -EINVAL; irq = platform_get_irq(pdev, 0); - if (irq < 0) { - ret = irq; - goto err_master_put; - } + if (irq < 0) + return irq; ret = devm_request_irq(dev, irq, stm32_qspi_irq, 0, dev_name(dev), qspi); if (ret) { dev_err(dev, "failed to request irq\n"); - goto err_master_put; + return ret; } init_completion(&qspi->data_completion); init_completion(&qspi->match_completion); qspi->clk = devm_clk_get(dev, NULL); - if (IS_ERR(qspi->clk)) { - ret = PTR_ERR(qspi->clk); - goto err_master_put; - } + if (IS_ERR(qspi->clk)) + return PTR_ERR(qspi->clk); qspi->clk_rate = clk_get_rate(qspi->clk); - if (!qspi->clk_rate) { - ret = -EINVAL; - goto err_master_put; - } + if (!qspi->clk_rate) + return -EINVAL; ret = clk_prepare_enable(qspi->clk); if (ret) { dev_err(dev, "can not enable the clock\n"); - goto err_master_put; + return ret; } rstc = devm_reset_control_get_exclusive(dev, NULL); @@ -784,7 +772,7 @@ static int stm32_qspi_probe(struct platform_device *pdev) pm_runtime_enable(dev); pm_runtime_get_noresume(dev); - ret = devm_spi_register_master(dev, ctrl); + ret = spi_register_master(ctrl); if (ret) goto err_pm_runtime_free; @@ -806,8 +794,6 @@ err_dma_free: stm32_qspi_dma_free(qspi); err_clk_disable: clk_disable_unprepare(qspi->clk); -err_master_put: - spi_master_put(qspi->ctrl); return ret; } @@ -817,6 +803,7 @@ static int stm32_qspi_remove(struct platform_device *pdev) struct stm32_qspi *qspi = platform_get_drvdata(pdev); pm_runtime_get_sync(qspi->dev); + spi_unregister_master(qspi->ctrl); /* disable qspi */ writel_relaxed(0, qspi->io_base + QSPI_CR); stm32_qspi_dma_free(qspi); From 3cefddb72f80dc8d49ce605628ceb6525cfd64da Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Wed, 19 Jan 2022 10:32:44 +0100 Subject: [PATCH 016/367] spi: stm32: remove inexistant variables in struct stm32_spi_cfg comment Variables 'can_dma' and 'has_startbit' are described within the struct stm32_spi_cfg comment but have never existed in this structure so remove them. Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/20220119093245.624878-2-alain.volmat@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 9bd3fd1652f7..b5ef2470cefe 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -221,7 +221,6 @@ struct stm32_spi; * time between frames (if driver has this functionality) * @set_number_of_data: optional routine to configure registers to desired * number of data (if driver has this functionality) - * @can_dma: routine to determine if the transfer is eligible for DMA use * @transfer_one_dma_start: routine to start transfer a single spi_transfer * using DMA * @dma_rx_cb: routine to call after DMA RX channel operation is complete @@ -232,7 +231,6 @@ struct stm32_spi; * @baud_rate_div_min: minimum baud rate divisor * @baud_rate_div_max: maximum baud rate divisor * @has_fifo: boolean to know if fifo is used for driver - * @has_startbit: boolean to know if start bit is used to start transfer */ struct stm32_spi_cfg { const struct stm32_spi_regspec *regs; From 9df15d842a0f77f2b8ee29386f6d714e4220df57 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Wed, 19 Jan 2022 10:32:45 +0100 Subject: [PATCH 017/367] spi: stm32: make SPI_MASTER_MUST_TX flags only specific to STM32F4 Commit 61367d0b8f5e ("spi: stm32: Add 'SPI_SIMPLEX_RX', 'SPI_3WIRE_RX' support for stm32f4") allowed to properly communicate with the st-gyro-spi even when there is no tx_buf provided by setting the flag SPI_MASTER_MUST_TX and thus forcing a dummy TX buffer to work in Full Duplex. This behavior should kept only for the STM32F4 and not for other compatible since the STM32H7 do support SIMPLEX_RX and SIMPLEX_TX. Add the flags variable within the struct stm32_spi_cfg so that flags used at master registration time are compatible specific. Fixes: 61367d0b8f5e ("spi: stm32: Add 'SPI_SIMPLEX_RX', 'SPI_3WIRE_RX' support for stm32f4") Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/20220119093245.624878-3-alain.volmat@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index b5ef2470cefe..7fc24505a72c 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -231,6 +231,7 @@ struct stm32_spi; * @baud_rate_div_min: minimum baud rate divisor * @baud_rate_div_max: maximum baud rate divisor * @has_fifo: boolean to know if fifo is used for driver + * @flags: compatible specific SPI controller flags used at registration time */ struct stm32_spi_cfg { const struct stm32_spi_regspec *regs; @@ -251,6 +252,7 @@ struct stm32_spi_cfg { unsigned int baud_rate_div_min; unsigned int baud_rate_div_max; bool has_fifo; + u16 flags; }; /** @@ -1720,6 +1722,7 @@ static const struct stm32_spi_cfg stm32f4_spi_cfg = { .baud_rate_div_min = STM32F4_SPI_BR_DIV_MIN, .baud_rate_div_max = STM32F4_SPI_BR_DIV_MAX, .has_fifo = false, + .flags = SPI_MASTER_MUST_TX, }; static const struct stm32_spi_cfg stm32h7_spi_cfg = { @@ -1852,7 +1855,7 @@ static int stm32_spi_probe(struct platform_device *pdev) master->prepare_message = stm32_spi_prepare_msg; master->transfer_one = stm32_spi_transfer_one; master->unprepare_message = stm32_spi_unprepare_msg; - master->flags = SPI_MASTER_MUST_TX; + master->flags = spi->cfg->flags; spi->dma_tx = dma_request_chan(spi->dev, "tx"); if (IS_ERR(spi->dma_tx)) { From 217663f101a56ef77f82273818253fff082bf503 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 20 Jan 2022 13:57:22 +0100 Subject: [PATCH 018/367] fanotify: remove variable set but not used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code that uses the pointer info has been removed in 7326e382c21e ("fanotify: report old and/or new parent+name in FAN_RENAME event"). and fanotify_event_info() doesn't change 'event', so the declaration and assignment of info can be removed. Eliminate the following clang warning: fs/notify/fanotify/fanotify_user.c:161:24: warning: variable ‘info’ set but not used Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 73a3e939c921..2641b0802479 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -151,7 +151,6 @@ static size_t fanotify_event_len(unsigned int info_mode, struct fanotify_event *event) { size_t event_len = FAN_EVENT_METADATA_LEN; - struct fanotify_info *info; int fh_len; int dot_len = 0; @@ -161,8 +160,6 @@ static size_t fanotify_event_len(unsigned int info_mode, if (fanotify_is_error_event(event->mask)) event_len += FANOTIFY_ERROR_INFO_LEN; - info = fanotify_event_info(event); - if (fanotify_event_has_any_dir_fh(event)) { event_len += fanotify_dir_name_info_len(event); } else if ((info_mode & FAN_REPORT_NAME) && From 5298d4bfe80f6ae6ae2777bcd1357b0022d98573 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Jan 2022 07:56:14 +0100 Subject: [PATCH 019/367] unicode: clean up the Kconfig symbol confusion Turn the CONFIG_UNICODE symbol into a tristate that generates some always built in code and remove the confusing CONFIG_UNICODE_UTF8_DATA symbol. Note that a lot of the IS_ENABLED() checks could be turned from cpp statements into normal ifs, but this change is intended to be fairly mechanic, so that should be cleaned up later. Fixes: 2b3d04787012 ("unicode: Add utf8-data module") Reported-by: Linus Torvalds Reviewed-by: Eric Biggers Signed-off-by: Christoph Hellwig Signed-off-by: Gabriel Krisman Bertazi --- fs/Makefile | 2 +- fs/ext4/ext4.h | 14 +++++++------- fs/ext4/hash.c | 2 +- fs/ext4/namei.c | 12 ++++++------ fs/ext4/super.c | 10 +++++----- fs/ext4/sysfs.c | 8 ++++---- fs/f2fs/dir.c | 10 +++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/hash.c | 2 +- fs/f2fs/namei.c | 4 ++-- fs/f2fs/recovery.c | 4 ++-- fs/f2fs/super.c | 10 +++++----- fs/f2fs/sysfs.c | 10 +++++----- fs/libfs.c | 10 +++++----- fs/unicode/Kconfig | 18 +++++------------- fs/unicode/Makefile | 6 ++++-- include/linux/fs.h | 2 +- 17 files changed, 60 insertions(+), 66 deletions(-) diff --git a/fs/Makefile b/fs/Makefile index 84c5e4cdfee5..c71ee0127866 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_EXPORTFS) += exportfs/ obj-$(CONFIG_NFSD) += nfsd/ obj-$(CONFIG_LOCKD) += lockd/ obj-$(CONFIG_NLS) += nls/ -obj-$(CONFIG_UNICODE) += unicode/ +obj-y += unicode/ obj-$(CONFIG_SYSV_FS) += sysv/ obj-$(CONFIG_SMBFS_COMMON) += smbfs_common/ obj-$(CONFIG_CIFS) += cifs/ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 71a3cdceaa03..242e74cfb060 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2485,7 +2485,7 @@ struct ext4_filename { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct fscrypt_str cf_name; #endif }; @@ -2721,7 +2721,7 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) extern int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, struct ext4_filename *fname); @@ -2754,7 +2754,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir, ext4_fname_from_fscrypt_name(fname, &name); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); #endif return err; @@ -2773,7 +2773,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, ext4_fname_from_fscrypt_name(fname, &name); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); #endif return err; @@ -2790,7 +2790,7 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->usr_fname = NULL; fname->disk_name.name = NULL; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) kfree(fname->cf_name.name); fname->cf_name.name = NULL; #endif @@ -2806,7 +2806,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir, fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); #endif @@ -2822,7 +2822,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, static inline void ext4_fname_free_filename(struct ext4_filename *fname) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) kfree(fname->cf_name.name); fname->cf_name.name = NULL; #endif diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index f34f4176c1e7..147b5241dd94 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -290,7 +290,7 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len, int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 52c9bd154122..269d2d051ede 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1317,7 +1317,7 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) dx_set_count(entries, count + 1); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * Test whether a case-insensitive directory entry matches the filename * being searched for. If quick is set, assume the name being looked up @@ -1428,7 +1428,7 @@ static bool ext4_match(struct inode *parent, f.crypto_buf = fname->crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) && (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { if (fname->cf_name.name) { @@ -1800,7 +1800,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi } } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (!inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as @@ -2308,7 +2308,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (fscrypt_is_nokey_name(dentry)) return -ENOKEY; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) && sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; @@ -3126,7 +3126,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) ext4_fc_track_unlink(handle, dentry); retval = ext4_mark_inode_dirty(handle, dir); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the @@ -3231,7 +3231,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry)); if (!retval) ext4_fc_track_unlink(handle, dentry); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the diff --git a/fs/ext4/super.c b/fs/ext4/super.c index db9fe4843529..52be1ca38eef 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1302,7 +1302,7 @@ static void ext4_put_super(struct super_block *sb) kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif kfree(sbi); @@ -1962,7 +1962,7 @@ static const struct mount_opts { {Opt_err, 0, 0} }; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) static const struct ext4_sb_encodings { __u16 magic; char *name; @@ -3609,7 +3609,7 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_UNICODE +#if !IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb)) { ext4_msg(sb, KERN_ERR, "Filesystem with casefold feature cannot be " @@ -4613,7 +4613,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (err < 0) goto failed_mount; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { const struct ext4_sb_encodings *encoding_info; struct unicode_map *encoding; @@ -5517,7 +5517,7 @@ failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index f61e65ae27d8..d233c24ea342 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -309,7 +309,7 @@ EXT4_ATTR_FEATURE(meta_bg_resize); EXT4_ATTR_FEATURE(encryption); EXT4_ATTR_FEATURE(test_dummy_encryption_v2); #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) EXT4_ATTR_FEATURE(casefold); #endif #ifdef CONFIG_FS_VERITY @@ -317,7 +317,7 @@ EXT4_ATTR_FEATURE(verity); #endif EXT4_ATTR_FEATURE(metadata_csum_seed); EXT4_ATTR_FEATURE(fast_commit); -#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) +#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) EXT4_ATTR_FEATURE(encrypted_casefold); #endif @@ -329,7 +329,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(encryption), ATTR_LIST(test_dummy_encryption_v2), #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(casefold), #endif #ifdef CONFIG_FS_VERITY @@ -337,7 +337,7 @@ static struct attribute *ext4_feat_attrs[] = { #endif ATTR_LIST(metadata_csum_seed), ATTR_LIST(fast_commit), -#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) +#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) ATTR_LIST(encrypted_casefold), #endif NULL, diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1820e9c106f7..166f08623362 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -16,7 +16,7 @@ #include "xattr.h" #include -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) extern struct kmem_cache *f2fs_cf_name_slab; #endif @@ -79,7 +79,7 @@ unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de) int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct super_block *sb = dir->i_sb; if (IS_CASEFOLDED(dir)) { @@ -174,7 +174,7 @@ void f2fs_free_filename(struct f2fs_filename *fname) kfree(fname->crypto_buf.name); fname->crypto_buf.name = NULL; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (fname->cf_name.name) { kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); fname->cf_name.name = NULL; @@ -208,7 +208,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, return f2fs_find_target_dentry(&d, fname, max_slots); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * Test whether a case-insensitive directory entry matches the filename * being searched for. @@ -266,7 +266,7 @@ static inline int f2fs_match_name(const struct inode *dir, { struct fscrypt_name f; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (fname->cf_name.name) { struct qstr cf = FSTR_TO_QSTR(&fname->cf_name); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d0d603187171..4da88928ffb5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -487,7 +487,7 @@ struct f2fs_filename { */ struct fscrypt_str crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * For casefolded directories: the casefolded name, but it's left NULL * if the original name is not valid Unicode, if the directory is both diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index e3beac546c63..3cb1e7a24740 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -105,7 +105,7 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) return; } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (IS_CASEFOLDED(dir)) { /* * If the casefolded name is provided, hash it instead of the diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a728a0af9ce0..5f213f05556d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -561,7 +561,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out_iput; } out_splice: -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (!inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as @@ -622,7 +622,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) goto fail; } f2fs_delete_entry(de, page, dir, inode); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d1664a0567ef..2fbbc820c00a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -46,7 +46,7 @@ static struct kmem_cache *fsync_entry_slab; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) extern struct kmem_cache *f2fs_cf_name_slab; #endif @@ -149,7 +149,7 @@ static int init_recovered_filename(const struct inode *dir, if (err) return err; f2fs_hash_filename(dir, fname); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* Case-sensitive match is fine for recovery */ kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); fname->cf_name.name = NULL; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 15f12ece0ac6..b870c6459fa1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -256,7 +256,7 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) va_end(args); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) static const struct f2fs_sb_encodings { __u16 magic; char *name; @@ -1218,7 +1218,7 @@ default_check: return -EINVAL; } #endif -#ifndef CONFIG_UNICODE +#if !IS_ENABLED(CONFIG_UNICODE) if (f2fs_sb_has_casefold(sbi)) { f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); @@ -1578,7 +1578,7 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_iostat(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif kfree(sbi); @@ -3861,7 +3861,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) { const struct f2fs_sb_encodings *encoding_info; struct unicode_map *encoding; @@ -4412,7 +4412,7 @@ free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); sb->s_encoding = NULL; #endif diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 8408f77764e8..fa3d9cb2d69b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -192,7 +192,7 @@ static ssize_t unusable_show(struct f2fs_attr *a, static ssize_t encoding_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct super_block *sb = sbi->sb; if (f2fs_sb_has_casefold(sbi)) @@ -756,7 +756,7 @@ F2FS_GENERAL_RO_ATTR(avg_vblocks); #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption); F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) F2FS_FEATURE_RO_ATTR(encrypted_casefold); #endif #endif /* CONFIG_FS_ENCRYPTION */ @@ -775,7 +775,7 @@ F2FS_FEATURE_RO_ATTR(lost_found); F2FS_FEATURE_RO_ATTR(verity); #endif F2FS_FEATURE_RO_ATTR(sb_checksum); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) F2FS_FEATURE_RO_ATTR(casefold); #endif F2FS_FEATURE_RO_ATTR(readonly); @@ -886,7 +886,7 @@ static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), ATTR_LIST(test_dummy_encryption_v2), -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(encrypted_casefold), #endif #endif /* CONFIG_FS_ENCRYPTION */ @@ -905,7 +905,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(verity), #endif ATTR_LIST(sb_checksum), -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(casefold), #endif ATTR_LIST(readonly), diff --git a/fs/libfs.c b/fs/libfs.c index ba7438ab9371..974125270a42 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1379,7 +1379,7 @@ bool is_empty_dir_inode(struct inode *inode) (inode->i_op == &empty_dir_inode_operations); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * Determine if the name of a dentry should be casefolded. * @@ -1473,7 +1473,7 @@ static const struct dentry_operations generic_encrypted_dentry_ops = { }; #endif -#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE) +#if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE) static const struct dentry_operations generic_encrypted_ci_dentry_ops = { .d_hash = generic_ci_d_hash, .d_compare = generic_ci_d_compare, @@ -1508,10 +1508,10 @@ void generic_set_encrypted_ci_d_ops(struct dentry *dentry) #ifdef CONFIG_FS_ENCRYPTION bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) bool needs_ci_ops = dentry->d_sb->s_encoding; #endif -#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE) +#if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE) if (needs_encrypt_ops && needs_ci_ops) { d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); return; @@ -1523,7 +1523,7 @@ void generic_set_encrypted_ci_d_ops(struct dentry *dentry) return; } #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (needs_ci_ops) { d_set_d_op(dentry, &generic_ci_dentry_ops); return; diff --git a/fs/unicode/Kconfig b/fs/unicode/Kconfig index 610d7bc05d6e..da786a687fdc 100644 --- a/fs/unicode/Kconfig +++ b/fs/unicode/Kconfig @@ -3,21 +3,13 @@ # UTF-8 normalization # config UNICODE - bool "UTF-8 normalization and casefolding support" + tristate "UTF-8 normalization and casefolding support" help Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding - support. - -config UNICODE_UTF8_DATA - tristate "UTF-8 normalization and casefolding tables" - depends on UNICODE - default UNICODE - help - This contains a large table of case foldings, which can be loaded as - a separate module if you say M here. To be on the safe side stick - to the default of Y. Saying N here makes no sense, if you do not want - utf8 casefolding support, disable CONFIG_UNICODE instead. + support. If you say M here the large table of case foldings will + be a separate loadable module that gets requested only when a file + system actually use it. config UNICODE_NORMALIZATION_SELFTEST tristate "Test UTF-8 normalization support" - depends on UNICODE_UTF8_DATA + depends on UNICODE diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile index 2f9d9188852b..0cc87423de82 100644 --- a/fs/unicode/Makefile +++ b/fs/unicode/Makefile @@ -1,8 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_UNICODE) += unicode.o +ifneq ($(CONFIG_UNICODE),) +obj-y += unicode.o +endif +obj-$(CONFIG_UNICODE) += utf8data.o obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o -obj-$(CONFIG_UNICODE_UTF8_DATA) += utf8data.o unicode-y := utf8-norm.o utf8-core.o diff --git a/include/linux/fs.h b/include/linux/fs.h index c8510da6cc6d..fdac22d16c2b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1490,7 +1490,7 @@ struct super_block { #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif From d11a327ed95dbec756b99cbfef2a7fd85c9eeb09 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jan 2022 21:07:47 +0000 Subject: [PATCH 020/367] KVM: arm64: vgic-v3: Restrict SEIS workaround to known broken systems Contrary to what df652bcf1136 ("KVM: arm64: vgic-v3: Work around GICv3 locally generated SErrors") was asserting, there is at least one other system out there (Cavium ThunderX2) implementing SEIS, and not in an obviously broken way. So instead of imposing the M1 workaround on an innocent bystander, let's limit it to the two known broken Apple implementations. Fixes: df652bcf1136 ("KVM: arm64: vgic-v3: Work around GICv3 locally generated SErrors") Reported-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220122103912.795026-1-maz@kernel.org --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 3 +++ arch/arm64/kvm/vgic/vgic-v3.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 20db2f281cf2..4fb419f7b8b6 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; /* IDbits */ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; + /* SEIS */ + if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) + val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT); /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 78cf674c1230..221489c29354 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char *buf) } early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); +static const struct midr_range broken_seis[] = { + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + {}, +}; + +static bool vgic_v3_broken_seis(void) +{ + return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && + is_midr_in_range_list(read_cpuid_id(), broken_seis)); +} + /** * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller * @info: pointer to the GIC description @@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group1_trap = true; } - if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) { - kvm_info("GICv3 with locally generated SEI\n"); + if (vgic_v3_broken_seis()) { + kvm_info("GICv3 with broken locally generated SEI\n"); + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; group0_trap = true; group1_trap = true; if (ich_vtr_el2 & ICH_VTR_TDS_MASK) From a6501e4b380faee6bbf41bb2f833977c7ac6491a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Jan 2022 10:20:03 -0800 Subject: [PATCH 021/367] eeprom: at25: Restore missing allocation The at25 driver regressed in v5.17-rc1 due to a broken conflict resolution: the allocation of the object was accidentally removed. Restore it. This was found when building under CONFIG_FORTIFY_SOURCE=y and -Warray-bounds, which complained about strncpy() being used against an empty object: In function 'strncpy', inlined from 'at25_fw_to_chip.constprop' at drivers/misc/eeprom/at25.c:312:2: ./include/linux/fortify-string.h:48:33: warning: '__builtin_strncpy' offset [0, 9] is out of the bounds [0, 0] [-Warray-bounds] 48 | #define __underlying_strncpy __builtin_strncpy | ^ ./include/linux/fortify-string.h:59:16: note: in expansion of macro '__underlying_strncpy' 59 | return __underlying_strncpy(p, q, size); | ^~~~~~~~~~~~~~~~~~~~ In function 'strncpy', inlined from 'at25_fram_to_chip' at drivers/misc/eeprom/at25.c:373:2, inlined from 'at25_probe' at drivers/misc/eeprom/at25.c:453:10: ./include/linux/fortify-string.h:48:33: warning: '__builtin_strncpy' offset [0, 9] is out of the bounds [0, 0] [-Warray-bounds] 48 | #define __underlying_strncpy __builtin_strncpy | ^ ./include/linux/fortify-string.h:59:16: note: in expansion of macro '__underlying_strncpy' 59 | return __underlying_strncpy(p, q, size); | ^~~~~~~~~~~~~~~~~~~~ Link: https://lore.kernel.org/lkml/CAHp75VdqK7h63fz-cPaQ2MGaVdaR2f1Fb5kKCZidUG3RwLsAVA@mail.gmail.com/ Fixes: af40d16042d6 ("Merge v5.15-rc5 into char-misc-next") Cc: Arnd Bergmann Cc: Jiri Prchal Reviewed-by: Andy Shevchenko Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220118182003.3385019-1-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at25.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index c3305bdda69c..bee727ed98db 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -440,6 +440,10 @@ static int at25_probe(struct spi_device *spi) return -ENXIO; } + at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL); + if (!at25) + return -ENOMEM; + mutex_init(&at25->lock); at25->spi = spi; spi_set_drvdata(spi, at25); From 16436f70abeebb29cd99444e27b310755806c1fa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jan 2022 16:16:14 +0100 Subject: [PATCH 022/367] irqchip/gic-v3-its: Fix build for !SMP Commit 835f442fdbce ("irqchip/gic-v3-its: Limit memreserve cpuhp state lifetime") added a reference to cpus_booted_once_mask, which does not exist on !SMP builds, breaking the build for such configurations. Given the intent of the check, short circuit it to always pass. Cc: Valentin Schneider Fixes: 835f442fdbce ("irqchip/gic-v3-its: Limit memreserve cpuhp state lifetime") Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220122151614.133766-1-ardb@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index ee83eb377d7e..7b8f1ec0ff78 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -5241,7 +5241,8 @@ static int its_cpu_memreserve_lpi(unsigned int cpu) out: /* Last CPU being brought up gets to issue the cleanup */ - if (cpumask_equal(&cpus_booted_once_mask, cpu_possible_mask)) + if (!IS_ENABLED(CONFIG_SMP) || + cpumask_equal(&cpus_booted_once_mask, cpu_possible_mask)) schedule_work(&rdist_memreserve_cpuhp_cleanup_work); gic_data_rdist()->flags |= RD_LOCAL_MEMRESERVE_DONE; From 1ea1d6a847d2b1d17fefd9196664b95f052a0775 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 13 Jan 2022 11:44:19 +0100 Subject: [PATCH 023/367] s390/nmi: handle guarded storage validity failures for KVM guests machine check validity bits reflect the state of the machine check. If a guest does not make use of guarded storage, the validity bit might be off. We can not use the host CR bit to decide if the validity bit must be on. So ignore "invalid" guarded storage controls for KVM guests in the host and rely on the machine check being forwarded to the guest. If no other errors happen from a host perspective everything is fine and no process must be killed and the host can continue to run. Cc: stable@vger.kernel.org Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") Reported-by: Carsten Otte Signed-off-by: Christian Borntraeger Tested-by: Carsten Otte Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/nmi.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0c9e894913dc..147c0d5fd9b4 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -307,11 +307,21 @@ static int notrace s390_validate_registers(union mci mci, int umode) if (cr2.gse) { if (!mci.gs) { /* - * Guarded storage register can't be restored and - * the current processes uses guarded storage. - * It has to be terminated. + * 2 cases: + * - machine check in kernel or userspace + * - machine check while running SIE (KVM guest) + * For kernel or userspace the userspace values of + * guarded storage control can not be recreated, the + * process must be terminated. + * For SIE the guest values of guarded storage can not + * be recreated. This is either due to a bug or due to + * GS being disabled in the guest. The guest will be + * notified by KVM code and the guests machine check + * handling must take care of this. The host values + * are saved by KVM and are not affected. */ - kill_task = 1; + if (!test_cpu_flag(CIF_MCCK_GUEST)) + kill_task = 1; } else { load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area); } From f094a39c6ba168f2df1edfd1731cca377af5f442 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 17 Jan 2022 18:40:32 +0100 Subject: [PATCH 024/367] s390/nmi: handle vector validity failures for KVM guests The machine check validity bit tells about the context. If a KVM guest was running the bit tells about the guest validity and the host state is not affected. As a guest can disable the guest validity this might result in unwanted host errors on machine checks. Cc: stable@vger.kernel.org Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") Signed-off-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/nmi.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 147c0d5fd9b4..651a51914e34 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -264,7 +264,14 @@ static int notrace s390_validate_registers(union mci mci, int umode) /* Validate vector registers */ union ctlreg0 cr0; - if (!mci.vr) { + /* + * The vector validity must only be checked if not running a + * KVM guest. For KVM guests the machine check is forwarded by + * KVM and it is the responsibility of the guest to take + * appropriate actions. The host vector or FPU values have been + * saved by KVM and will be restored by KVM. + */ + if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) { /* * Vector registers can't be restored. If the kernel * currently uses vector registers the system is From 3d787b392d169d4a2e3aee6ac6dfd6ec39722cf2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 22 Jan 2022 10:24:31 +0100 Subject: [PATCH 025/367] s390/uaccess: fix compile error Compiling with e.g MARCH=z900 results in compile errors: arch/s390/lib/uaccess.c: In function 'copy_from_user_mvcos': >> arch/s390/lib/uaccess.c:65:15: error: variable 'spec' has initializer but incomplete type 65 | union oac spec = { Therefore make definition of union oac visible for all MARCHs. Reported-by: kernel test robot Cc: Nico Boehr Cc: Janis Schoetterl-Glausch Fixes: 012a224e1fa3 ("s390/uaccess: introduce bit field for OAC specifier") Signed-off-by: Heiko Carstens --- arch/s390/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 147cb3534ce4..d74e26b48604 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -47,8 +47,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); int __put_user_bad(void) __attribute__((noreturn)); int __get_user_bad(void) __attribute__((noreturn)); -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - union oac { unsigned int val; struct { @@ -71,6 +69,8 @@ union oac { }; }; +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + #define __put_get_user_asm(to, from, size, oac_spec) \ ({ \ int __rc; \ From e9b7c3a4263bdcfd31bc3d03d48ce0ded7a94635 Mon Sep 17 00:00:00 2001 From: Mihai Carabas Date: Wed, 19 Jan 2022 18:14:27 +0200 Subject: [PATCH 026/367] efi/libstub: arm64: Fix image check alignment at entry The kernel is aligned at SEGMENT_SIZE and this is the size populated in the PE headers: arch/arm64/kernel/efi-header.S: .long SEGMENT_ALIGN // SectionAlignment EFI_KIMG_ALIGN is defined as: (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN) So it depends on THREAD_ALIGN. On newer builds this message started to appear even though the loader is taking into account the PE header (which is stating SEGMENT_ALIGN). Fixes: c32ac11da3f8 ("efi/libstub: arm64: Double check image alignment at entry") Signed-off-by: Mihai Carabas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64-stub.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 2363fee9211c..9cc556013d08 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (image->image_base != _text) efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); - if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN)) - efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n", - EFI_KIMG_ALIGN >> 10); + if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN)) + efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n", + SEGMENT_ALIGN >> 10); kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); From f5390cd0b43c2e54c7cf5506c7da4a37c5cef746 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 12 Jan 2022 11:14:13 +0100 Subject: [PATCH 027/367] efi: runtime: avoid EFIv2 runtime services on Apple x86 machines Aditya reports [0] that his recent MacbookPro crashes in the firmware when using the variable services at runtime. The culprit appears to be a call to QueryVariableInfo(), which we did not use to call on Apple x86 machines in the past as they only upgraded from EFI v1.10 to EFI v2.40 firmware fairly recently, and QueryVariableInfo() (along with UpdateCapsule() et al) was added in EFI v2.00. The only runtime service introduced in EFI v2.00 that we actually use in Linux is QueryVariableInfo(), as the capsule based ones are optional, generally not used at runtime (all the LVFS/fwupd firmware update infrastructure uses helper EFI programs that invoke capsule update at boot time, not runtime), and not implemented by Apple machines in the first place. QueryVariableInfo() is used to 'safely' set variables, i.e., only when there is enough space. This prevents machines with buggy firmwares from corrupting their NVRAMs when they run out of space. Given that Apple machines have been using EFI v1.10 services only for the longest time (the EFI v2.0 spec was released in 2006, and Linux support for the newly introduced runtime services was added in 2011, but the MacbookPro12,1 released in 2015 still claims to be EFI v1.10 only), let's avoid the EFI v2.0 ones on all Apple x86 machines. [0] https://lore.kernel.org/all/6D757C75-65B1-468B-842D-10410081A8E4@live.com/ Cc: Cc: Jeremy Kerr Cc: Matthew Garrett Reported-by: Aditya Garg Tested-by: Orlando Chamberlain Signed-off-by: Ard Biesheuvel Tested-by: Aditya Garg Link: https://bugzilla.kernel.org/show_bug.cgi?id=215277 --- drivers/firmware/efi/efi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index ae79c3300129..7de3f5b6e8d0 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -722,6 +722,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr, systab_hdr->revision >> 16, systab_hdr->revision & 0xffff, vendor); + + if (IS_ENABLED(CONFIG_X86_64) && + systab_hdr->revision > EFI_1_10_SYSTEM_TABLE_REVISION && + !strcmp(vendor, "Apple")) { + pr_info("Apple Mac detected, using EFI v1.10 runtime services only\n"); + efi.runtime_version = EFI_1_10_SYSTEM_TABLE_REVISION; + } } static __initdata char memory_type_name[][13] = { From b36a2050040b2d839bdc044007cdd57101d7f881 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Fri, 21 Jan 2022 04:38:56 -0800 Subject: [PATCH 028/367] io_uring: fix bug in slow unregistering of nodes In some cases io_rsrc_ref_quiesce will call io_rsrc_node_switch_start, and then immediately flush the delayed work queue &ctx->rsrc_put_work. However the percpu_ref_put does not immediately destroy the node, it will be called asynchronously via RCU. That ends up with io_rsrc_node_ref_zero only being called after rsrc_put_work has been flushed, and so the process ends up sleeping for 1 second unnecessarily. This patch executes the put code immediately if we are busy quiescing. Fixes: 4a38aed2a0a7 ("io_uring: batch reap of dead file registrations") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220121123856.3557884-1-dylany@fb.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e54c4127422e..dd4c801c7afd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7822,10 +7822,15 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) struct io_ring_ctx *ctx = node->rsrc_data->ctx; unsigned long flags; bool first_add = false; + unsigned long delay = HZ; spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); node->done = true; + /* if we are mid-quiesce then do not delay */ + if (node->rsrc_data->quiesce) + delay = 0; + while (!list_empty(&ctx->rsrc_ref_list)) { node = list_first_entry(&ctx->rsrc_ref_list, struct io_rsrc_node, node); @@ -7838,7 +7843,7 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); if (first_add) - mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ); + mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); } static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) From 83114df32ae779df57e0af99a8ba6c3968b2ba3d Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 20 Jan 2022 10:10:25 +0000 Subject: [PATCH 029/367] block: fix memory leak in disk_register_independent_access_ranges kobject_init_and_add() takes reference even when it fails. According to the doc of kobject_init_and_add() If this function returns an error, kobject_put() must be called to properly clean up the memory associated with the object. Fix this issue by adding kobject_put(). Callback function blk_ia_ranges_sysfs_release() in kobject_put() can handle the pointer "iars" properly. Fixes: a2247f19ee1c ("block: Add independent access ranges support") Signed-off-by: Miaoqian Lin Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20220120101025.22411-1-linmq006@gmail.com Signed-off-by: Jens Axboe --- block/blk-ia-ranges.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index b925f3db3ab7..18c68d8b9138 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -144,7 +144,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk, &q->kobj, "%s", "independent_access_ranges"); if (ret) { q->ia_ranges = NULL; - kfree(iars); + kobject_put(&iars->kobj); return ret; } From 1f52b0aba6fd37653416375cb8a1ca673acf8d5f Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 17 Jan 2022 16:13:28 +0000 Subject: [PATCH 030/367] x86/MCE/AMD: Allow thresholding interface updates after init Changes to the AMD Thresholding sysfs code prevents sysfs writes from updating the underlying registers once CPU init is completed, i.e. "threshold_banks" is set. Allow the registers to be updated if the thresholding interface is already initialized or if in the init path. Use the "set_lvt_off" value to indicate if running in the init path, since this value is only set during init. Fixes: a037f3ca0ea0 ("x86/mce/amd: Make threshold bank setting hotplug robust") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/20220117161328.19148-1-yazen.ghannam@amd.com --- arch/x86/kernel/cpu/mce/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index a1e2f41796dc..9f4b508886dd 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -423,7 +423,7 @@ static void threshold_restart_bank(void *_tr) u32 hi, lo; /* sysfs write might race against an offline operation */ - if (this_cpu_read(threshold_banks)) + if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off) return; rdmsr(tr->b->address, lo, hi); From 96d9d1fa5cd505078534113308ced0aa56d8da58 Mon Sep 17 00:00:00 2001 From: Yanming Liu Date: Thu, 20 Jan 2022 04:20:52 +0800 Subject: [PATCH 031/367] Drivers: hv: balloon: account for vmbus packet header in max_pkt_size Commit adae1e931acd ("Drivers: hv: vmbus: Copy packets sent by Hyper-V out of the ring buffer") introduced a notion of maximum packet size in vmbus channel and used that size to initialize a buffer holding all incoming packet along with their vmbus packet header. hv_balloon uses the default maximum packet size VMBUS_DEFAULT_MAX_PKT_SIZE which matches its maximum message size, however vmbus_open expects this size to also include vmbus packet header. This leads to 4096 bytes dm_unballoon_request messages being truncated to 4080 bytes. When the driver tries to read next packet it starts from a wrong read_index, receives garbage and prints a lot of "Unhandled message: type: " in dmesg. Allocate the buffer with HV_HYP_PAGE_SIZE more bytes to make room for the header. Fixes: adae1e931acd ("Drivers: hv: vmbus: Copy packets sent by Hyper-V out of the ring buffer") Suggested-by: Michael Kelley (LINUX) Suggested-by: Andrea Parri (Microsoft) Signed-off-by: Yanming Liu Reviewed-by: Michael Kelley Reviewed-by: Andrea Parri (Microsoft) Link: https://lore.kernel.org/r/20220119202052.3006981-1-yanminglr@gmail.com Signed-off-by: Wei Liu --- drivers/hv/hv_balloon.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index ca873a3b98db..f2d05bff4245 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1660,6 +1660,13 @@ static int balloon_connect_vsp(struct hv_device *dev) unsigned long t; int ret; + /* + * max_pkt_size should be large enough for one vmbus packet header plus + * our receive buffer size. Hyper-V sends messages up to + * HV_HYP_PAGE_SIZE bytes long on balloon channel. + */ + dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2; + ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0, balloon_onchannelcallback, dev); if (ret) From 30cc53897470d45219fb0a5eafd0cc8b0032cd1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 11 Jan 2022 18:29:18 +0100 Subject: [PATCH 032/367] pinctrl: thunderbay: comment process of building functions a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should make code a bit easier to follow. While at it use some "for" loops to simplify array iteration loops. Ref: 5d0674999cc5 ("pinctrl: keembay: comment process of building functions a bit") Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20220111172919.6567-1-zajec5@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-thunderbay.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/pinctrl/pinctrl-thunderbay.c b/drivers/pinctrl/pinctrl-thunderbay.c index b5b47f4dd774..4756a23ca572 100644 --- a/drivers/pinctrl/pinctrl-thunderbay.c +++ b/drivers/pinctrl/pinctrl-thunderbay.c @@ -839,27 +839,30 @@ static int thunderbay_build_functions(struct thunderbay_pinctrl *tpc) void *ptr; int pin; - /* Total number of functions is unknown at this point. Allocate first. */ + /* + * Allocate maximum possible number of functions. Assume every pin + * being part of 8 (hw maximum) globally unique muxes. + */ tpc->nfuncs = 0; thunderbay_funcs = kcalloc(tpc->soc->npins * 8, sizeof(*thunderbay_funcs), GFP_KERNEL); if (!thunderbay_funcs) return -ENOMEM; - /* Find total number of functions and each's properties */ + /* Setup 1 function for each unique mux */ for (pin = 0; pin < tpc->soc->npins; pin++) { const struct pinctrl_pin_desc *pin_info = thunderbay_pins + pin; - struct thunderbay_mux_desc *pin_mux = pin_info->drv_data; + struct thunderbay_mux_desc *pin_mux; - while (pin_mux->name) { - struct function_desc *func = thunderbay_funcs; + for (pin_mux = pin_info->drv_data; pin_mux->name; pin_mux++) { + struct function_desc *func; - while (func->name) { + /* Check if we already have function for this mux */ + for (func = thunderbay_funcs; func->name; func++) { if (!strcmp(pin_mux->name, func->name)) { func->num_group_names++; break; } - func++; } if (!func->name) { @@ -868,8 +871,6 @@ static int thunderbay_build_functions(struct thunderbay_pinctrl *tpc) func->data = (int *)&pin_mux->mode; tpc->nfuncs++; } - - pin_mux++; } } From 25d2e41cf59bd6ccd23adc2965a157053bc3ed5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 11 Jan 2022 18:29:19 +0100 Subject: [PATCH 033/367] pinctrl: thunderbay: rework loops looking for groups names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the outer loop iterate over functions as that's the real subject. This simplifies code (and reduces amount of lines of code) as allocating memory for names doesn't require extra checks anymore. While at it use local "group_names" variable. It fixes: drivers/pinctrl/pinctrl-thunderbay.c: In function 'thunderbay_add_functions': drivers/pinctrl/pinctrl-thunderbay.c:815:8: warning: assignment discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] 815 | grp = func->group_names; | ^ Ref: c26c4bfc1040 ("pinctrl: keembay: rework loops looking for groups names") Reported-by: Nathan Chancellor Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20220111172919.6567-2-zajec5@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-thunderbay.c | 67 ++++++++++------------------ 1 file changed, 23 insertions(+), 44 deletions(-) diff --git a/drivers/pinctrl/pinctrl-thunderbay.c b/drivers/pinctrl/pinctrl-thunderbay.c index 4756a23ca572..79d44bca039e 100644 --- a/drivers/pinctrl/pinctrl-thunderbay.c +++ b/drivers/pinctrl/pinctrl-thunderbay.c @@ -773,63 +773,42 @@ static int thunderbay_build_groups(struct thunderbay_pinctrl *tpc) static int thunderbay_add_functions(struct thunderbay_pinctrl *tpc, struct function_desc *funcs) { - struct function_desc *function = funcs; int i; /* Assign the groups for each function */ - for (i = 0; i < tpc->soc->npins; i++) { - const struct pinctrl_pin_desc *pin_info = thunderbay_pins + i; - struct thunderbay_mux_desc *pin_mux = pin_info->drv_data; + for (i = 0; i < tpc->nfuncs; i++) { + struct function_desc *func = &funcs[i]; + const char **group_names; + unsigned int grp_idx = 0; + int j; - while (pin_mux->name) { - const char **grp; - int j, grp_num, match = 0; - size_t grp_size; - struct function_desc *func; + group_names = devm_kcalloc(tpc->dev, func->num_group_names, + sizeof(*group_names), GFP_KERNEL); + if (!group_names) + return -ENOMEM; - for (j = 0; j < tpc->nfuncs; j++) { - if (!strcmp(pin_mux->name, function[j].name)) { - match = 1; - break; - } + for (j = 0; j < tpc->soc->npins; j++) { + const struct pinctrl_pin_desc *pin_info = &thunderbay_pins[j]; + struct thunderbay_mux_desc *pin_mux; + + for (pin_mux = pin_info->drv_data; pin_mux->name; pin_mux++) { + if (!strcmp(pin_mux->name, func->name)) + group_names[grp_idx++] = pin_info->name; } - - if (!match) - return -EINVAL; - - func = function + j; - grp_num = func->num_group_names; - grp_size = sizeof(*func->group_names); - - if (!func->group_names) { - func->group_names = devm_kcalloc(tpc->dev, - grp_num, - grp_size, - GFP_KERNEL); - if (!func->group_names) { - kfree(func); - return -ENOMEM; - } - } - - grp = func->group_names; - while (*grp) - grp++; - - *grp = pin_info->name; - pin_mux++; } + + func->group_names = group_names; } /* Add all functions */ for (i = 0; i < tpc->nfuncs; i++) { pinmux_generic_add_function(tpc->pctrl, - function[i].name, - function[i].group_names, - function[i].num_group_names, - function[i].data); + funcs[i].name, + funcs[i].group_names, + funcs[i].num_group_names, + funcs[i].data); } - kfree(function); + kfree(funcs); return 0; } From aa28514592d52043f4837a6457d6310452135ae1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 4 Jan 2022 17:42:38 +0100 Subject: [PATCH 034/367] pinctrl: cherryview: Trigger hwirq0 for interrupt-lines without a mapping Commit bdfbef2d29dc ("pinctrl: cherryview: Don't use selection 0 to mark an interrupt line as unused") made the code properly differentiate between unset vs (hwirq) 0 entries in the GPIO-controller interrupt-line to GPIO pinnumber/hwirq mapping. This is causing some boards to not boot. This commit restores the old behavior of triggering hwirq 0 when receiving an interrupt on an interrupt-line for which there is no mapping. Fixes: bdfbef2d29dc ("pinctrl: cherryview: Don't use selection 0 to mark an interrupt line as unused") Reported-and-tested-by: Jarkko Nikula Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20220104164238.253142-1-hdegoede@redhat.com Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index abffda1fd51e..1d5818269076 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1471,8 +1471,9 @@ static void chv_gpio_irq_handler(struct irq_desc *desc) offset = cctx->intr_lines[intr_line]; if (offset == CHV_INVALID_HWIRQ) { - dev_err(dev, "interrupt on unused interrupt line %u\n", intr_line); - continue; + dev_warn_once(dev, "interrupt on unmapped interrupt line %u\n", intr_line); + /* Some boards expect hwirq 0 to trigger in this case */ + offset = 0; } generic_handle_domain_irq(gc->irq.domain, offset); From 1fd6bb5b47a65eacb063b37e6fa6df2b8fa92959 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 5 Jan 2022 17:29:52 +0000 Subject: [PATCH 035/367] pinctrl: sunxi: Fix H616 I2S3 pin data Two bugs have sneaked in the H616 pinctrl data: - PH9 uses the mux value of 0x3 twice (one should be 0x5 instead) - PH8 and PH9 use the "i2s3" function name twice in each pin For the double pin name we use the same trick we pulled for i2s0: append the pin function to the group name to designate the special function. Fixes: 25adc29407fb ("pinctrl: sunxi: Add support for the Allwinner H616 pin controller") Reported-by: SASANO Takayoshi Signed-off-by: Andre Przywara Reviewed-by: Jernej Skrabec Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20220105172952.23347-1-andre.przywara@arm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c index ce1917e230f4..152b71226a80 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c @@ -363,16 +363,16 @@ static const struct sunxi_desc_pin h616_pins[] = { SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x2, "uart2"), /* CTS */ - SUNXI_FUNCTION(0x3, "i2s3"), /* DO0 */ + SUNXI_FUNCTION(0x3, "i2s3_dout0"), /* DO0 */ SUNXI_FUNCTION(0x4, "spi1"), /* MISO */ - SUNXI_FUNCTION(0x5, "i2s3"), /* DI1 */ + SUNXI_FUNCTION(0x5, "i2s3_din1"), /* DI1 */ SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 8)), /* PH_EINT8 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 9), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x3, "i2s3"), /* DI0 */ + SUNXI_FUNCTION(0x3, "i2s3_din0"), /* DI0 */ SUNXI_FUNCTION(0x4, "spi1"), /* CS1 */ - SUNXI_FUNCTION(0x3, "i2s3"), /* DO1 */ + SUNXI_FUNCTION(0x5, "i2s3_dout1"), /* DO1 */ SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 9)), /* PH_EINT9 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 10), SUNXI_FUNCTION(0x0, "gpio_in"), From a66c5ed539277b9f2363bbace0dba88b85b36c26 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 6 Jan 2022 11:48:52 -0800 Subject: [PATCH 036/367] hwmon: (lm90) Reduce maximum conversion rate for G781 According to its datasheet, G781 supports a maximum conversion rate value of 8 (62.5 ms). However, chips labeled G781 and G780 were found to only support a maximum conversion rate value of 7 (125 ms). On the other side, chips labeled G781-1 and G784 were found to support a conversion rate value of 8. There is no known means to distinguish G780 from G781 or G784; all chips report the same manufacturer ID and chip revision. Setting the conversion rate register value to 8 on chips not supporting it causes unexpected behavior since the real conversion rate is set to 0 (16 seconds) if a value of 8 is written into the conversion rate register. Limit the conversion rate register value to 7 for all G78x chips to avoid the problem. Fixes: ae544f64cc7b ("hwmon: (lm90) Add support for GMT G781") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 74019dff2550..cc5e48fe304b 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -373,7 +373,7 @@ static const struct lm90_params lm90_params[] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, - .max_convrate = 8, + .max_convrate = 7, }, [lm86] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT From bc341a1a98827925082e95db174734fc8bd68af6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 8 Jan 2022 11:37:19 -0800 Subject: [PATCH 037/367] hwmon: (lm90) Re-enable interrupts after alert clears If alert handling is broken, interrupts are disabled after an alert and re-enabled after the alert clears. However, if there is an interrupt handler, this does not apply if alerts were originally disabled and enabled when the driver was loaded. In that case, interrupts will stay disabled after an alert was handled though the alert handler even after the alert condition clears. Address the situation by always re-enabling interrupts after the alert condition clears if there is an interrupt handler. Fixes: 2abdc357c55d9 ("hwmon: (lm90) Unmask hardware interrupt") Cc: Dmitry Osipenko Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index cc5e48fe304b..e4ecf3440d7c 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -848,7 +848,7 @@ static int lm90_update_device(struct device *dev) * Re-enable ALERT# output if it was originally enabled and * relevant alarms are all clear */ - if (!(data->config_orig & 0x80) && + if ((client->irq || !(data->config_orig & 0x80)) && !(data->alarms & data->alert_alarms)) { if (data->config & 0x80) { dev_dbg(&client->dev, "Re-enabling ALERT#\n"); From a53fff96f35763d132a36c620b183fdf11022d7a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 7 Jan 2022 11:05:23 -0800 Subject: [PATCH 038/367] hwmon: (lm90) Mark alert as broken for MAX6654 Experiments with MAX6654 show that its alert function is broken, similar to other chips supported by the lm90 driver. Mark it accordingly. Fixes: 229d495d8189 ("hwmon: (lm90) Add max6654 support to lm90 driver") Cc: Josh Lehan Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index e4ecf3440d7c..280ae5f58187 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -400,6 +400,7 @@ static const struct lm90_params lm90_params[] = { .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6654] = { + .flags = LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, From 94746b0ba479743355e0d3cc1cb9cfe3011fb8be Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 7 Jan 2022 11:11:00 -0800 Subject: [PATCH 039/367] hwmon: (lm90) Mark alert as broken for MAX6680 Experiments with MAX6680 and MAX6681 show that the alert function of those chips is broken, similar to other chips supported by the lm90 driver. Mark it accordingly. Fixes: 4667bcb8d8fc ("hwmon: (lm90) Introduce chip parameter structure") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 280ae5f58187..06cb971c889b 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -419,7 +419,7 @@ static const struct lm90_params lm90_params[] = { }, [max6680] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT - | LM90_HAVE_CRIT_ALRM_SWP, + | LM90_HAVE_CRIT_ALRM_SWP | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, }, From f614629f9c1080dcc844a8430e3fb4c37ebbf05d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 7 Jan 2022 12:36:41 -0800 Subject: [PATCH 040/367] hwmon: (lm90) Mark alert as broken for MAX6646/6647/6649 Experiments with MAX6646 and MAX6648 show that the alert function of those chips is broken, similar to other chips supported by the lm90 driver. Mark it accordingly. Fixes: 4667bcb8d8fc ("hwmon: (lm90) Introduce chip parameter structure") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 06cb971c889b..ba01127c1deb 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -394,7 +394,7 @@ static const struct lm90_params lm90_params[] = { .max_convrate = 9, }, [max6646] = { - .flags = LM90_HAVE_CRIT, + .flags = LM90_HAVE_CRIT | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, From d379880d9adb9f1ada3f1266aa49ea2561328e08 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 10 Jan 2022 23:23:31 -0800 Subject: [PATCH 041/367] hwmon: (lm90) Fix sysfs and udev notifications sysfs and udev notifications need to be sent to the _alarm attributes, not to the value attributes. Fixes: 94dbd23ed88c ("hwmon: (lm90) Use hwmon_notify_event()") Cc: Dmitry Osipenko Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index ba01127c1deb..1c9493c70813 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1808,22 +1808,22 @@ static bool lm90_is_tripped(struct i2c_client *client, u16 *status) if (st & LM90_STATUS_LLOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 0); + hwmon_temp_min_alarm, 0); if (st & LM90_STATUS_RLOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 1); + hwmon_temp_min_alarm, 1); if (st2 & MAX6696_STATUS2_R2LOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 2); + hwmon_temp_min_alarm, 2); if (st & LM90_STATUS_LHIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 0); + hwmon_temp_max_alarm, 0); if (st & LM90_STATUS_RHIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 1); + hwmon_temp_max_alarm, 1); if (st2 & MAX6696_STATUS2_R2HIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 2); + hwmon_temp_max_alarm, 2); return true; } From f1e75e0d6a1ae02fa4189d7aeebab623bade2a21 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 18 Jan 2022 07:53:19 -0800 Subject: [PATCH 042/367] hwmon: (pmbus/ir38064) Mark ir38064_of_match as __maybe_unused If CONFIG_PM is not enabled, the following warning is reported. drivers/hwmon/pmbus/ir38064.c:54:34: warning: unused variable 'ir38064_of_match' Mark it as __maybe_unused. Reported-by: kernel test robot Cc: Arthur Heymans Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/ir38064.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/ir38064.c b/drivers/hwmon/pmbus/ir38064.c index 0ea7e1c18bdc..09276e397194 100644 --- a/drivers/hwmon/pmbus/ir38064.c +++ b/drivers/hwmon/pmbus/ir38064.c @@ -62,7 +62,7 @@ static const struct i2c_device_id ir38064_id[] = { MODULE_DEVICE_TABLE(i2c, ir38064_id); -static const struct of_device_id ir38064_of_match[] = { +static const struct of_device_id __maybe_unused ir38064_of_match[] = { { .compatible = "infineon,ir38060" }, { .compatible = "infineon,ir38064" }, { .compatible = "infineon,ir38164" }, From c1ec0cabc36718efc7fe8b4157d41b82d08ec1d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Jan 2022 14:55:43 +0300 Subject: [PATCH 043/367] hwmon: (adt7470) Prevent divide by zero in adt7470_fan_write() The "val" variable is controlled by the user and comes from hwmon_attr_store(). The FAN_RPM_TO_PERIOD() macro divides by "val" so a zero will crash the system. Check for that and return -EINVAL. Negatives are also invalid so return -EINVAL for those too. Fixes: fc958a61ff6d ("hwmon: (adt7470) Convert to devm_hwmon_device_register_with_info API") Signed-off-by: Dan Carpenter Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7470.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index d519aca4a9d6..fb6d14d213a1 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -662,6 +662,9 @@ static int adt7470_fan_write(struct device *dev, u32 attr, int channel, long val struct adt7470_data *data = dev_get_drvdata(dev); int err; + if (val <= 0) + return -EINVAL; + val = FAN_RPM_TO_PERIOD(val); val = clamp_val(val, 1, 65534); From aec982603aa8cc0a21143681feb5f60ecc69d718 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 6 Dec 2021 11:11:51 +0000 Subject: [PATCH 044/367] powerpc/fixmap: Fix VM debug warning on unmap Unmapping a fixmap entry is done by calling __set_fixmap() with FIXMAP_PAGE_CLEAR as flags. Today, powerpc __set_fixmap() calls map_kernel_page(). map_kernel_page() is not happy when called a second time for the same page. WARNING: CPU: 0 PID: 1 at arch/powerpc/mm/pgtable.c:194 set_pte_at+0xc/0x1e8 CPU: 0 PID: 1 Comm: swapper Not tainted 5.16.0-rc3-s3k-dev-01993-g350ff07feb7d-dirty #682 NIP: c0017cd4 LR: c00187f0 CTR: 00000010 REGS: e1011d50 TRAP: 0700 Not tainted (5.16.0-rc3-s3k-dev-01993-g350ff07feb7d-dirty) MSR: 00029032 CR: 42000208 XER: 00000000 GPR00: c0165fec e1011e10 c14c0000 c0ee2550 ff800000 c0f3d000 00000000 c001686c GPR08: 00001000 b00045a9 00000001 c0f58460 c0f50000 00000000 c0007e10 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 GPR24: 00000000 00000000 c0ee2550 00000000 c0f57000 00000ff8 00000000 ff800000 NIP [c0017cd4] set_pte_at+0xc/0x1e8 LR [c00187f0] map_kernel_page+0x9c/0x100 Call Trace: [e1011e10] [c0736c68] vsnprintf+0x358/0x6c8 (unreliable) [e1011e30] [c0165fec] __set_fixmap+0x30/0x44 [e1011e40] [c0c13bdc] early_iounmap+0x11c/0x170 [e1011e70] [c0c06cb0] ioremap_legacy_serial_console+0x88/0xc0 [e1011e90] [c0c03634] do_one_initcall+0x80/0x178 [e1011ef0] [c0c0385c] kernel_init_freeable+0xb4/0x250 [e1011f20] [c0007e34] kernel_init+0x24/0x140 [e1011f30] [c0016268] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 7fe3fb78 48019689 80010014 7c630034 83e1000c 5463d97e 7c0803a6 38210010 4e800020 81250000 712a0001 41820008 <0fe00000> 9421ffe0 93e1001c 48000030 Implement unmap_kernel_page() which clears an existing pte. Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Tested-by: Maxime Bizon Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b0b752f6f6ecc60653e873f385c6f0dce4e9ab6a.1638789098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 2 ++ arch/powerpc/include/asm/fixmap.h | 6 ++++-- arch/powerpc/include/asm/nohash/32/pgtable.h | 1 + arch/powerpc/include/asm/nohash/64/pgtable.h | 1 + arch/powerpc/mm/pgtable.c | 9 +++++++++ 6 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 609c80f67194..f8b94f78403f 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -178,6 +178,7 @@ static inline bool pte_user(pte_t pte) #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 33e073d6b0c4..875730d5af40 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1082,6 +1082,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p return hash__map_kernel_page(ea, pa, prot); } +void unmap_kernel_page(unsigned long va); + static inline int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 947b5b9c4424..a832aeafe560 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx, BUILD_BUG_ON(idx >= __end_of_fixed_addresses); else if (WARN_ON(idx >= __end_of_fixed_addresses)) return; - - map_kernel_page(__fix_to_virt(idx), phys, flags); + if (pgprot_val(flags)) + map_kernel_page(__fix_to_virt(idx), phys, flags); + else + unmap_kernel_page(__fix_to_virt(idx)); } #define __early_set_fixmap __set_fixmap diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b67742e2a9b2..d959c2a73fbf 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -64,6 +64,7 @@ extern int icache_44x_need_flush; #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index a3313e853e5e..2816d158280a 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -308,6 +308,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __swp_entry_to_pte(x) __pte((x).val) int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index abb3198bd277..6ec5a7dd7913 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -206,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, __set_pte_at(mm, addr, ptep, pte, 0); } +void unmap_kernel_page(unsigned long va) +{ + pmd_t *pmdp = pmd_off_k(va); + pte_t *ptep = pte_offset_kernel(pmdp, va); + + pte_clear(&init_mm, va, ptep); + flush_tlb_kernel_range(va, va + PAGE_SIZE); +} + /* * This is called when relaxing access to a PTE. It's also called in the page * fault path when we don't hit any of the major fault cases, ie, a minor From fb6433b48a178d4672cb26632454ee0b21056eaa Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sat, 22 Jan 2022 09:04:29 +0530 Subject: [PATCH 045/367] powerpc/perf: Fix power_pmu_disable to call clear_pmi_irq_pending only if PMI is pending Running selftest with CONFIG_PPC_IRQ_SOFT_MASK_DEBUG enabled in kernel triggered below warning: [ 172.851380] ------------[ cut here ]------------ [ 172.851391] WARNING: CPU: 8 PID: 2901 at arch/powerpc/include/asm/hw_irq.h:246 power_pmu_disable+0x270/0x280 [ 172.851402] Modules linked in: dm_mod bonding nft_ct nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables rfkill nfnetlink sunrpc xfs libcrc32c pseries_rng xts vmx_crypto uio_pdrv_genirq uio sch_fq_codel ip_tables ext4 mbcache jbd2 sd_mod t10_pi sg ibmvscsi ibmveth scsi_transport_srp fuse [ 172.851442] CPU: 8 PID: 2901 Comm: lost_exception_ Not tainted 5.16.0-rc5-03218-g798527287598 #2 [ 172.851451] NIP: c00000000013d600 LR: c00000000013d5a4 CTR: c00000000013b180 [ 172.851458] REGS: c000000017687860 TRAP: 0700 Not tainted (5.16.0-rc5-03218-g798527287598) [ 172.851465] MSR: 8000000000029033 CR: 48004884 XER: 20040000 [ 172.851482] CFAR: c00000000013d5b4 IRQMASK: 1 [ 172.851482] GPR00: c00000000013d5a4 c000000017687b00 c000000002a10600 0000000000000004 [ 172.851482] GPR04: 0000000082004000 c0000008ba08f0a8 0000000000000000 00000008b7ed0000 [ 172.851482] GPR08: 00000000446194f6 0000000000008000 c00000000013b118 c000000000d58e68 [ 172.851482] GPR12: c00000000013d390 c00000001ec54a80 0000000000000000 0000000000000000 [ 172.851482] GPR16: 0000000000000000 0000000000000000 c000000015d5c708 c0000000025396d0 [ 172.851482] GPR20: 0000000000000000 0000000000000000 c00000000a3bbf40 0000000000000003 [ 172.851482] GPR24: 0000000000000000 c0000008ba097400 c0000000161e0d00 c00000000a3bb600 [ 172.851482] GPR28: c000000015d5c700 0000000000000001 0000000082384090 c0000008ba0020d8 [ 172.851549] NIP [c00000000013d600] power_pmu_disable+0x270/0x280 [ 172.851557] LR [c00000000013d5a4] power_pmu_disable+0x214/0x280 [ 172.851565] Call Trace: [ 172.851568] [c000000017687b00] [c00000000013d5a4] power_pmu_disable+0x214/0x280 (unreliable) [ 172.851579] [c000000017687b40] [c0000000003403ac] perf_pmu_disable+0x4c/0x60 [ 172.851588] [c000000017687b60] [c0000000003445e4] __perf_event_task_sched_out+0x1d4/0x660 [ 172.851596] [c000000017687c50] [c000000000d1175c] __schedule+0xbcc/0x12a0 [ 172.851602] [c000000017687d60] [c000000000d11ea8] schedule+0x78/0x140 [ 172.851608] [c000000017687d90] [c0000000001a8080] sys_sched_yield+0x20/0x40 [ 172.851615] [c000000017687db0] [c0000000000334dc] system_call_exception+0x18c/0x380 [ 172.851622] [c000000017687e10] [c00000000000c74c] system_call_common+0xec/0x268 The warning indicates that MSR_EE being set(interrupt enabled) when there was an overflown PMC detected. This could happen in power_pmu_disable since it runs under interrupt soft disable condition ( local_irq_save ) and not with interrupts hard disabled. commit 2c9ac51b850d ("powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC") intended to clear PMI pending bit in Paca when disabling the PMU. It could happen that PMC gets overflown while code is in power_pmu_disable callback function. Hence add a check to see if PMI pending bit is set in Paca before clearing it via clear_pmi_pending. Fixes: 2c9ac51b850d ("powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC") Reported-by: Sachin Sant Signed-off-by: Athira Rajeev Tested-by: Sachin Sant Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220122033429.25395-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 32b98b7a1f86..b5b42cf0a703 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1355,9 +1355,20 @@ static void power_pmu_disable(struct pmu *pmu) * Otherwise provide a warning if there is PMI pending, but * no counter is found overflown. */ - if (any_pmc_overflown(cpuhw)) - clear_pmi_irq_pending(); - else + if (any_pmc_overflown(cpuhw)) { + /* + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). + */ + if (pmi_irq_pending()) + clear_pmi_irq_pending(); + } else WARN_ON(pmi_irq_pending()); val = mmcra = cpuhw->mmcr.mmcra; From f3b7e73b2c6619884351a3a0a7468642f852b8a2 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 19 Jan 2022 19:26:37 +0100 Subject: [PATCH 046/367] s390/module: fix loading modules with a lot of relocations If the size of the PLT entries generated by apply_rela() exceeds 64KiB, the first ones can no longer reach __jump_r1 with brc. Fix by using brcl. An alternative solution is to add a __jump_r1 copy after every 64KiB, however, the space savings are quite small and do not justify the additional complexity. Fixes: f19fbd5ed642 ("s390: introduce execute-trampolines for branches") Cc: stable@vger.kernel.org Reported-by: Andrea Righi Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/kernel/module.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index d52d85367bf7..b032e556eeb7 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -33,7 +33,7 @@ #define DEBUGP(fmt , ...) #endif -#define PLT_ENTRY_SIZE 20 +#define PLT_ENTRY_SIZE 22 void *module_alloc(unsigned long size) { @@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { - unsigned int insn[5]; - unsigned int *ip = me->core_layout.base + - me->arch.plt_offset + - info->plt_offset; + unsigned char insn[PLT_ENTRY_SIZE]; + char *plt_base; + char *ip; - insn[0] = 0x0d10e310; /* basr 1,0 */ - insn[1] = 0x100a0004; /* lg 1,10(1) */ + plt_base = me->core_layout.base + me->arch.plt_offset; + ip = plt_base + info->plt_offset; + *(int *)insn = 0x0d10e310; /* basr 1,0 */ + *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { - unsigned int *ij; - ij = me->core_layout.base + - me->arch.plt_offset + - me->arch.plt_size - PLT_ENTRY_SIZE; - insn[2] = 0xa7f40000 + /* j __jump_r1 */ - (unsigned int)(u16) - (((unsigned long) ij - 8 - - (unsigned long) ip) / 2); + char *jump_r1; + + jump_r1 = plt_base + me->arch.plt_size - + PLT_ENTRY_SIZE; + /* brcl 0xf,__jump_r1 */ + *(short *)&insn[8] = 0xc0f4; + *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2; } else { - insn[2] = 0x07f10000; /* br %r1 */ + *(int *)&insn[8] = 0x07f10000; /* br %r1 */ } - insn[3] = (unsigned int) (val >> 32); - insn[4] = (unsigned int) val; + *(long *)&insn[14] = val; write(ip, insn, sizeof(insn)); info->plt_initialized = 1; From 90c5318795eefa09a9f9aef8d18a904e24962b5c Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 19 Jan 2022 19:26:38 +0100 Subject: [PATCH 047/367] s390/module: test loading modules with a lot of relocations Add a test in order to prevent regressions. Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 15 +++++++++ arch/s390/lib/Makefile | 3 ++ arch/s390/lib/test_modules.c | 35 +++++++++++++++++++ arch/s390/lib/test_modules.h | 50 ++++++++++++++++++++++++++++ arch/s390/lib/test_modules_helpers.c | 13 ++++++++ 5 files changed, 116 insertions(+) create mode 100644 arch/s390/lib/test_modules.c create mode 100644 arch/s390/lib/test_modules.h create mode 100644 arch/s390/lib/test_modules_helpers.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9750f92380f5..be9f39fd06df 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -945,6 +945,9 @@ config S390_GUEST endmenu +config S390_MODULES_SANITY_TEST_HELPERS + def_bool n + menu "Selftests" config S390_UNWIND_SELFTEST @@ -971,4 +974,16 @@ config S390_KPROBES_SANITY_TEST Say N if you are unsure. +config S390_MODULES_SANITY_TEST + def_tristate n + depends on KUNIT + default KUNIT_ALL_TESTS + prompt "Enable s390 specific modules tests" + select S390_MODULES_SANITY_TEST_HELPERS + help + This option enables an s390 specific modules test. This option is + not useful for distributions or general kernels, but only for + kernel developers working on architecture code. + + Say N if you are unsure. endmenu diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 707cd4622c13..69feb8ed3312 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -17,4 +17,7 @@ KASAN_SANITIZE_uaccess.o := n obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o CFLAGS_test_unwind.o += -fno-optimize-sibling-calls +obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o +obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o + lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c new file mode 100644 index 000000000000..d056baa8fbb0 --- /dev/null +++ b/arch/s390/lib/test_modules.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include + +#include "test_modules.h" + +#define DECLARE_RETURN(i) int test_modules_return_ ## i(void) +REPEAT_10000(DECLARE_RETURN); + +/* + * Test that modules with many relocations are loaded properly. + */ +static void test_modules_many_vmlinux_relocs(struct kunit *test) +{ + int result = 0; + +#define CALL_RETURN(i) result += test_modules_return_ ## i() + REPEAT_10000(CALL_RETURN); + KUNIT_ASSERT_EQ(test, result, 49995000); +} + +static struct kunit_case modules_testcases[] = { + KUNIT_CASE(test_modules_many_vmlinux_relocs), + {} +}; + +static struct kunit_suite modules_test_suite = { + .name = "modules_test_s390", + .test_cases = modules_testcases, +}; + +kunit_test_suites(&modules_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h new file mode 100644 index 000000000000..43b5e4b4af3e --- /dev/null +++ b/arch/s390/lib/test_modules.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef TEST_MODULES_H +#define TEST_MODULES_H + +#define __REPEAT_10000_3(f, x) \ + f(x ## 0); \ + f(x ## 1); \ + f(x ## 2); \ + f(x ## 3); \ + f(x ## 4); \ + f(x ## 5); \ + f(x ## 6); \ + f(x ## 7); \ + f(x ## 8); \ + f(x ## 9) +#define __REPEAT_10000_2(f, x) \ + __REPEAT_10000_3(f, x ## 0); \ + __REPEAT_10000_3(f, x ## 1); \ + __REPEAT_10000_3(f, x ## 2); \ + __REPEAT_10000_3(f, x ## 3); \ + __REPEAT_10000_3(f, x ## 4); \ + __REPEAT_10000_3(f, x ## 5); \ + __REPEAT_10000_3(f, x ## 6); \ + __REPEAT_10000_3(f, x ## 7); \ + __REPEAT_10000_3(f, x ## 8); \ + __REPEAT_10000_3(f, x ## 9) +#define __REPEAT_10000_1(f, x) \ + __REPEAT_10000_2(f, x ## 0); \ + __REPEAT_10000_2(f, x ## 1); \ + __REPEAT_10000_2(f, x ## 2); \ + __REPEAT_10000_2(f, x ## 3); \ + __REPEAT_10000_2(f, x ## 4); \ + __REPEAT_10000_2(f, x ## 5); \ + __REPEAT_10000_2(f, x ## 6); \ + __REPEAT_10000_2(f, x ## 7); \ + __REPEAT_10000_2(f, x ## 8); \ + __REPEAT_10000_2(f, x ## 9) +#define REPEAT_10000(f) \ + __REPEAT_10000_1(f, 0); \ + __REPEAT_10000_1(f, 1); \ + __REPEAT_10000_1(f, 2); \ + __REPEAT_10000_1(f, 3); \ + __REPEAT_10000_1(f, 4); \ + __REPEAT_10000_1(f, 5); \ + __REPEAT_10000_1(f, 6); \ + __REPEAT_10000_1(f, 7); \ + __REPEAT_10000_1(f, 8); \ + __REPEAT_10000_1(f, 9) + +#endif diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c new file mode 100644 index 000000000000..1670349a03eb --- /dev/null +++ b/arch/s390/lib/test_modules_helpers.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include + +#include "test_modules.h" + +#define DEFINE_RETURN(i) \ + int test_modules_return_ ## i(void) \ + { \ + return 1 ## i - 10000; \ + } \ + EXPORT_SYMBOL_GPL(test_modules_return_ ## i) +REPEAT_10000(DEFINE_RETURN); From c9bb19368b3ab111aedf3297e65bf84c9d3aa005 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Dec 2021 14:58:49 +0100 Subject: [PATCH 048/367] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 20 ++++++++++---------- arch/s390/configs/defconfig | 16 +++++++++------- arch/s390/configs/zfcpdump_defconfig | 3 +++ 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7fe8975b49ec..498bed9b261b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -63,6 +63,7 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m +CONFIG_S390_MODULES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -96,7 +97,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y @@ -109,6 +109,7 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y +CONFIG_ANON_VMA_NAME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -116,7 +117,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -185,7 +185,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m @@ -391,6 +390,7 @@ CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m +CONFIG_NET_SWITCHDEV=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y @@ -400,6 +400,7 @@ CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -501,6 +502,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set @@ -511,7 +513,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -542,6 +543,7 @@ CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set @@ -592,6 +594,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -756,9 +759,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -774,6 +774,8 @@ CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_CRC32_SELFTEST=y CONFIG_CRC4=m CONFIG_CRC7=m @@ -807,7 +809,6 @@ CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_VM_VMACACHE=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m @@ -819,12 +820,11 @@ CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_TEST_LOCKUP=m -CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y -CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y +CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 466780c465f5..61e36b999f67 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -61,6 +61,7 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m +CONFIG_S390_MODULES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y # CONFIG_GCC_PLUGINS is not set @@ -91,7 +92,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_FRONTSWAP=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y @@ -101,6 +101,7 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y +CONFIG_ANON_VMA_NAME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -108,7 +109,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -177,7 +177,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m @@ -382,6 +381,7 @@ CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m +CONFIG_NET_SWITCHDEV=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y @@ -391,6 +391,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -492,6 +493,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set @@ -502,7 +504,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -533,6 +534,7 @@ CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set @@ -582,6 +584,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -743,9 +746,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -762,6 +762,8 @@ CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index eed3b9acfa71..c55c668dc3c7 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,6 +1,7 @@ # CONFIG_SWAP is not set CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set # CONFIG_TIME_NS is not set @@ -34,6 +35,7 @@ CONFIG_NET=y # CONFIG_PCPU_DEV_REFCNT is not set # CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_BLK_DEV_RAM=y # CONFIG_DCSSBLK is not set # CONFIG_DASD is not set @@ -58,6 +60,7 @@ CONFIG_ZFCP=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set +# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set From dda8e14363f4f2bac0a1122322a35f47b5565d46 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 20 Jan 2022 20:49:48 +0100 Subject: [PATCH 049/367] gpio: sim: check the label length when setting up device properties If the user-space sets the chip label to an empty string - we should check the length and not override the default name or else line hogs will not be properly attached. Fixes: cb8c474e79be ("gpio: sim: new testing module") Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 838bbfed11d3..04b137eca8da 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -816,7 +816,7 @@ gpio_sim_make_bank_swnode(struct gpio_sim_bank *bank, properties[prop_idx++] = PROPERTY_ENTRY_U32("ngpios", bank->num_lines); - if (bank->label) + if (bank->label && (strlen(bank->label) > 0)) properties[prop_idx++] = PROPERTY_ENTRY_STRING("gpio-sim,label", bank->label); From 8aa0f94b0a8d5304ea1bd63bf1ed06f9e395e328 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 22 Jan 2022 23:35:36 -0800 Subject: [PATCH 050/367] gpio: sim: add doc file to index file Include the gpio-sim.rst file in the GPIO index (toc/table of contents). Quietens this doc build warning: Documentation/admin-guide/gpio/gpio-sim.rst: WARNING: document isn't included in any toctree Fixes: b48f6b466e44 ("gpio: sim: new testing module") Signed-off-by: Randy Dunlap Reported-by: Stephen Rothwell Signed-off-by: Bartosz Golaszewski --- Documentation/admin-guide/gpio/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst index 7db367572f30..f6861ca16ffe 100644 --- a/Documentation/admin-guide/gpio/index.rst +++ b/Documentation/admin-guide/gpio/index.rst @@ -10,6 +10,7 @@ gpio gpio-aggregator sysfs gpio-mockup + gpio-sim .. only:: subproject and html From 278583055a237270fac70518275ba877bf9e4013 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jan 2022 18:42:07 +0000 Subject: [PATCH 051/367] KVM: arm64: Use shadow SPSR_EL1 when injecting exceptions on !VHE Injecting an exception into a guest with non-VHE is risky business. Instead of writing in the shadow register for the switch code to restore it, we override the CPU register instead. Which gets overriden a few instructions later by said restore code. The result is that although the guest correctly gets the exception, it will return to the original context in some random state, depending on what was there the first place... Boo. Fix the issue by writing to the shadow register. The original code is absolutely fine on VHE, as the state is already loaded, and writing to the shadow register in that case would actually be a bug. Fixes: bb666c472ca2 ("KVM: arm64: Inject AArch64 exceptions from HYP") Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Link: https://lore.kernel.org/r/20220121184207.423426-1-maz@kernel.org --- arch/arm64/kvm/hyp/exception.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 0418399e0a20..c5d009715402 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) { - write_sysreg_el1(val, SYS_SPSR); + if (has_vhe()) + write_sysreg_el1(val, SYS_SPSR); + else + __vcpu_sys_reg(vcpu, SPSR_EL1) = val; } static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) From ddec7abd4d93760ad5b2c7c61bf123a7707664ca Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 11 Jan 2022 11:07:08 +0100 Subject: [PATCH 052/367] platform/x86: x86-android-tablets: Correct crystal_cove_charger module name The module was renamed to intel_crystal_cove_charger before it was merged, updated bq24190_modules to match. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220111100708.38585-1-hdegoede@redhat.com --- drivers/platform/x86/x86-android-tablets.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index 3ba63ad91b28..8d6a3cad260b 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -187,8 +187,8 @@ static struct bq24190_platform_data bq24190_pdata = { }; static const char * const bq24190_modules[] __initconst = { - "crystal_cove_charger", /* For the bq24190 IRQ */ - "bq24190_charger", /* For the Vbus regulator for intel-int3496 */ + "intel_crystal_cove_charger", /* For the bq24190 IRQ */ + "bq24190_charger", /* For the Vbus regulator for intel-int3496 */ NULL }; From 4ce2a32d40260374dfce5344960c419fde23ce87 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 10 Jan 2022 11:39:50 +0100 Subject: [PATCH 053/367] platform/x86: x86-android-tablets: Add support for disabling ACPI _AEI handlers Some of the broken DSDTs on these devices often also include broken / wrong _AEI (ACPI Event Interrupt) handlers, which can cause e.g. interrupt storms by listening to a floating GPIO pin. Add support for disabling these and disable them on the Asus ME176C and TF103C tablets. Signed-off-by: Hans de Goede Reviewed-By: Lubomir Rintel Link: https://lore.kernel.org/r/20220110103952.48760-1-hdegoede@redhat.com --- drivers/platform/x86/x86-android-tablets.c | 23 ++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index 8d6a3cad260b..08c98e881d0e 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -26,6 +26,7 @@ #include /* For gpio_get_desc() which is EXPORT_SYMBOL_GPL() */ #include "../../gpio/gpiolib.h" +#include "../../gpio/gpiolib-acpi.h" /* * Helper code to get Linux IRQ numbers given a description of the IRQ source @@ -47,7 +48,7 @@ struct x86_acpi_irq_data { int polarity; /* ACPI_ACTIVE_HIGH / ACPI_ACTIVE_LOW / ACPI_ACTIVE_BOTH */ }; -static int x86_acpi_irq_helper_gpiochip_find(struct gpio_chip *gc, void *data) +static int gpiochip_find_match_label(struct gpio_chip *gc, void *data) { return gc->label && !strcmp(gc->label, data); } @@ -73,7 +74,7 @@ static int x86_acpi_irq_helper_get(const struct x86_acpi_irq_data *data) return irq; case X86_ACPI_IRQ_TYPE_GPIOINT: /* Like acpi_dev_gpio_irq_get(), but without parsing ACPI resources */ - chip = gpiochip_find(data->chip, x86_acpi_irq_helper_gpiochip_find); + chip = gpiochip_find(data->chip, gpiochip_find_match_label); if (!chip) { pr_err("error cannot find GPIO chip %s\n", data->chip); return -ENODEV; @@ -143,6 +144,7 @@ struct x86_serdev_info { }; struct x86_dev_info { + char *invalid_aei_gpiochip; const char * const *modules; struct gpiod_lookup_table **gpiod_lookup_tables; const struct x86_i2c_client_info *i2c_client_info; @@ -317,6 +319,7 @@ static const struct x86_dev_info asus_me176c_info __initconst = { .serdev_count = ARRAY_SIZE(asus_me176c_serdevs), .gpiod_lookup_tables = asus_me176c_gpios, .modules = bq24190_modules, + .invalid_aei_gpiochip = "INT33FC:02", }; /* Asus TF103C tablets have an Android factory img with everything hardcoded */ @@ -417,6 +420,7 @@ static const struct x86_dev_info asus_tf103c_info __initconst = { .pdev_count = ARRAY_SIZE(int3496_pdevs), .gpiod_lookup_tables = asus_tf103c_gpios, .modules = bq24190_modules, + .invalid_aei_gpiochip = "INT33FC:02", }; /* @@ -795,6 +799,7 @@ static __init int x86_android_tablet_init(void) { const struct x86_dev_info *dev_info; const struct dmi_system_id *id; + struct gpio_chip *chip; int i, ret = 0; id = dmi_first_match(x86_android_tablet_ids); @@ -803,6 +808,20 @@ static __init int x86_android_tablet_init(void) dev_info = id->driver_data; + /* + * The broken DSDTs on these devices often also include broken + * _AEI (ACPI Event Interrupt) handlers, disable these. + */ + if (dev_info->invalid_aei_gpiochip) { + chip = gpiochip_find(dev_info->invalid_aei_gpiochip, + gpiochip_find_match_label); + if (!chip) { + pr_err("error cannot find GPIO chip %s\n", dev_info->invalid_aei_gpiochip); + return -ENODEV; + } + acpi_gpiochip_free_interrupts(chip); + } + /* * Since this runs from module_init() it cannot use -EPROBE_DEFER, * instead pre-load any modules which are listed as requirements. From 84c2dcdd475f3f5d1d30c87404cafba4dd4b75ec Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 10 Jan 2022 11:39:51 +0100 Subject: [PATCH 054/367] platform/x86: x86-android-tablets: Add an init() callback to struct x86_dev_info Add an init() callback to struct x86_dev_info, board descriptions can use this to do some custom setup before registering the i2c_clients, platform- devices and servdevs. Also add an exit() callback to also allow for cleanup of the custom setup. Signed-off-by: Hans de Goede Reviewed-By: Lubomir Rintel Link: https://lore.kernel.org/r/20220110103952.48760-2-hdegoede@redhat.com --- drivers/platform/x86/x86-android-tablets.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index 08c98e881d0e..fe4dba5997fe 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -153,6 +153,8 @@ struct x86_dev_info { int i2c_client_count; int pdev_count; int serdev_count; + int (*init)(void); + void (*exit)(void); }; /* Generic / shared bq24190 settings */ @@ -674,6 +676,7 @@ static struct i2c_client **i2c_clients; static struct platform_device **pdevs; static struct serdev_device **serdevs; static struct gpiod_lookup_table **gpiod_lookup_tables; +static void (*exit_handler)(void); static __init int x86_instantiate_i2c_client(const struct x86_dev_info *dev_info, int idx) @@ -791,6 +794,9 @@ static void x86_android_tablet_cleanup(void) kfree(i2c_clients); + if (exit_handler) + exit_handler(); + for (i = 0; gpiod_lookup_tables && gpiod_lookup_tables[i]; i++) gpiod_remove_lookup_table(gpiod_lookup_tables[i]); } @@ -833,6 +839,15 @@ static __init int x86_android_tablet_init(void) for (i = 0; gpiod_lookup_tables && gpiod_lookup_tables[i]; i++) gpiod_add_lookup_table(gpiod_lookup_tables[i]); + if (dev_info->init) { + ret = dev_info->init(); + if (ret < 0) { + x86_android_tablet_cleanup(); + return ret; + } + exit_handler = dev_info->exit; + } + i2c_clients = kcalloc(dev_info->i2c_client_count, sizeof(*i2c_clients), GFP_KERNEL); if (!i2c_clients) { x86_android_tablet_cleanup(); From 442bf564eb0c4577d98a77e87caa10f704dddcad Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 10 Jan 2022 11:39:52 +0100 Subject: [PATCH 055/367] platform/x86: x86-android-tablets: Constify the gpiod_lookup_tables arrays The individual gpiod_lookup_table structs cannot be const because they contain a list-head which gets used when registering them. But the array of pointers to the gpiod_lookup_table-s used by a board can be const, constify these. Signed-off-by: Hans de Goede Reviewed-By: Lubomir Rintel Link: https://lore.kernel.org/r/20220110103952.48760-3-hdegoede@redhat.com --- drivers/platform/x86/x86-android-tablets.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index fe4dba5997fe..e1b22bb3dbd8 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -146,7 +146,7 @@ struct x86_serdev_info { struct x86_dev_info { char *invalid_aei_gpiochip; const char * const *modules; - struct gpiod_lookup_table **gpiod_lookup_tables; + struct gpiod_lookup_table * const *gpiod_lookup_tables; const struct x86_i2c_client_info *i2c_client_info; const struct platform_device_info *pdev_info; const struct x86_serdev_info *serdev_info; @@ -306,7 +306,7 @@ static struct gpiod_lookup_table asus_me176c_goodix_gpios = { }, }; -static struct gpiod_lookup_table *asus_me176c_gpios[] = { +static struct gpiod_lookup_table * const asus_me176c_gpios[] = { &int3496_gpo2_pin22_gpios, &asus_me176c_goodix_gpios, NULL @@ -410,7 +410,7 @@ static const struct x86_i2c_client_info asus_tf103c_i2c_clients[] __initconst = }, }; -static struct gpiod_lookup_table *asus_tf103c_gpios[] = { +static struct gpiod_lookup_table * const asus_tf103c_gpios[] = { &int3496_gpo2_pin22_gpios, NULL }; @@ -565,7 +565,7 @@ static struct gpiod_lookup_table whitelabel_tm800a550l_goodix_gpios = { }, }; -static struct gpiod_lookup_table *whitelabel_tm800a550l_gpios[] = { +static struct gpiod_lookup_table * const whitelabel_tm800a550l_gpios[] = { &whitelabel_tm800a550l_goodix_gpios, NULL }; @@ -675,7 +675,7 @@ static int serdev_count; static struct i2c_client **i2c_clients; static struct platform_device **pdevs; static struct serdev_device **serdevs; -static struct gpiod_lookup_table **gpiod_lookup_tables; +static struct gpiod_lookup_table * const *gpiod_lookup_tables; static void (*exit_handler)(void); static __init int x86_instantiate_i2c_client(const struct x86_dev_info *dev_info, From 5de2ffd5acd33368e472dd3255a51cac528c730e Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 10 Jan 2022 07:35:12 +0100 Subject: [PATCH 056/367] platform/x86: x86-android-tablets: Fix the buttons on CZC P10T tablet This switches the P10T tablet to "Android" mode, where the Home button sends a single sancode instead of a Windows-specific key combination and the other button doesn't disable the Wi-Fi. Signed-off-by: Lubomir Rintel Link: https://lore.kernel.org/r/20220110063512.273252-1-lkundrak@v3.sk Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/x86-android-tablets.c | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index e1b22bb3dbd8..bd1ba676bcc0 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -496,6 +496,39 @@ static const struct x86_dev_info chuwi_hi8_info __initconst = { .i2c_client_count = ARRAY_SIZE(chuwi_hi8_i2c_clients), }; +#define CZC_EC_EXTRA_PORT 0x68 +#define CZC_EC_ANDROID_KEYS 0x63 + +static int __init czc_p10t_init(void) +{ + /* + * The device boots up in "Windows 7" mode, when the home button sends a + * Windows specific key sequence (Left Meta + D) and the second button + * sends an unknown one while also toggling the Radio Kill Switch. + * This is a surprising behavior when the second button is labeled "Back". + * + * The vendor-supplied Android-x86 build switches the device to a "Android" + * mode by writing value 0x63 to the I/O port 0x68. This just seems to just + * set bit 6 on address 0x96 in the EC region; switching the bit directly + * seems to achieve the same result. It uses a "p10t_switcher" to do the + * job. It doesn't seem to be able to do anything else, and no other use + * of the port 0x68 is known. + * + * In the Android mode, the home button sends just a single scancode, + * which can be handled in Linux userspace more reasonably and the back + * button only sends a scancode without toggling the kill switch. + * The scancode can then be mapped either to Back or RF Kill functionality + * in userspace, depending on how the button is labeled on that particular + * model. + */ + outb(CZC_EC_ANDROID_KEYS, CZC_EC_EXTRA_PORT); + return 0; +} + +static const struct x86_dev_info czc_p10t __initconst = { + .init = czc_p10t_init, +}; + /* * Whitelabel (sold as various brands) TM800A550L tablets. * These tablet's DSDT contains a whole bunch of bogus ACPI I2C devices @@ -647,6 +680,24 @@ static const struct dmi_system_id x86_android_tablet_ids[] __initconst = { }, .driver_data = (void *)&chuwi_hi8_info, }, + { + /* CZC P10T */ + .ident = "CZC ODEON TPC-10 (\"P10T\")", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "CZC"), + DMI_MATCH(DMI_PRODUCT_NAME, "ODEON*TPC-10"), + }, + .driver_data = (void *)&czc_p10t, + }, + { + /* A variant of CZC P10T */ + .ident = "ViewSonic ViewPad 10", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ViewSonic"), + DMI_MATCH(DMI_PRODUCT_NAME, "VPAD10"), + }, + .driver_data = (void *)&czc_p10t, + }, { /* Whitelabel (sold as various brands) TM800A550L */ .matches = { From 17f6736a020ef195f4855e807c76d2360310d143 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 10 Jan 2022 07:36:29 +0100 Subject: [PATCH 057/367] platform/x86: x86-android-tablets: Trivial typo fix for MODULE_AUTHOR Bring balance to the quoting of Hans' e-mail address. Signed-off-by: Lubomir Rintel Link: https://lore.kernel.org/r/20220110063629.273364-1-lkundrak@v3.sk Signed-off-by: Hans de Goede --- drivers/platform/x86/x86-android-tablets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index bd1ba676bcc0..9360a8a92486 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -950,6 +950,6 @@ static __init int x86_android_tablet_init(void) module_init(x86_android_tablet_init); module_exit(x86_android_tablet_cleanup); -MODULE_AUTHOR("Hans de Goede "); MODULE_DESCRIPTION("X86 Android tablets DSDT fixups driver"); MODULE_LICENSE("GPL"); From c197e969e3082b9c19175d2f013a0dbd3ce52236 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 15 Jan 2022 15:08:49 +0100 Subject: [PATCH 058/367] platform/surface: Reinstate platform dependency Microsoft Surface platform-specific devices are only present on Microsoft Surface platforms, which are currently limited to arm64 and x86. Hence add a dependency on ARM64 || X86, to prevent asking the user about drivers for these devices when configuring a kernel for an architecture that does not support Microsoft Surface platforms. Fixes: 272479928172edf0 ("platform: surface: Propagate ACPI Dependency") Signed-off-by: Geert Uytterhoeven Acked-by: Maximilian Luz Link: https://lore.kernel.org/r/20220115140849.269479-1-geert@linux-m68k.org Signed-off-by: Hans de Goede --- drivers/platform/surface/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig index 5f0578e25f71..463f1ec5c14e 100644 --- a/drivers/platform/surface/Kconfig +++ b/drivers/platform/surface/Kconfig @@ -5,6 +5,7 @@ menuconfig SURFACE_PLATFORMS bool "Microsoft Surface Platform-Specific Device Drivers" + depends on ARM64 || X86 || COMPILE_TEST default y help Say Y here to get to see options for platform-specific device drivers From 512eb73cfd1208898cf10cb06094e0ee0bb53b58 Mon Sep 17 00:00:00 2001 From: Yuka Kawajiri Date: Wed, 12 Jan 2022 00:40:21 +0900 Subject: [PATCH 059/367] platform/x86: touchscreen_dmi: Add info for the RWC NANOTE P8 AY07J 2-in-1 Add touchscreen info for RWC NANOTE P8 (AY07J) 2-in-1. Signed-off-by: Yuka Kawajiri Link: https://lore.kernel.org/r/20220111154019.4599-1-yukx00@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/touchscreen_dmi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 494f23052678..bc97bfa8e8a6 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -770,6 +770,21 @@ static const struct ts_dmi_data predia_basic_data = { .properties = predia_basic_props, }; +static const struct property_entry rwc_nanote_p8_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-y", 46), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1728), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rwc-nanote-p8.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data rwc_nanote_p8_data = { + .acpi_name = "MSSL1680:00", + .properties = rwc_nanote_p8_props, +}; + static const struct property_entry schneider_sct101ctm_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1715), PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), @@ -1394,6 +1409,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"), }, }, + { + /* RWC NANOTE P8 */ + .driver_data = (void *)&rwc_nanote_p8_data, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_MATCH(DMI_PRODUCT_NAME, "AY07J"), + DMI_MATCH(DMI_PRODUCT_SKU, "0001") + }, + }, { /* Schneider SCT101CTM */ .driver_data = (void *)&schneider_sct101ctm_data, From b288420e773f5a9db77115b9cc3767a8ada16648 Mon Sep 17 00:00:00 2001 From: Alexander Kobel Date: Wed, 12 Jan 2022 12:18:27 +0100 Subject: [PATCH 060/367] platform/x86: thinkpad_acpi: Add quirk for ThinkPads without a fan Some ThinkPad models, like the X1 Tablet 1st and 2nd Gen, are passively cooled without any fan. Currently, an entry in /proc/acpi/ibm/fan is nevertheless created, and misleadingly shows status: enabled speed: 65535 level: auto This patch adds a TPACPI_FAN_NOFAN quirk definition and corresponding handling to not initialize a fan interface at all. For the time being, the quirk is only applied for X1 Tablet 2nd Gen (types 20JB, 20JC; EC N1O...); further models (such as Gen1, types 20GG and 20GH) can be added easily once tested. Tested on a 20JCS00C00, BIOS N1OET58W (1.43), EC N1OHT34W. Signed-off-by: Alexander Kobel Link: https://lore.kernel.org/r/12d4b825-a2b9-8cb7-6ed3-db4d66f46a60@a-kobel.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 098180fb1cfc..33f611af6e51 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8679,9 +8679,10 @@ static const struct attribute_group fan_driver_attr_group = { .attrs = fan_driver_attributes, }; -#define TPACPI_FAN_Q1 0x0001 /* Unitialized HFSP */ -#define TPACPI_FAN_2FAN 0x0002 /* EC 0x31 bit 0 selects fan2 */ -#define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */ +#define TPACPI_FAN_Q1 0x0001 /* Uninitialized HFSP */ +#define TPACPI_FAN_2FAN 0x0002 /* EC 0x31 bit 0 selects fan2 */ +#define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */ +#define TPACPI_FAN_NOFAN 0x0008 /* no fan available */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8702,6 +8703,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (4nd gen) */ TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */ TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL), /* X1 Carbon (9th gen) */ + TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ }; static int __init fan_init(struct ibm_init_struct *iibm) @@ -8730,6 +8732,11 @@ static int __init fan_init(struct ibm_init_struct *iibm) quirks = tpacpi_check_quirks(fan_quirk_table, ARRAY_SIZE(fan_quirk_table)); + if (quirks & TPACPI_FAN_NOFAN) { + pr_info("No integrated ThinkPad fan available\n"); + return -ENODEV; + } + if (gfan_handle) { /* 570, 600e/x, 770e, 770x */ fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN; From a29012ab23163f78087a7e77719f05d201088700 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 12 Jan 2022 00:23:09 +0100 Subject: [PATCH 061/367] platform/x86: intel_crystal_cove_charger: Fix IRQ masking / unmasking The driver as originally submitted accidentally relied on Android having run before and Android having unmasked the 2nd level IRQ-mask for the charger IRQ. This worked since these are PMIC registers which are only reset when the battery is fully drained or disconnected. Fix the charger IRQ no longer working after loss of battery power by properly setting the 2nd level IRQ-mask for the charger IRQ. Note this removes the need to enable/disable our parent IRQ which just sets the mask bit in the 1st level IRQ-mask register, setting one of the 2 level masks is enough to stop the IRQ from getting reported. Fixes: 761db353d9e2 ("platform/x86: Add intel_crystal_cove_charger driver") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220111232309.377642-1-hdegoede@redhat.com --- .../platform/x86/intel/crystal_cove_charger.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/platform/x86/intel/crystal_cove_charger.c b/drivers/platform/x86/intel/crystal_cove_charger.c index 0374bc742513..e4299cfa2205 100644 --- a/drivers/platform/x86/intel/crystal_cove_charger.c +++ b/drivers/platform/x86/intel/crystal_cove_charger.c @@ -17,6 +17,7 @@ #include #define CHGRIRQ_REG 0x0a +#define MCHGRIRQ_REG 0x17 struct crystal_cove_charger_data { struct mutex buslock; /* irq_bus_lock */ @@ -25,8 +26,8 @@ struct crystal_cove_charger_data { struct irq_domain *irq_domain; int irq; int charger_irq; - bool irq_enabled; - bool irq_is_enabled; + u8 mask; + u8 new_mask; }; static irqreturn_t crystal_cove_charger_irq(int irq, void *data) @@ -53,13 +54,9 @@ static void crystal_cove_charger_irq_bus_sync_unlock(struct irq_data *data) { struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data); - if (charger->irq_is_enabled != charger->irq_enabled) { - if (charger->irq_enabled) - enable_irq(charger->irq); - else - disable_irq(charger->irq); - - charger->irq_is_enabled = charger->irq_enabled; + if (charger->mask != charger->new_mask) { + regmap_write(charger->regmap, MCHGRIRQ_REG, charger->new_mask); + charger->mask = charger->new_mask; } mutex_unlock(&charger->buslock); @@ -69,14 +66,14 @@ static void crystal_cove_charger_irq_unmask(struct irq_data *data) { struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data); - charger->irq_enabled = true; + charger->new_mask &= ~BIT(data->hwirq); } static void crystal_cove_charger_irq_mask(struct irq_data *data) { struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data); - charger->irq_enabled = false; + charger->new_mask |= BIT(data->hwirq); } static void crystal_cove_charger_rm_irq_domain(void *data) @@ -130,10 +127,13 @@ static int crystal_cove_charger_probe(struct platform_device *pdev) irq_set_nested_thread(charger->charger_irq, true); irq_set_noprobe(charger->charger_irq); + /* Mask the single 2nd level IRQ before enabling the 1st level IRQ */ + charger->mask = charger->new_mask = BIT(0); + regmap_write(charger->regmap, MCHGRIRQ_REG, charger->mask); + ret = devm_request_threaded_irq(&pdev->dev, charger->irq, NULL, crystal_cove_charger_irq, - IRQF_ONESHOT | IRQF_NO_AUTOEN, - KBUILD_MODNAME, charger); + IRQF_ONESHOT, KBUILD_MODNAME, charger); if (ret) return dev_err_probe(&pdev->dev, ret, "requesting irq\n"); From 17da2d5f93692086dd096a975225ffd5622d0bf8 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 11 Jan 2022 18:25:21 -0800 Subject: [PATCH 062/367] platform/x86: ISST: Fix possible circular locking dependency detected As reported: [ 256.104522] ====================================================== [ 256.113783] WARNING: possible circular locking dependency detected [ 256.120093] 5.16.0-rc6-yocto-standard+ #99 Not tainted [ 256.125362] ------------------------------------------------------ [ 256.131673] intel-speed-sel/844 is trying to acquire lock: [ 256.137290] ffffffffc036f0d0 (punit_misc_dev_lock){+.+.}-{3:3}, at: isst_if_open+0x18/0x90 [isst_if_common] [ 256.147171] [ 256.147171] but task is already holding lock: [ 256.153135] ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at: misc_open+0x2a/0x170 [ 256.160407] [ 256.160407] which lock already depends on the new lock. [ 256.160407] [ 256.168712] [ 256.168712] the existing dependency chain (in reverse order) is: [ 256.176327] [ 256.176327] -> #1 (misc_mtx){+.+.}-{3:3}: [ 256.181946] lock_acquire+0x1e6/0x330 [ 256.186265] __mutex_lock+0x9b/0x9b0 [ 256.190497] mutex_lock_nested+0x1b/0x20 [ 256.195075] misc_register+0x32/0x1a0 [ 256.199390] isst_if_cdev_register+0x65/0x180 [isst_if_common] [ 256.205878] isst_if_probe+0x144/0x16e [isst_if_mmio] ... [ 256.241976] [ 256.241976] -> #0 (punit_misc_dev_lock){+.+.}-{3:3}: [ 256.248552] validate_chain+0xbc6/0x1750 [ 256.253131] __lock_acquire+0x88c/0xc10 [ 256.257618] lock_acquire+0x1e6/0x330 [ 256.261933] __mutex_lock+0x9b/0x9b0 [ 256.266165] mutex_lock_nested+0x1b/0x20 [ 256.270739] isst_if_open+0x18/0x90 [isst_if_common] [ 256.276356] misc_open+0x100/0x170 [ 256.280409] chrdev_open+0xa5/0x1e0 ... The call sequence suggested that misc_device /dev file can be opened before misc device is yet to be registered, which is done only once. Here punit_misc_dev_lock was used as common lock, to protect the registration by multiple ISST HW drivers, one time setup, prevent duplicate registry of misc device and prevent load/unload when device is open. We can split into locks: - One which just prevent duplicate call to misc_register() and one time setup. Also never call again if the misc_register() failed or required one time setup is failed. This lock is not shared with any misc device callbacks. - The other lock protects registry, load and unload of HW drivers. Sequence in isst_if_cdev_register() - Register callbacks under punit_misc_dev_open_lock - Call isst_misc_reg() which registers misc_device on the first registry which is under punit_misc_dev_reg_lock, which is not shared with callbacks. Sequence in isst_if_cdev_unregister Just opposite of isst_if_cdev_register Reported-and-tested-by: Liwei Song Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20220112022521.54669-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../intel/speed_select_if/isst_if_common.c | 97 ++++++++++++------- 1 file changed, 63 insertions(+), 34 deletions(-) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c index c9a85eb2e860..e8424e70d81d 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c @@ -596,7 +596,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd, return ret; } -static DEFINE_MUTEX(punit_misc_dev_lock); +/* Lock to prevent module registration when already opened by user space */ +static DEFINE_MUTEX(punit_misc_dev_open_lock); +/* Lock to allow one share misc device for all ISST interace */ +static DEFINE_MUTEX(punit_misc_dev_reg_lock); static int misc_usage_count; static int misc_device_ret; static int misc_device_open; @@ -606,7 +609,7 @@ static int isst_if_open(struct inode *inode, struct file *file) int i, ret = 0; /* Fail open, if a module is going away */ - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -628,7 +631,7 @@ static int isst_if_open(struct inode *inode, struct file *file) } else { misc_device_open++; } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return ret; } @@ -637,7 +640,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) { int i; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); misc_device_open--; for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -645,7 +648,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) if (cb->registered) module_put(cb->owner); } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return 0; } @@ -662,6 +665,43 @@ static struct miscdevice isst_if_char_driver = { .fops = &isst_if_char_driver_ops, }; +static int isst_misc_reg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_device_ret) + goto unlock_exit; + + if (!misc_usage_count) { + misc_device_ret = isst_if_cpu_info_init(); + if (misc_device_ret) + goto unlock_exit; + + misc_device_ret = misc_register(&isst_if_char_driver); + if (misc_device_ret) { + isst_if_cpu_info_exit(); + goto unlock_exit; + } + } + misc_usage_count++; + +unlock_exit: + mutex_unlock(&punit_misc_dev_reg_lock); + + return misc_device_ret; +} + +static void isst_misc_unreg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_usage_count) + misc_usage_count--; + if (!misc_usage_count && !misc_device_ret) { + misc_deregister(&isst_if_char_driver); + isst_if_cpu_info_exit(); + } + mutex_unlock(&punit_misc_dev_reg_lock); +} + /** * isst_if_cdev_register() - Register callback for IOCTL * @device_type: The device type this callback handling. @@ -679,38 +719,31 @@ static struct miscdevice isst_if_char_driver = { */ int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb) { - if (misc_device_ret) - return misc_device_ret; + int ret; if (device_type >= ISST_IF_DEV_MAX) return -EINVAL; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); + /* Device is already open, we don't want to add new callbacks */ if (misc_device_open) { - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return -EAGAIN; } - if (!misc_usage_count) { - int ret; - - misc_device_ret = misc_register(&isst_if_char_driver); - if (misc_device_ret) - goto unlock_exit; - - ret = isst_if_cpu_info_init(); - if (ret) { - misc_deregister(&isst_if_char_driver); - misc_device_ret = ret; - goto unlock_exit; - } - } memcpy(&punit_callbacks[device_type], cb, sizeof(*cb)); punit_callbacks[device_type].registered = 1; - misc_usage_count++; -unlock_exit: - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); - return misc_device_ret; + ret = isst_misc_reg(); + if (ret) { + /* + * No need of mutex as the misc device register failed + * as no one can open device yet. Hence no contention. + */ + punit_callbacks[device_type].registered = 0; + return ret; + } + return 0; } EXPORT_SYMBOL_GPL(isst_if_cdev_register); @@ -725,16 +758,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register); */ void isst_if_cdev_unregister(int device_type) { - mutex_lock(&punit_misc_dev_lock); - misc_usage_count--; + isst_misc_unreg(); + mutex_lock(&punit_misc_dev_open_lock); punit_callbacks[device_type].registered = 0; if (device_type == ISST_IF_DEV_MBOX) isst_delete_hash(); - if (!misc_usage_count && !misc_device_ret) { - misc_deregister(&isst_if_char_driver); - isst_if_cpu_info_exit(); - } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); } EXPORT_SYMBOL_GPL(isst_if_cdev_unregister); From f7086daab3b540c89951b9b4c00fc49111f7cfa6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 17 Jan 2022 12:26:43 +0100 Subject: [PATCH 063/367] platform/x86: amd-pmc: Make amd_pmc_stb_debugfs_fops static amd_pmc_stb_debugfs_fops is not used outside of amd-pmc.c, make it static. Cc: Sanket Goswami Reported-by: kernel test robot Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220117112644.260168-1-hdegoede@redhat.com --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index f794343d6aaa..85b680297934 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -226,7 +226,7 @@ static int amd_pmc_stb_debugfs_release(struct inode *inode, struct file *filp) return 0; } -const struct file_operations amd_pmc_stb_debugfs_fops = { +static const struct file_operations amd_pmc_stb_debugfs_fops = { .owner = THIS_MODULE, .open = amd_pmc_stb_debugfs_open, .read = amd_pmc_stb_debugfs_read, From f8c28b93d2628610cf793b3528f6f40fd1c7cd5b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 17 Jan 2022 12:26:44 +0100 Subject: [PATCH 064/367] platform/x86: asus-tf103c-dock: Make 2 global structs static tf103c_dock_hid_ll_driver and tf103c_dock_pm_ops are not used outside of the driver, make them both static. Reported-by: kernel test robot Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220117112644.260168-2-hdegoede@redhat.com --- drivers/platform/x86/asus-tf103c-dock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/asus-tf103c-dock.c b/drivers/platform/x86/asus-tf103c-dock.c index d4ef8f362ee6..6fd0c9fea82d 100644 --- a/drivers/platform/x86/asus-tf103c-dock.c +++ b/drivers/platform/x86/asus-tf103c-dock.c @@ -250,7 +250,7 @@ static int tf103c_dock_hid_raw_request(struct hid_device *hid, u8 reportnum, return 0; } -struct hid_ll_driver tf103c_dock_hid_ll_driver = { +static struct hid_ll_driver tf103c_dock_hid_ll_driver = { .parse = tf103c_dock_hid_parse, .start = tf103c_dock_hid_start, .stop = tf103c_dock_hid_stop, @@ -921,7 +921,7 @@ static int __maybe_unused tf103c_dock_resume(struct device *dev) return 0; } -SIMPLE_DEV_PM_OPS(tf103c_dock_pm_ops, tf103c_dock_suspend, tf103c_dock_resume); +static SIMPLE_DEV_PM_OPS(tf103c_dock_pm_ops, tf103c_dock_suspend, tf103c_dock_resume); static const struct acpi_device_id tf103c_dock_acpi_match[] = { {"NPCE69A"}, From b8fb0d9b47660ddb8a8256412784aad7cee9f21a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 20 Jan 2022 11:44:39 -0600 Subject: [PATCH 065/367] platform/x86: amd-pmc: Correct usage of SMU version Yellow carp has been outputting versions like `1093.24.0`, but this is supposed to be 69.24.0. That is the MSB is being interpreted incorrectly. The MSB is not part of the major version, but has generally been treated that way thus far. It's actually the program, and used to distinguish between two programs from a similar family but different codebase. Link: https://patchwork.freedesktop.org/patch/469993/ Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20220120174439.12770-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 85b680297934..4c72ba68b315 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -124,9 +124,10 @@ struct amd_pmc_dev { u32 cpu_id; u32 active_ips; /* SMU version information */ - u16 major; - u16 minor; - u16 rev; + u8 smu_program; + u8 major; + u8 minor; + u8 rev; struct device *dev; struct pci_dev *rdev; struct mutex lock; /* generic mutex lock */ @@ -180,11 +181,13 @@ static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev) if (rc) return rc; - dev->major = (val >> 16) & GENMASK(15, 0); + dev->smu_program = (val >> 24) & GENMASK(7, 0); + dev->major = (val >> 16) & GENMASK(7, 0); dev->minor = (val >> 8) & GENMASK(7, 0); dev->rev = (val >> 0) & GENMASK(7, 0); - dev_dbg(dev->dev, "SMU version is %u.%u.%u\n", dev->major, dev->minor, dev->rev); + dev_dbg(dev->dev, "SMU program %u version is %u.%u.%u\n", + dev->smu_program, dev->major, dev->minor, dev->rev); return 0; } From 58cd4a088e8917b4092c7011d499e277e04a6644 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jan 2022 12:12:34 +0000 Subject: [PATCH 066/367] arm64: vdso: Fix "no previous prototype" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If compiling the arm64 kernel with W=1 the following warning is produced: | arch/arm64/kernel/vdso/vgettimeofday.c:9:5: error: no previous prototype for ‘__kernel_clock_gettime’ [-Werror=missing-prototypes] | 9 | int __kernel_clock_gettime(clockid_t clock, | | ^~~~~~~~~~~~~~~~~~~~~~ | arch/arm64/kernel/vdso/vgettimeofday.c:15:5: error: no previous prototype for ‘__kernel_gettimeofday’ [-Werror=missing-prototypes] | 15 | int __kernel_gettimeofday(struct __kernel_old_timeval *tv, | | ^~~~~~~~~~~~~~~~~~~~~ | arch/arm64/kernel/vdso/vgettimeofday.c:21:5: error: no previous prototype for ‘__kernel_clock_getres’ [-Werror=missing-prototypes] | 21 | int __kernel_clock_getres(clockid_t clock_id, | | ^~~~~~~~~~~~~~~~~~~~~ This patch removes "-Wmissing-prototypes" and "-Wmissing-declarations" compilers flags from the compilation of vgettimeofday.c to make possible to build the kernel with CONFIG_WERROR enabled. Cc: Will Deacon Reported-by: Marc Kleine-Budde Signed-off-by: Vincenzo Frascino Tested-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20220121121234.47273-1-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 60813497a381..172452f79e46 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -29,8 +29,11 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO +# -Wmissing-prototypes and -Wmissing-declarations are removed from +# the CFLAGS of vgettimeofday.c to make possible to build the +# kernel with CONFIG_WERROR enabled. CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \ - $(CC_FLAGS_LTO) + $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations KASAN_SANITIZE := n KCSAN_SANITIZE := n UBSAN_SANITIZE := n From a37d9a17f099072fe4d3a9048b0321978707a918 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 20 Jan 2022 23:53:04 +0200 Subject: [PATCH 067/367] fsnotify: invalidate dcache before IN_DELETE event Apparently, there are some applications that use IN_DELETE event as an invalidation mechanism and expect that if they try to open a file with the name reported with the delete event, that it should not contain the content of the deleted file. Commit 49246466a989 ("fsnotify: move fsnotify_nameremove() hook out of d_delete()") moved the fsnotify delete hook before d_delete() so fsnotify will have access to a positive dentry. This allowed a race where opening the deleted file via cached dentry is now possible after receiving the IN_DELETE event. To fix the regression, create a new hook fsnotify_delete() that takes the unlinked inode as an argument and use a helper d_delete_notify() to pin the inode, so we can pass it to fsnotify_delete() after d_delete(). Backporting hint: this regression is from v5.3. Although patch will apply with only trivial conflicts to v5.4 and v5.10, it won't build, because fsnotify_delete() implementation is different in each of those versions (see fsnotify_link()). A follow up patch will fix the fsnotify_unlink/rmdir() calls in pseudo filesystem that do not need to call d_delete(). Link: https://lore.kernel.org/r/20220120215305.282577-1-amir73il@gmail.com Reported-by: Ivan Delalande Link: https://lore.kernel.org/linux-fsdevel/YeNyzoDM5hP5LtGW@visor/ Fixes: 49246466a989 ("fsnotify: move fsnotify_nameremove() hook out of d_delete()") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/btrfs/ioctl.c | 6 ++--- fs/namei.c | 10 ++++---- include/linux/fsnotify.h | 49 +++++++++++++++++++++++++++++++++++----- 3 files changed, 50 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a5bd6926f7ff..7807b28b7892 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3086,10 +3086,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, btrfs_inode_lock(inode, 0); err = btrfs_delete_subvolume(dir, dentry); btrfs_inode_unlock(inode, 0); - if (!err) { - fsnotify_rmdir(dir, dentry); - d_delete(dentry); - } + if (!err) + d_delete_notify(dir, dentry); out_dput: dput(dentry); diff --git a/fs/namei.c b/fs/namei.c index d81f04f8d818..4ed0e41feab7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3974,13 +3974,12 @@ int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir, dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); - fsnotify_rmdir(dir, dentry); out: inode_unlock(dentry->d_inode); dput(dentry); if (!error) - d_delete(dentry); + d_delete_notify(dir, dentry); return error; } EXPORT_SYMBOL(vfs_rmdir); @@ -4102,7 +4101,6 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir, if (!error) { dont_mount(dentry); detach_mounts(dentry); - fsnotify_unlink(dir, dentry); } } } @@ -4110,9 +4108,11 @@ out: inode_unlock(target); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ - if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { + if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { + fsnotify_unlink(dir, dentry); + } else if (!error) { fsnotify_link_count(target); - d_delete(dentry); + d_delete_notify(dir, dentry); } return error; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 3a2d7dc3c607..bb8467cd11ae 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -224,6 +224,43 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, dir, &new_dentry->d_name, 0); } +/* + * fsnotify_delete - @dentry was unlinked and unhashed + * + * Caller must make sure that dentry->d_name is stable. + * + * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode + * as this may be called after d_delete() and old_dentry may be negative. + */ +static inline void fsnotify_delete(struct inode *dir, struct inode *inode, + struct dentry *dentry) +{ + __u32 mask = FS_DELETE; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, + 0); +} + +/** + * d_delete_notify - delete a dentry and call fsnotify_delete() + * @dentry: The dentry to delete + * + * This helper is used to guaranty that the unlinked inode cannot be found + * by lookup of this name after fsnotify_delete() event has been delivered. + */ +static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + + ihold(inode); + d_delete(dentry); + fsnotify_delete(dir, inode, dentry); + iput(inode); +} + /* * fsnotify_unlink - 'name' was unlinked * @@ -231,10 +268,10 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* @@ -258,10 +295,10 @@ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* From 29044dae2e746949ad4b9cbdbfb248994d1dcdb4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 20 Jan 2022 23:53:05 +0200 Subject: [PATCH 068/367] fsnotify: fix fsnotify hooks in pseudo filesystems Commit 49246466a989 ("fsnotify: move fsnotify_nameremove() hook out of d_delete()") moved the fsnotify delete hook before d_delete() so fsnotify will have access to a positive dentry. This allowed a race where opening the deleted file via cached dentry is now possible after receiving the IN_DELETE event. To fix the regression in pseudo filesystems, convert d_delete() calls to d_drop() (see commit 46c46f8df9aa ("devpts_pty_kill(): don't bother with d_delete()") and move the fsnotify hook after d_drop(). Add a missing fsnotify_unlink() hook in nfsdfs that was found during the audit of fsnotify hooks in pseudo filesystems. Note that the fsnotify hooks in simple_recursive_removal() follow d_invalidate(), so they require no change. Link: https://lore.kernel.org/r/20220120215305.282577-2-amir73il@gmail.com Reported-by: Ivan Delalande Link: https://lore.kernel.org/linux-fsdevel/YeNyzoDM5hP5LtGW@visor/ Fixes: 49246466a989 ("fsnotify: move fsnotify_nameremove() hook out of d_delete()") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/configfs/dir.c | 6 +++--- fs/devpts/inode.c | 2 +- fs/nfsd/nfsctl.c | 5 +++-- net/sunrpc/rpc_pipe.c | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 1466b5d01cbb..d3cd2a94d1e8 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1780,8 +1780,8 @@ void configfs_unregister_group(struct config_group *group) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + d_drop(dentry); fsnotify_rmdir(d_inode(parent), dentry); - d_delete(dentry); inode_unlock(d_inode(parent)); dput(dentry); @@ -1922,10 +1922,10 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); - fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(dentry)); - d_delete(dentry); + d_drop(dentry); + fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(root)); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 42e5a766d33c..4f25015aa534 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -621,8 +621,8 @@ void devpts_pty_kill(struct dentry *dentry) dentry->d_fsdata = NULL; drop_nlink(dentry->d_inode); - fsnotify_unlink(d_inode(dentry->d_parent), dentry); d_drop(dentry); + fsnotify_unlink(d_inode(dentry->d_parent), dentry); dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b9f27fbcd768..68b020f2002b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1247,7 +1247,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) clear_ncl(d_inode(dentry)); dget(dentry); ret = simple_unlink(dir, dentry); - d_delete(dentry); + d_drop(dentry); + fsnotify_unlink(dir, dentry); dput(dentry); WARN_ON_ONCE(ret); } @@ -1338,8 +1339,8 @@ void nfsd_client_rmdir(struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); WARN_ON_ONCE(ret); + d_drop(dentry); fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); inode_unlock(dir); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ee5336d73fdd..35588f0afa86 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } @@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_unlink(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } From bdac3bbd0dc63873a9c606b8e4f814e6d61d288d Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 26 Nov 2021 16:43:42 +0100 Subject: [PATCH 069/367] spi: spi-rockchip: Add rk3568-spi compatible This adds a compatible string for the SPI controller found on the RK3566 and RK3568 SoCs. Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20211126154344.724316-2-frattaroli.nicolas@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-rockchip.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 7f987e79337c..52a78a2e362e 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -33,6 +33,7 @@ properties: - rockchip,rk3328-spi - rockchip,rk3368-spi - rockchip,rk3399-spi + - rockchip,rk3568-spi - rockchip,rv1126-spi - const: rockchip,rk3066-spi From 7fc3b7c2981bbd1047916ade327beccb90994eee Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 Jan 2022 18:22:13 +0100 Subject: [PATCH 070/367] udf: Fix NULL ptr deref when converting from inline format udf_expand_file_adinicb() calls directly ->writepage to write data expanded into a page. This however misses to setup inode for writeback properly and so we can crash on inode->i_wb dereference when submitting page for IO like: BUG: kernel NULL pointer dereference, address: 0000000000000158 #PF: supervisor read access in kernel mode ... __folio_start_writeback+0x2ac/0x350 __block_write_full_page+0x37d/0x490 udf_expand_file_adinicb+0x255/0x400 [udf] udf_file_write_iter+0xbe/0x1b0 [udf] new_sync_write+0x125/0x1c0 vfs_write+0x28e/0x400 Fix the problem by marking the page dirty and going through the standard writeback path to write the page. Strictly speaking we would not even have to write the page but we want to catch e.g. ENOSPC errors early. Reported-by: butt3rflyh4ck CC: stable@vger.kernel.org Fixes: 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/udf/inode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1d6b7a50736b..d6aa506b6b58 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode *inode) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); int err; - struct writeback_control udf_wbc = { - .sync_mode = WB_SYNC_NONE, - .nr_to_write = 1, - }; WARN_ON_ONCE(!inode_is_locked(inode)); if (!iinfo->i_lenAlloc) { @@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + set_page_dirty(page); + unlock_page(page); up_write(&iinfo->i_data_sem); - err = inode->i_data.a_ops->writepage(page, &udf_wbc); + err = filemap_fdatawrite(inode->i_mapping); if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); From ea8569194b43f0f01f0a84c689388542c7254a1f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 18 Jan 2022 09:57:25 +0100 Subject: [PATCH 071/367] udf: Restore i_lenAlloc when inode expansion fails When we fail to expand inode from inline format to a normal format, we restore inode to contain the original inline formatting but we forgot to set i_lenAlloc back. The mismatch between i_lenAlloc and i_size was then causing further problems such as warnings and lost data down the line. Reported-by: butt3rflyh4ck CC: stable@vger.kernel.org Fixes: 7e49b6f2480c ("udf: Convert UDF to new truncate calling sequence") Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/udf/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index d6aa506b6b58..ea8f6cd01f50 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -315,6 +315,7 @@ int udf_expand_file_adinicb(struct inode *inode) unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; + iinfo->i_lenAlloc = inode->i_size; up_write(&iinfo->i_data_sem); } put_page(page); From 9daf0a4d32d60a57f2a2533bdf4c178be7fdff7f Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 16 Jan 2022 04:59:36 -0800 Subject: [PATCH 072/367] quota: cleanup double word in comment Remove the second 'handle'. Link: https://lore.kernel.org/r/20220116125936.389767-1-trix@redhat.com Signed-off-by: Tom Rix Signed-off-by: Jan Kara --- include/linux/quota.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/quota.h b/include/linux/quota.h index 18ebd39c9487..fd692b4a41d5 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -91,7 +91,7 @@ extern bool qid_valid(struct kqid qid); * * When there is no mapping defined for the user-namespace, type, * qid tuple an invalid kqid is returned. Callers are expected to - * test for and handle handle invalid kqids being returned. + * test for and handle invalid kqids being returned. * Invalid kqids may be tested for using qid_valid(). */ static inline struct kqid make_kqid(struct user_namespace *from, From 94fea1d8a30eadc3ef07afc0f53dc06799bb300b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 22 Jan 2022 01:52:11 +0000 Subject: [PATCH 073/367] KVM: VMX: Zero host's SYSENTER_ESP iff SYSENTER is NOT used Zero vmcs.HOST_IA32_SYSENTER_ESP when initializing *constant* host state if and only if SYSENTER cannot be used, i.e. the kernel is a 64-bit kernel and is not emulating 32-bit syscalls. As the name suggests, vmx_set_constant_host_state() is intended for state that is *constant*. When SYSENTER is used, SYSENTER_ESP isn't constant because stacks are per-CPU, and the VMCS must be updated whenever the vCPU is migrated to a new CPU. The logic in vmx_vcpu_load_vmcs() doesn't differentiate between "never loaded" and "loaded on a different CPU", i.e. setting SYSENTER_ESP on VMCS load also handles setting correct host state when the VMCS is first loaded. Because a VMCS must be loaded before it is initialized during vCPU RESET, zeroing the field in vmx_set_constant_host_state() obliterates the value that was written when the VMCS was loaded. If the vCPU is run before it is migrated, the subsequent VM-Exit will zero out MSR_IA32_SYSENTER_ESP, leading to a #DF on the next 32-bit syscall. double fault: 0000 [#1] SMP CPU: 0 PID: 990 Comm: stable Not tainted 5.16.0+ #97 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 EIP: entry_SYSENTER_32+0x0/0xe7 Code: <9c> 50 eb 17 0f 20 d8 a9 00 10 00 00 74 0d 25 ff ef ff ff 0f 22 d8 EAX: 000000a2 EBX: a8d1300c ECX: a8d13014 EDX: 00000000 ESI: a8f87000 EDI: a8d13014 EBP: a8d12fc0 ESP: 00000000 DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00210093 CR0: 80050033 CR2: fffffffc CR3: 02c3b000 CR4: 00152e90 Fixes: 6ab8a4053f71 ("KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP)") Cc: Lai Jiangshan Signed-off-by: Sean Christopherson Message-Id: <20220122015211.1468758-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a02a28ce7cc3..705e5c082738 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4094,10 +4094,14 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) vmcs_write32(HOST_IA32_SYSENTER_CS, low32); /* - * If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites - * HOST_IA32_SYSENTER_ESP. + * SYSENTER is used for 32-bit system calls on either 32-bit or + * 64-bit kernels. It is always zero If neither is allowed, otherwise + * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may + * have already done so!). */ - vmcs_writel(HOST_IA32_SYSENTER_ESP, 0); + if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32)) + vmcs_writel(HOST_IA32_SYSENTER_ESP, 0); + rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ From adb759e599990416e42e659c024a654b76c84617 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 23 Jan 2022 13:42:19 +0100 Subject: [PATCH 074/367] x86,kvm/xen: Remove superfluous .fixup usage Commit 14243b387137 ("KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery") adds superfluous .fixup usage after the whole .fixup section was removed in commit e5eefda5aa51 ("x86: Remove .fixup section"). Fixes: 14243b387137 ("KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery") Reported-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Message-Id: <20220123124219.GH20638@worktop.programming.kicks-ass.net> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 0e3f7d6e9fd7..bad57535fad0 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -316,10 +316,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) "\tnotq %0\n" "\t" LOCK_PREFIX "andq %0, %2\n" "2:\n" - "\t.section .fixup,\"ax\"\n" - "3:\tjmp\t2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_UA(1b, 2b) : "=r" (evtchn_pending_sel), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) @@ -335,10 +332,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) "\tnotl %0\n" "\t" LOCK_PREFIX "andl %0, %2\n" "2:\n" - "\t.section .fixup,\"ax\"\n" - "3:\tjmp\t2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_UA(1b, 2b) : "=r" (evtchn_pending_sel32), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) From 1625566ec8fd3a42e305c5118df81fb113eb60a7 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Mon, 24 Jan 2022 10:04:56 +0800 Subject: [PATCH 075/367] KVM: remove async parameter of hva_to_pfn_remapped() The async parameter of hva_to_pfn_remapped() is not used, so remove it. Signed-off-by: Xianting Tian Message-Id: <20220124020456.156386-1-xianting.tian@linux.alibaba.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5a1164483e6c..0bbb3cbf6014 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2463,9 +2463,8 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn) } static int hva_to_pfn_remapped(struct vm_area_struct *vma, - unsigned long addr, bool *async, - bool write_fault, bool *writable, - kvm_pfn_t *p_pfn) + unsigned long addr, bool write_fault, + bool *writable, kvm_pfn_t *p_pfn) { kvm_pfn_t pfn; pte_t *ptep; @@ -2575,7 +2574,7 @@ retry: if (vma == NULL) pfn = KVM_PFN_ERR_FAULT; else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { - r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn); + r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn); if (r == -EAGAIN) goto retry; if (r < 0) From 9ff5549b1d1d3c3a9d71220d44bd246586160f1d Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 16 Jan 2022 11:18:31 -0800 Subject: [PATCH 076/367] video: hyperv_fb: Fix validation of screen resolution In the WIN10 version of the Synthetic Video protocol with Hyper-V, Hyper-V reports a list of supported resolutions as part of the protocol negotiation. The driver calculates the maximum width and height from the list of resolutions, and uses those maximums to validate any screen resolution specified in the video= option on the kernel boot line. This method of validation is incorrect. For example, the list of supported resolutions could contain 1600x1200 and 1920x1080, both of which fit in an 8 Mbyte frame buffer. But calculating the max width and height yields 1920 and 1200, and 1920x1200 resolution does not fit in an 8 Mbyte frame buffer. Unfortunately, this resolution is accepted, causing a kernel fault when the driver accesses memory outside the frame buffer. Instead, validate the specified screen resolution by calculating its size, and comparing against the frame buffer size. Delete the code for calculating the max width and height from the list of resolutions, since these max values have no use. Also add the frame buffer size to the info message to aid in understanding why a resolution might be rejected. Fixes: 67e7cdb4829d ("video: hyperv: hyperv_fb: Obtain screen resolution from Hyper-V host") Signed-off-by: Michael Kelley Reviewed-by: Haiyang Zhang Acked-by: Helge Deller Link: https://lore.kernel.org/r/1642360711-2335-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/video/fbdev/hyperv_fb.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 23999df52739..c8e0ea27caf1 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -287,8 +287,6 @@ struct hvfb_par { static uint screen_width = HVFB_WIDTH; static uint screen_height = HVFB_HEIGHT; -static uint screen_width_max = HVFB_WIDTH; -static uint screen_height_max = HVFB_HEIGHT; static uint screen_depth; static uint screen_fb_size; static uint dio_fb_size; /* FB size for deferred IO */ @@ -582,7 +580,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev) int ret = 0; unsigned long t; u8 index; - int i; memset(msg, 0, sizeof(struct synthvid_msg)); msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST; @@ -613,13 +610,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev) goto out; } - for (i = 0; i < msg->resolution_resp.resolution_count; i++) { - screen_width_max = max_t(unsigned int, screen_width_max, - msg->resolution_resp.supported_resolution[i].width); - screen_height_max = max_t(unsigned int, screen_height_max, - msg->resolution_resp.supported_resolution[i].height); - } - screen_width = msg->resolution_resp.supported_resolution[index].width; screen_height = @@ -941,7 +931,7 @@ static void hvfb_get_option(struct fb_info *info) if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN || (synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) && - (x > screen_width_max || y > screen_height_max)) || + (x * y * screen_depth / 8 > screen_fb_size)) || (par->synthvid_version == SYNTHVID_VERSION_WIN8 && x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) || (par->synthvid_version == SYNTHVID_VERSION_WIN7 && @@ -1194,8 +1184,8 @@ static int hvfb_probe(struct hv_device *hdev, } hvfb_get_option(info); - pr_info("Screen resolution: %dx%d, Color depth: %d\n", - screen_width, screen_height, screen_depth); + pr_info("Screen resolution: %dx%d, Color depth: %d, Frame buffer size: %d\n", + screen_width, screen_height, screen_depth, screen_fb_size); ret = hvfb_getmem(hdev, info); if (ret) { From 72bb9dcb6c33cfac80282713c2b4f2b254cd24d1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 24 Jan 2022 08:45:37 +0530 Subject: [PATCH 077/367] arm64: Add Cortex-X2 CPU part definition Add the CPU Partnumbers for the new Arm designs. Cc: Will Deacon Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1642994138-25887-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 19b8441aa8f2..657eeb06c784 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -74,6 +74,7 @@ #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define APM_CPU_PART_POTENZA 0x000 @@ -116,6 +117,7 @@ #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) From eb30d838a44c9e59a2a106884f536119859c7257 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 24 Jan 2022 08:45:38 +0530 Subject: [PATCH 078/367] arm64: errata: Update ARM64_ERRATUM_[2119858|2224489] with Cortex-X2 ranges Errata ARM64_ERRATUM_[2119858|2224489] also affect some Cortex-X2 ranges as well. Lets update these errata definition and detection to accommodate all new Cortex-X2 based cpu MIDR ranges. Cc: Will Deacon Cc: Mathieu Poirier Cc: Suzuki Poulose Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1642994138-25887-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 12 ++++++------ arch/arm64/kernel/cpu_errata.c | 2 ++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 5342e895fb60..8789c79310bb 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -98,6 +98,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6978140edfa4..77b8f653f4bc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -671,14 +671,14 @@ config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE bool config ARM64_ERRATUM_2119858 - bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode" default y depends on CORESIGHT_TRBE select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE help - This option adds the workaround for ARM Cortex-A710 erratum 2119858. + This option adds the workaround for ARM Cortex-A710/X2 erratum 2119858. - Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace + Affected Cortex-A710/X2 cores could overwrite up to 3 cache lines of trace data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in the event of a WRAP event. @@ -761,14 +761,14 @@ config ARM64_ERRATUM_2253138 If unsure, say Y. config ARM64_ERRATUM_2224489 - bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range" + bool "Cortex-A710/X2: 2224489: workaround TRBE writing to address out-of-range" depends on CORESIGHT_TRBE default y select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE help - This option adds the workaround for ARM Cortex-A710 erratum 2224489. + This option adds the workaround for ARM Cortex-A710/X2 erratum 2224489. - Affected Cortex-A710 cores might write to an out-of-range address, not reserved + Affected Cortex-A710/X2 cores might write to an out-of-range address, not reserved for TRBE. Under some conditions, the TRBE might generate a write to the next virtually addressed page following the last page of the TRBE address space (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9e1c1aef9ebd..29cc062a4153 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -347,6 +347,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0), #endif {}, }; @@ -371,6 +372,7 @@ static struct midr_range trbe_write_out_of_range_cpus[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0), #endif {}, }; From 1e0924bd09916fab795fc2a21ec1d148f24299fd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 24 Jan 2022 17:17:54 +0900 Subject: [PATCH 079/367] arm64: Mark start_backtrace() notrace and NOKPROBE_SYMBOL Mark the start_backtrace() as notrace and NOKPROBE_SYMBOL because this function is called from ftrace and lockdep to get the caller address via return_address(). The lockdep is used in kprobes, it should also be NOKPROBE_SYMBOL. Fixes: b07f3499661c ("arm64: stacktrace: Move start_backtrace() out of the header") Cc: # 5.13.x Signed-off-by: Masami Hiramatsu Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/164301227374.1433152.12808232644267107415.stgit@devnote2 Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 0fb58fed54cb..e4103e085681 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -33,8 +33,8 @@ */ -static void start_backtrace(struct stackframe *frame, unsigned long fp, - unsigned long pc) +static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc) { frame->fp = fp; frame->pc = pc; @@ -55,6 +55,7 @@ static void start_backtrace(struct stackframe *frame, unsigned long fp, frame->prev_fp = 0; frame->prev_type = STACK_TYPE_UNKNOWN; } +NOKPROBE_SYMBOL(start_backtrace); /* * Unwind from one frame record (A) to the next frame record (B). From 77311237eaffa240af6eae1d511b61e77a20a2ef Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Jan 2022 22:58:46 +0200 Subject: [PATCH 080/367] pinctrl: Place correctly CONFIG_PINCTRL_ST in the Makefile Keep Makefile entries ordered in the same way as Kconfig ones. Reported-by: Linus Torvalds Signed-off-by: Andy Shevchenko --- drivers/pinctrl/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile index 08c364d611f5..f64d29f614ec 100644 --- a/drivers/pinctrl/Makefile +++ b/drivers/pinctrl/Makefile @@ -42,9 +42,9 @@ obj-$(CONFIG_PINCTRL_PISTACHIO) += pinctrl-pistachio.o obj-$(CONFIG_PINCTRL_RK805) += pinctrl-rk805.o obj-$(CONFIG_PINCTRL_ROCKCHIP) += pinctrl-rockchip.o obj-$(CONFIG_PINCTRL_SINGLE) += pinctrl-single.o +obj-$(CONFIG_PINCTRL_ST) += pinctrl-st.o obj-$(CONFIG_PINCTRL_STARFIVE) += pinctrl-starfive.o obj-$(CONFIG_PINCTRL_STMFX) += pinctrl-stmfx.o -obj-$(CONFIG_PINCTRL_ST) += pinctrl-st.o obj-$(CONFIG_PINCTRL_SX150X) += pinctrl-sx150x.o obj-$(CONFIG_PINCTRL_TB10X) += pinctrl-tb10x.o obj-$(CONFIG_PINCTRL_THUNDERBAY) += pinctrl-thunderbay.o From e986f0e602f19ecb7880b04dd1db415ed9bca3f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Bartosik?= Date: Mon, 24 Jan 2022 13:55:29 +0100 Subject: [PATCH 081/367] pinctrl: intel: fix unexpected interrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASUS Chromebook C223 with Celeron N3350 crashes sometimes during cold booot. Inspection of the kernel log showed that it gets into an inifite loop logging the following message: ->handle_irq(): 000000009cdb51e8, handle_bad_irq+0x0/0x251 ->irq_data.chip(): 000000005ec212a7, 0xffffa043009d8e7 ->action(): 00000 IRQ_NOPROBE set unexpected IRQ trap at vector 7c The issue happens during cold boot but only if cold boot happens at most several dozen seconds after Chromebook is powered off. For longer intervals between power off and power on (cold boot) the issue does not reproduce. The unexpected interrupt is sourced from INT3452 GPIO pin which is used for SD card detect. Investigation relevealed that when the interval between power off and power on (cold boot) is less than several dozen seconds then values of INT3452 GPIO interrupt enable and interrupt pending registers survive power off and power on sequence and interrupt for SD card detect pin is enabled and pending during probe of SD controller which causes the unexpected IRQ message. "Intel Pentium and Celeron Processor N- and J- Series" volume 3 doc mentions that GPIO interrupt enable and status registers default value is 0x0. The fix clears INT3452 GPIO interrupt enabled and interrupt pending registers in its probe function. Fixes: 7981c0015af2 ("pinctrl: intel: Add Intel Sunrisepoint pin controller and GPIO support") Signed-off-by: Łukasz Bartosik Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-intel.c | 54 +++++++++++++++++---------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 85750974d182..e9bb98cb9112 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -1216,6 +1216,39 @@ static irqreturn_t intel_gpio_irq(int irq, void *data) return IRQ_RETVAL(ret); } +static void intel_gpio_irq_init(struct intel_pinctrl *pctrl) +{ + int i; + + for (i = 0; i < pctrl->ncommunities; i++) { + const struct intel_community *community; + void __iomem *base; + unsigned int gpp; + + community = &pctrl->communities[i]; + base = community->regs; + + for (gpp = 0; gpp < community->ngpps; gpp++) { + /* Mask and clear all interrupts */ + writel(0, base + community->ie_offset + gpp * 4); + writel(0xffff, base + community->is_offset + gpp * 4); + } + } +} + +static int intel_gpio_irq_init_hw(struct gpio_chip *gc) +{ + struct intel_pinctrl *pctrl = gpiochip_get_data(gc); + + /* + * Make sure the interrupt lines are in a proper state before + * further configuration. + */ + intel_gpio_irq_init(pctrl); + + return 0; +} + static int intel_gpio_add_community_ranges(struct intel_pinctrl *pctrl, const struct intel_community *community) { @@ -1320,6 +1353,7 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq) girq->num_parents = 0; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; + girq->init_hw = intel_gpio_irq_init_hw; ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { @@ -1695,26 +1729,6 @@ int intel_pinctrl_suspend_noirq(struct device *dev) } EXPORT_SYMBOL_GPL(intel_pinctrl_suspend_noirq); -static void intel_gpio_irq_init(struct intel_pinctrl *pctrl) -{ - size_t i; - - for (i = 0; i < pctrl->ncommunities; i++) { - const struct intel_community *community; - void __iomem *base; - unsigned int gpp; - - community = &pctrl->communities[i]; - base = community->regs; - - for (gpp = 0; gpp < community->ngpps; gpp++) { - /* Mask and clear all interrupts */ - writel(0, base + community->ie_offset + gpp * 4); - writel(0xffff, base + community->is_offset + gpp * 4); - } - } -} - static bool intel_gpio_update_reg(void __iomem *reg, u32 mask, u32 value) { u32 curr, updated; From e12963c453263d5321a2c610e98cbc731233b685 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 Jan 2022 20:19:15 +0200 Subject: [PATCH 082/367] pinctrl: intel: Fix a glitch when updating IRQ flags on a preconfigured line The commit af7e3eeb84e2 ("pinctrl: intel: Disable input and output buffer when switching to GPIO") hadn't taken into account an update of the IRQ flags scenario. When updating the IRQ flags on the preconfigured line the ->irq_set_type() is called again. In such case the sequential Rx buffer configuration changes may trigger a falling or rising edge interrupt that may lead, on some platforms, to an undesired event. This may happen because each of intel_gpio_set_gpio_mode() and __intel_gpio_set_direction() updates the pad configuration with a different value of the GPIORXDIS bit. Notable, that the intel_gpio_set_gpio_mode() is called only for the pads that are configured as an input. Due to this fact, integrate the logic of __intel_gpio_set_direction() call into the intel_gpio_set_gpio_mode() so that the Rx buffer won't be disabled and immediately re-enabled. Fixes: af7e3eeb84e2 ("pinctrl: intel: Disable input and output buffer when switching to GPIO") Reported-by: Kane Chen Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Tested-by: Grace Kao --- drivers/pinctrl/intel/pinctrl-intel.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index e9bb98cb9112..826d494f3cc6 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -451,8 +451,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) value &= ~PADCFG0_PMODE_MASK; value |= PADCFG0_PMODE_GPIO; - /* Disable input and output buffers */ - value |= PADCFG0_GPIORXDIS; + /* Disable TX buffer and enable RX (this will be input) */ + value &= ~PADCFG0_GPIORXDIS; value |= PADCFG0_GPIOTXDIS; /* Disable SCI/SMI/NMI generation */ @@ -497,9 +497,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, intel_gpio_set_gpio_mode(padcfg0); - /* Disable TX buffer and enable RX (this will be input) */ - __intel_gpio_set_direction(padcfg0, true); - raw_spin_unlock_irqrestore(&pctrl->lock, flags); return 0; @@ -1115,9 +1112,6 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned int type) intel_gpio_set_gpio_mode(reg); - /* Disable TX buffer and enable RX (this will be input) */ - __intel_gpio_set_direction(reg, true); - value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); From 79da533d3cc717ccc05ddbd3190da8a72bc2408b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 23 Jan 2022 18:23:22 -0800 Subject: [PATCH 083/367] hwmon: (nct6775) Fix crash in clear_caseopen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Paweł Marciniak reports the following crash, observed when clearing the chassis intrusion alarm. BUG: kernel NULL pointer dereference, address: 0000000000000028 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 3 PID: 4815 Comm: bash Tainted: G S 5.16.2-200.fc35.x86_64 #1 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./Z97 Extreme4, BIOS P2.60A 05/03/2018 RIP: 0010:clear_caseopen+0x5a/0x120 [nct6775] Code: 68 70 e8 e9 32 b1 e3 85 c0 0f 85 d2 00 00 00 48 83 7c 24 ... RSP: 0018:ffffabcb02803dd8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000000 RDX: ffff8e8808192880 RSI: 0000000000000000 RDI: ffff8e87c7509a68 RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000a R10: 000000000000000a R11: f000000000000000 R12: 000000000000001f R13: ffff8e87c7509828 R14: ffff8e87c7509a68 R15: ffff8e88494527a0 FS: 00007f4db9151740(0000) GS:ffff8e8ebfec0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 0000000166b66001 CR4: 00000000001706e0 Call Trace: kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x10b/0x180 vfs_write+0x209/0x2a0 ksys_write+0x4f/0xc0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae The problem is that the device passed to clear_caseopen() is the hwmon device, not the platform device, and the platform data is not set in the hwmon device. Store the pointer to sio_data in struct nct6775_data and get if from there if needed. Fixes: 2e7b9886968b ("hwmon: (nct6775) Use superio_*() function pointers in sio_data.") Cc: Denis Pauk Cc: Bernhard Seibold Reported-by: Paweł Marciniak Tested-by: Denis Pauk Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index fd3f91cb01c6..098d12b9ecda 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -1175,7 +1175,7 @@ static inline u8 in_to_reg(u32 val, u8 nr) struct nct6775_data { int addr; /* IO base of hw monitor block */ - int sioreg; /* SIO register address */ + struct nct6775_sio_data *sio_data; enum kinds kind; const char *name; @@ -3559,7 +3559,7 @@ clear_caseopen(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nct6775_data *data = dev_get_drvdata(dev); - struct nct6775_sio_data *sio_data = dev_get_platdata(dev); + struct nct6775_sio_data *sio_data = data->sio_data; int nr = to_sensor_dev_attr(attr)->index - INTRUSION_ALARM_BASE; unsigned long val; u8 reg; @@ -3967,7 +3967,7 @@ static int nct6775_probe(struct platform_device *pdev) return -ENOMEM; data->kind = sio_data->kind; - data->sioreg = sio_data->sioreg; + data->sio_data = sio_data; if (sio_data->access == access_direct) { data->addr = res->start; From 84d46e1fc33cc56b622b2e7a91703157161ce2e9 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 16 Dec 2021 11:11:03 +0800 Subject: [PATCH 084/367] drm/msm: remove variable set but not used The code that uses variable mdss has been removed, So the declaration and assignment of the variable can be removed. Eliminate the following clang warning: drivers/gpu/drm/msm/msm_drv.c:513:19: warning: variable 'mdss' set but not used [-Wunused-but-set-variable] Reported-by: Abaci Robot Fixes: 2027e5b3413d ("drm/msm: Initialize MDSS irq domain at probe time") Signed-off-by: Yang Li Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20211216031103.34146-1-yang.lee@linux.alibaba.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_drv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index ad35a5d94053..59e30192cdf6 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -510,7 +510,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) struct msm_drm_private *priv = dev_get_drvdata(dev); struct drm_device *ddev; struct msm_kms *kms; - struct msm_mdss *mdss; int ret, i; ddev = drm_dev_alloc(drv, dev); @@ -521,8 +520,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) ddev->dev_private = priv; priv->dev = ddev; - mdss = priv->mdss; - priv->wq = alloc_ordered_workqueue("msm", 0); priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; From c04c3148ca12227d92f91b355b4538cc333c9922 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 30 Dec 2021 07:09:40 +0000 Subject: [PATCH 085/367] drm/msm/dsi: Fix missing put_device() call in dsi_get_phy If of_find_device_by_node() succeeds, dsi_get_phy() doesn't a corresponding put_device(). Thus add put_device() to fix the exception handling. Fixes: ec31abf ("drm/msm/dsi: Separate PHY to another platform device") Signed-off-by: Miaoqian Lin Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20211230070943.18116-1-linmq006@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/dsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 052548883d27..0fe02529b5e7 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -40,7 +40,12 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi) of_node_put(phy_node); - if (!phy_pdev || !msm_dsi->phy) { + if (!phy_pdev) { + DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); + return -EPROBE_DEFER; + } + if (!msm_dsi->phy) { + put_device(&phy_pdev->dev); DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); return -EPROBE_DEFER; } From 774fe0cd838d1b1419d41ab4ea0613c80d4ecbd7 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 7 Jan 2022 08:50:22 +0000 Subject: [PATCH 086/367] drm/msm/hdmi: Fix missing put_device() call in msm_hdmi_get_phy The reference taken by 'of_find_device_by_node()' must be released when not needed anymore. Add the corresponding 'put_device()' in the error handling path. Fixes: e00012b256d4 ("drm/msm/hdmi: Make HDMI core get its PHY") Signed-off-by: Miaoqian Lin Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220107085026.23831-1-linmq006@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/hdmi/hdmi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 3acdeae25caf..719720709e9e 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -97,10 +97,15 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) of_node_put(phy_node); - if (!phy_pdev || !hdmi->phy) { + if (!phy_pdev) { DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); return -EPROBE_DEFER; } + if (!hdmi->phy) { + DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); + put_device(&phy_pdev->dev); + return -EPROBE_DEFER; + } hdmi->phy_dev = get_device(&phy_pdev->dev); From 170b22234d5495f5e0844246e23f004639ee89ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sun, 9 Jan 2022 20:24:31 +0100 Subject: [PATCH 087/367] drm/msm/dpu: invalid parameter check in dpu_setup_dspp_pcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function performs a check on the "ctx" input parameter, however, it is used before the check. Initialize the "base" variable after the sanity check to avoid a possible NULL pointer dereference. Fixes: 4259ff7ae509e ("drm/msm/dpu: add support for pcc color block in dpu driver") Addresses-Coverity-ID: 1493866 ("Null pointer dereference") Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20220109192431.135949-1-jose.exposito89@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index a98e964c3b6f..355894a3b48c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -26,9 +26,16 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx, struct dpu_hw_pcc_cfg *cfg) { - u32 base = ctx->cap->sblk->pcc.base; + u32 base; - if (!ctx || !base) { + if (!ctx) { + DRM_ERROR("invalid ctx %pK\n", ctx); + return; + } + + base = ctx->cap->sblk->pcc.base; + + if (!base) { DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base); return; } From 0a727b459ee39bd4c5ced19d6024258ac87b6b2e Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Wed, 12 Jan 2022 20:33:34 +0800 Subject: [PATCH 088/367] drm/msm: Fix wrong size calculation For example, memory-region in .dts as below, reg = <0x0 0x50000000 0x0 0x20000000> We can get below values, struct resource r; r.start = 0x50000000; r.end = 0x6fffffff; So the size should be: size = r.end - r.start + 1 = 0x20000000 Signed-off-by: Xianting Tian Fixes: 072f1f9168ed ("drm/msm: add support for "stolen" mem") Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220112123334.749776-1-xianting.tian@linux.alibaba.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 59e30192cdf6..555666e3f960 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -461,7 +461,7 @@ static int msm_init_vram(struct drm_device *dev) of_node_put(node); if (ret) return ret; - size = r.end - r.start; + size = r.end - r.start + 1; DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start); /* if we have no IOMMU, then we need to use carveout allocator. From 860a7b2a87b7c743154824d0597b6c3eb3b53154 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 13 Jan 2022 08:32:13 -0800 Subject: [PATCH 089/367] drm/msm/a6xx: Add missing suspend_count increment Reported-by: Danylo Piliaiev Fixes: 3ab1c5cc3939 ("drm/msm: Add param for userspace to query suspend count") Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220113163215.215367-1-robdclark@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 51b83776951b..17cfad6424db 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1560,6 +1560,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) for (i = 0; i < gpu->nr_rings; i++) a6xx_gpu->shadow[i] = 0; + gpu->suspend_count++; + return 0; } From 5e761a2287234bc402ba7ef07129f5103bcd775c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sun, 16 Jan 2022 19:18:44 +0100 Subject: [PATCH 090/367] drm/msm/dsi: invalid parameter check in msm_dsi_phy_enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function performs a check on the "phy" input parameter, however, it is used before the check. Initialize the "dev" variable after the sanity check to avoid a possible NULL pointer dereference. Fixes: 5c8290284402b ("drm/msm/dsi: Split PHY drivers to separate files") Addresses-Coverity-ID: 1493860 ("Null pointer dereference") Signed-off-by: José Expósito Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220116181844.7400-1-jose.exposito89@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index c2ed177717c7..2027b38617ab 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -808,12 +808,14 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, struct msm_dsi_phy_clk_request *clk_req, struct msm_dsi_phy_shared_timings *shared_timings) { - struct device *dev = &phy->pdev->dev; + struct device *dev; int ret; if (!phy || !phy->cfg->ops.enable) return -EINVAL; + dev = &phy->pdev->dev; + ret = dsi_phy_enable_resource(phy); if (ret) { DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n", From 63ee956f69d8c181e5251c7ce58b84c1edec0f6a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 24 Jan 2022 20:20:51 -0800 Subject: [PATCH 091/367] bpf: Fix renaming task_getsecid_subj->current_getsecid_subj. The commit 6326948f940d missed renaming of task->current LSM hook in BTF_ID. Fix it to silence build warning: WARN: resolve_btfids: unresolved symbol bpf_lsm_task_getsecid_subj Fixes: 6326948f940d ("lsm: security_task_getsecid_subj() -> security_current_getsecid_subj()") Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_lsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 06062370c3b8..9e4ecc990647 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -207,7 +207,7 @@ BTF_ID(func, bpf_lsm_socket_socketpair) BTF_ID(func, bpf_lsm_syslog) BTF_ID(func, bpf_lsm_task_alloc) -BTF_ID(func, bpf_lsm_task_getsecid_subj) +BTF_ID(func, bpf_lsm_current_getsecid_subj) BTF_ID(func, bpf_lsm_task_getsecid_obj) BTF_ID(func, bpf_lsm_task_prctl) BTF_ID(func, bpf_lsm_task_setscheduler) From 61263b3a11a2594b4e898f166c31162236182b5c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 11 Jan 2022 09:24:41 +0800 Subject: [PATCH 092/367] scsi: elx: efct: Don't use GFP_KERNEL under spin lock GFP_KERNEL/GFP_DMA can't be used under a spin lock. According the comment, els_ios_lock is used to protect els ios list so we can move down the spin lock to avoid using this flag under the lock. Link: https://lore.kernel.org/r/20220111012441.3232527-1-yangyingliang@huawei.com Fixes: 8f406ef72859 ("scsi: elx: libefc: Extended link Service I/O handling") Reported-by: Hulk Robot Reviewed-by: James Smart Signed-off-by: Yang Yingliang Signed-off-by: Martin K. Petersen --- drivers/scsi/elx/libefc/efc_els.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/elx/libefc/efc_els.c b/drivers/scsi/elx/libefc/efc_els.c index 7bb4f9aad2c8..84bc81d7ce76 100644 --- a/drivers/scsi/elx/libefc/efc_els.c +++ b/drivers/scsi/elx/libefc/efc_els.c @@ -46,18 +46,14 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) efc = node->efc; - spin_lock_irqsave(&node->els_ios_lock, flags); - if (!node->els_io_enabled) { efc_log_err(efc, "els io alloc disabled\n"); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } els = mempool_alloc(efc->els_io_pool, GFP_ATOMIC); if (!els) { atomic_add_return(1, &efc->els_io_alloc_failed_count); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } @@ -74,7 +70,6 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) &els->io.req.phys, GFP_KERNEL); if (!els->io.req.virt) { mempool_free(els, efc->els_io_pool); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } @@ -94,10 +89,11 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) /* add els structure to ELS IO list */ INIT_LIST_HEAD(&els->list_entry); + spin_lock_irqsave(&node->els_ios_lock, flags); list_add_tail(&els->list_entry, &node->els_ios_list); + spin_unlock_irqrestore(&node->els_ios_lock, flags); } - spin_unlock_irqrestore(&node->els_ios_lock, flags); return els; } From a861790afaa8b6369eee8a88c5d5d73f5799c0c6 Mon Sep 17 00:00:00 2001 From: ZouMingzhe Date: Tue, 11 Jan 2022 13:47:42 +0800 Subject: [PATCH 093/367] scsi: target: iscsi: Make sure the np under each tpg is unique iscsit_tpg_check_network_portal() has nested for_each loops and is supposed to return true when a match is found. However, the tpg loop will still continue after existing the tpg_np loop. If this tpg_np is not the last the match value will be changed. Break the outer loop after finding a match and make sure the np under each tpg is unique. Link: https://lore.kernel.org/r/20220111054742.19582-1-mingzhe.zou@easystack.cn Signed-off-by: ZouMingzhe Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_tpg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 8075f60fd02c..2d5cf1714ae0 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal( break; } spin_unlock(&tpg->tpg_np_lock); + + if (match) + break; } spin_unlock(&tiqn->tiqn_tpg_lock); From a65b32748f4566f986ba2495a8236c141fa42a26 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Sun, 16 Jan 2022 11:06:49 +0800 Subject: [PATCH 094/367] scsi: ufs: ufshcd-pltfrm: Check the return value of devm_kstrdup() devm_kstrdup() returns pointer to allocated string on success, NULL on failure. So it is better to check the return value of it. Link: https://lore.kernel.org/r/tencent_4257E15D4A94FF9020DDCC4BB9B21C041408@qq.com Reviewed-by: Bean Huo Signed-off-by: Xiaoke Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pltfrm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index 8b16bbbcb806..87975d1a21c8 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -92,6 +92,11 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba) clki->min_freq = clkfreq[i]; clki->max_freq = clkfreq[i+1]; clki->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!clki->name) { + ret = -ENOMEM; + goto out; + } + if (!strcmp(name, "ref_clk")) clki->keep_link_active = true; dev_dbg(dev, "%s: min %u max %u name %s\n", "freq-table-hz", @@ -127,6 +132,8 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, return -ENOMEM; vreg->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!vreg->name) + return -ENOMEM; snprintf(prop_name, MAX_PROP_SIZE, "%s-max-microamp", name); if (of_property_read_u32(np, prop_name, &vreg->max_uA)) { From b70a99fd13282d7885f69bf1372e28b7506a1613 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 17 Jan 2022 05:53:09 -0800 Subject: [PATCH 095/367] scsi: qedf: Add stag_work to all the vports Call trace seen when creating NPIV ports, only 32 out of 64 show online. stag work was not initialized for vport, hence initialize the stag work. WARNING: CPU: 8 PID: 645 at kernel/workqueue.c:1635 __queue_delayed_work+0x68/0x80 CPU: 8 PID: 645 Comm: kworker/8:1 Kdump: loaded Tainted: G IOE --------- -- 4.18.0-348.el8.x86_64 #1 Hardware name: Dell Inc. PowerEdge MX740c/0177V9, BIOS 2.12.2 07/09/2021 Workqueue: events fc_lport_timeout [libfc] RIP: 0010:__queue_delayed_work+0x68/0x80 Code: 89 b2 88 00 00 00 44 89 82 90 00 00 00 48 01 c8 48 89 42 50 41 81 f8 00 20 00 00 75 1d e9 60 24 07 00 44 89 c7 e9 98 f6 ff ff <0f> 0b eb c5 0f 0b eb a1 0f 0b eb a7 0f 0b eb ac 44 89 c6 e9 40 23 RSP: 0018:ffffae514bc3be40 EFLAGS: 00010006 RAX: ffff8d25d6143750 RBX: 0000000000000202 RCX: 0000000000000002 RDX: ffff8d2e31383748 RSI: ffff8d25c000d600 RDI: ffff8d2e31383788 RBP: ffff8d2e31380de0 R08: 0000000000002000 R09: ffff8d2e31383750 R10: ffffffffc0c957e0 R11: ffff8d2624800000 R12: ffff8d2e31380a58 R13: ffff8d2d915eb000 R14: ffff8d25c499b5c0 R15: ffff8d2e31380e18 FS: 0000000000000000(0000) GS:ffff8d2d1fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055fd0484b8b8 CR3: 00000008ffc10006 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: queue_delayed_work_on+0x36/0x40 qedf_elsct_send+0x57/0x60 [qedf] fc_lport_enter_flogi+0x90/0xc0 [libfc] fc_lport_timeout+0xb7/0x140 [libfc] process_one_work+0x1a7/0x360 ? create_worker+0x1a0/0x1a0 worker_thread+0x30/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x116/0x130 ? kthread_flush_work_fn+0x10/0x10 ret_from_fork+0x35/0x40 ---[ end trace 008f00f722f2c2ff ]-- Initialize stag work for all the vports. Link: https://lore.kernel.org/r/20220117135311.6256-2-njavali@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index cdc66e2a9488..3a5ce540cfc4 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -1864,6 +1864,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled) vport_qedf->cmd_mgr = base_qedf->cmd_mgr; init_completion(&vport_qedf->flogi_compl); INIT_LIST_HEAD(&vport_qedf->fcports); + INIT_DELAYED_WORK(&vport_qedf->stag_work, qedf_stag_change_work); rc = qedf_vport_libfc_config(vport, vn_port); if (rc) { From 5239ab63f17cee643bd4bf6addfedebaa7d4f41e Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 17 Jan 2022 05:53:10 -0800 Subject: [PATCH 096/367] scsi: qedf: Fix refcount issue when LOGO is received during TMF Hung task call trace was seen during LOGO processing. [ 974.309060] [0000:00:00.0]:[qedf_eh_device_reset:868]: 1:0:2:0: LUN RESET Issued... [ 974.309065] [0000:00:00.0]:[qedf_initiate_tmf:2422]: tm_flags 0x10 sc_cmd 00000000c16b930f op = 0x2a target_id = 0x2 lun=0 [ 974.309178] [0000:00:00.0]:[qedf_initiate_tmf:2431]: portid=016900 tm_flags =LUN RESET [ 974.309222] [0000:00:00.0]:[qedf_initiate_tmf:2438]: orig io_req = 00000000ec78df8f xid = 0x180 ref_cnt = 1. [ 974.309625] host1: rport 016900: Received LOGO request while in state Ready [ 974.309627] host1: rport 016900: Delete port [ 974.309642] host1: rport 016900: work event 3 [ 974.309644] host1: rport 016900: lld callback ev 3 [ 974.313243] [0000:61:00.2]:[qedf_execute_tmf:2383]:1: fcport is uploading, not executing flush. [ 974.313295] [0000:61:00.2]:[qedf_execute_tmf:2400]:1: task mgmt command success... [ 984.031088] INFO: task jbd2/dm-15-8:7645 blocked for more than 120 seconds. [ 984.031136] Not tainted 4.18.0-305.el8.x86_64 #1 [ 984.031166] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 984.031209] jbd2/dm-15-8 D 0 7645 2 0x80004080 [ 984.031212] Call Trace: [ 984.031222] __schedule+0x2c4/0x700 [ 984.031230] ? unfreeze_partials.isra.83+0x16e/0x1a0 [ 984.031233] ? bit_wait_timeout+0x90/0x90 [ 984.031235] schedule+0x38/0xa0 [ 984.031238] io_schedule+0x12/0x40 [ 984.031240] bit_wait_io+0xd/0x50 [ 984.031243] __wait_on_bit+0x6c/0x80 [ 984.031248] ? free_buffer_head+0x21/0x50 [ 984.031251] out_of_line_wait_on_bit+0x91/0xb0 [ 984.031257] ? init_wait_var_entry+0x50/0x50 [ 984.031268] jbd2_journal_commit_transaction+0x112e/0x19f0 [jbd2] [ 984.031280] kjournald2+0xbd/0x270 [jbd2] [ 984.031284] ? finish_wait+0x80/0x80 [ 984.031291] ? commit_timeout+0x10/0x10 [jbd2] [ 984.031294] kthread+0x116/0x130 [ 984.031300] ? kthread_flush_work_fn+0x10/0x10 [ 984.031305] ret_from_fork+0x1f/0x40 There was a ref count issue when LOGO is received during TMF. This leads to one of the I/Os hanging with the driver. Fix the ref count. Link: https://lore.kernel.org/r/20220117135311.6256-3-njavali@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 99a56ca1fb16..fab43dabe5b3 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -2250,6 +2250,7 @@ process_els: io_req->tm_flags == FCP_TMF_TGT_RESET) { clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); io_req->sc_cmd = NULL; + kref_put(&io_req->refcount, qedf_release_cmd); complete(&io_req->tm_done); } From 64fd4af6274eb0f49d29772c228fffcf6bde1635 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 17 Jan 2022 05:53:11 -0800 Subject: [PATCH 097/367] scsi: qedf: Change context reset messages to ratelimited If FCoE is not configured, libfc/libfcoe keeps on retrying FLOGI and after 3 retries driver does a context reset and tries fipvlan again. This leads to context reset message flooding the logs. Hence ratelimit the message to prevent flooding the logs. Link: https://lore.kernel.org/r/20220117135311.6256-4-njavali@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 3a5ce540cfc4..6ad28bc8e948 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -911,7 +911,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport) struct qed_link_output if_link; if (lport->vport) { - QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n"); + printk_ratelimited("Cannot issue host reset on NPIV port.\n"); return; } @@ -3981,7 +3981,9 @@ void qedf_stag_change_work(struct work_struct *work) struct qedf_ctx *qedf = container_of(work, struct qedf_ctx, stag_work.work); - QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n"); + printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.", + dev_name(&qedf->pdev->dev), __func__, __LINE__, + qedf->dbg_ctx.host_no); qedf_ctx_soft_reset(qedf->lport); } From 62afb379a0fee7e9c2f9f68e1abeb85ceddf51b9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 18 Jan 2022 20:15:05 +0800 Subject: [PATCH 098/367] scsi: pm8001: Fix bogus FW crash for maxcpus=1 According to the comment in check_fw_ready() we should not check the IOP1_READY field in register SCRATCH_PAD_1 for 8008 or 8009 controllers. However we check this very field in process_oq() for processing the highest index interrupt vector. The highest interrupt vector is checked as the FW is programmed to signal fatal errors through this irq. Change that function to not check IOP1_READY for those mentioned controllers, but do check ILA_READY in both cases. The reason I assume that this was not hit earlier was because we always allocated 64 MSI(X), and just did not pass the vector index check in process_oq(), i.e. the handler never ran for vector index 63. Link: https://lore.kernel.org/r/1642508105-95432-1-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Reviewed-by: Damien Le Moal Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 16 ++++++++++++++-- drivers/scsi/pm8001/pm80xx_hwi.h | 6 +++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index bbf538fe15b3..2530d1365556 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -4151,10 +4151,22 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) u32 ret = MPI_IO_STATUS_FAIL; u32 regval; + /* + * Fatal errors are programmed to be signalled in irq vector + * pm8001_ha->max_q_num - 1 through pm8001_ha->main_cfg_tbl.pm80xx_tbl. + * fatal_err_interrupt + */ if (vec == (pm8001_ha->max_q_num - 1)) { + u32 mipsall_ready; + + if (pm8001_ha->chip_id == chip_8008 || + pm8001_ha->chip_id == chip_8009) + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_8PORT; + else + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_16PORT; + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); - if ((regval & SCRATCH_PAD_MIPSALL_READY) != - SCRATCH_PAD_MIPSALL_READY) { + if ((regval & mipsall_ready) != mipsall_ready) { pm8001_ha->controller_fatal_error = true; pm8001_dbg(pm8001_ha, FAIL, "Firmware Fatal error! Regval:0x%x\n", diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index c7e5d93bea92..c41ed039c92a 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -1405,8 +1405,12 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t; #define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0 #define SCRATCH_PAD_IOP0_READY 0xC00 #define SCRATCH_PAD_IOP1_READY 0x3000 -#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \ +#define SCRATCH_PAD_MIPSALL_READY_16PORT (SCRATCH_PAD_IOP1_READY | \ SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ + SCRATCH_PAD_RAAE_READY) +#define SCRATCH_PAD_MIPSALL_READY_8PORT (SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ SCRATCH_PAD_RAAE_READY) /* boot loader state */ From 8c9db6679be4348b8aae108e11d4be2f83976e30 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 18 Jan 2022 17:58:03 +0100 Subject: [PATCH 099/367] scsi: zfcp: Fix failed recovery on gone remote port with non-NPIV FCP devices Suppose we have an environment with a number of non-NPIV FCP devices (virtual HBAs / FCP devices / zfcp "adapter"s) sharing the same physical FCP channel (HBA port) and its I_T nexus. Plus a number of storage target ports zoned to such shared channel. Now one target port logs out of the fabric causing an RSCN. Zfcp reacts with an ADISC ELS and subsequent port recovery depending on the ADISC result. This happens on all such FCP devices (in different Linux images) concurrently as they all receive a copy of this RSCN. In the following we look at one of those FCP devices. Requests other than FSF_QTCB_FCP_CMND can be slow until they get a response. Depending on which requests are affected by slow responses, there are different recovery outcomes. Here we want to fix failed recoveries on port or adapter level by avoiding recovery requests that can be slow. We need the cached N_Port_ID for the remote port "link" test with ADISC. Just before sending the ADISC, we now intentionally forget the old cached N_Port_ID. The idea is that on receiving an RSCN for a port, we have to assume that any cached information about this port is stale. This forces a fresh new GID_PN [FC-GS] nameserver lookup on any subsequent recovery for the same port. Since we typically can still communicate with the nameserver efficiently, we now reach steady state quicker: Either the nameserver still does not know about the port so we stop recovery, or the nameserver already knows the port potentially with a new N_Port_ID and we can successfully and quickly perform open port recovery. For the one case, where ADISC returns successfully, we re-initialize port->d_id because that case does not involve any port recovery. This also solves a problem if the storage WWPN quickly logs into the fabric again but with a different N_Port_ID. Such as on virtual WWPN takeover during target NPIV failover. [https://www.redbooks.ibm.com/abstracts/redp5477.html] In that case the RSCN from the storage FDISC was ignored by zfcp and we could not successfully recover the failover. On some later failback on the storage, we could have been lucky if the virtual WWPN got the same old N_Port_ID from the SAN switch as we still had cached. Then the related RSCN triggered a successful port reopen recovery. However, there is no guarantee to get the same N_Port_ID on NPIV FDISC. Even though NPIV-enabled FCP devices are not affected by this problem, this code change optimizes recovery time for gone remote ports as a side effect. The timely drop of cached N_Port_IDs prevents unnecessary slow open port attempts. While the problem might have been in code before v2.6.32 commit 799b76d09aee ("[SCSI] zfcp: Decouple gid_pn requests from erp") this fix depends on the gid_pn_work introduced with that commit, so we mark it as culprit to satisfy fix dependencies. Note: Point-to-point remote port is already handled separately and gets its N_Port_ID from the cached peer_d_id. So resetting port->d_id in general does not affect PtP. Link: https://lore.kernel.org/r/20220118165803.3667947-1-maier@linux.ibm.com Fixes: 799b76d09aee ("[SCSI] zfcp: Decouple gid_pn requests from erp") Cc: #2.6.32+ Suggested-by: Benjamin Block Reviewed-by: Benjamin Block Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index d24cafe02708..511bf8e0a436 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *data) goto out; } + /* re-init to undo drop from zfcp_fc_adisc() */ + port->d_id = ntoh24(adisc_resp->adisc_port_id); /* port is good, unblock rport without going through erp */ zfcp_scsi_schedule_rport_register(port); out: @@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port) struct zfcp_fc_req *fc_req; struct zfcp_adapter *adapter = port->adapter; struct Scsi_Host *shost = adapter->scsi_host; + u32 d_id; int ret; fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC); @@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port) fc_req->u.adisc.req.adisc_cmd = ELS_ADISC; hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost)); - ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els, + d_id = port->d_id; /* remember as destination for send els below */ + /* + * Force fresh GID_PN lookup on next port recovery. + * Must happen after request setup and before sending request, + * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler(). + */ + port->d_id = 0; + + ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els, ZFCP_FC_CTELS_TMO); if (ret) kmem_cache_free(zfcp_fc_req_cache, fc_req); From 847f9ea4c5186fdb7b84297e3eeed9e340e83fce Mon Sep 17 00:00:00 2001 From: John Meneghini Date: Fri, 14 Jan 2022 23:00:44 -0500 Subject: [PATCH 100/367] scsi: bnx2fc: Flush destroy_work queue before calling bnx2fc_interface_put() The bnx2fc_destroy() functions are removing the interface before calling destroy_work. This results multiple WARNings from sysfs_remove_group() as the controller rport device attributes are removed too early. Replace the fcoe_port's destroy_work queue. It's not needed. The problem is easily reproducible with the following steps. Example: $ dmesg -w & $ systemctl enable --now fcoe $ fipvlan -s -c ens2f1 $ fcoeadm -d ens2f1.802 [ 583.464488] host2: libfc: Link down on port (7500a1) [ 583.472651] bnx2fc: 7500a1 - rport not created Yet!! [ 583.490468] ------------[ cut here ]------------ [ 583.538725] sysfs group 'power' not found for kobject 'rport-2:0-0' [ 583.568814] WARNING: CPU: 3 PID: 192 at fs/sysfs/group.c:279 sysfs_remove_group+0x6f/0x80 [ 583.607130] Modules linked in: dm_service_time 8021q garp mrp stp llc bnx2fc cnic uio rpcsec_gss_krb5 auth_rpcgss nfsv4 ... [ 583.942994] CPU: 3 PID: 192 Comm: kworker/3:2 Kdump: loaded Not tainted 5.14.0-39.el9.x86_64 #1 [ 583.984105] Hardware name: HP ProLiant DL120 G7, BIOS J01 07/01/2013 [ 584.016535] Workqueue: fc_wq_2 fc_rport_final_delete [scsi_transport_fc] [ 584.050691] RIP: 0010:sysfs_remove_group+0x6f/0x80 [ 584.074725] Code: ff 5b 48 89 ef 5d 41 5c e9 ee c0 ff ff 48 89 ef e8 f6 b8 ff ff eb d1 49 8b 14 24 48 8b 33 48 c7 c7 ... [ 584.162586] RSP: 0018:ffffb567c15afdc0 EFLAGS: 00010282 [ 584.188225] RAX: 0000000000000000 RBX: ffffffff8eec4220 RCX: 0000000000000000 [ 584.221053] RDX: ffff8c1586ce84c0 RSI: ffff8c1586cd7cc0 RDI: ffff8c1586cd7cc0 [ 584.255089] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffb567c15afc00 [ 584.287954] R10: ffffb567c15afbf8 R11: ffffffff8fbe7f28 R12: ffff8c1486326400 [ 584.322356] R13: ffff8c1486326480 R14: ffff8c1483a4a000 R15: 0000000000000004 [ 584.355379] FS: 0000000000000000(0000) GS:ffff8c1586cc0000(0000) knlGS:0000000000000000 [ 584.394419] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 584.421123] CR2: 00007fe95a6f7840 CR3: 0000000107674002 CR4: 00000000000606e0 [ 584.454888] Call Trace: [ 584.466108] device_del+0xb2/0x3e0 [ 584.481701] device_unregister+0x13/0x60 [ 584.501306] bsg_unregister_queue+0x5b/0x80 [ 584.522029] bsg_remove_queue+0x1c/0x40 [ 584.541884] fc_rport_final_delete+0xf3/0x1d0 [scsi_transport_fc] [ 584.573823] process_one_work+0x1e3/0x3b0 [ 584.592396] worker_thread+0x50/0x3b0 [ 584.609256] ? rescuer_thread+0x370/0x370 [ 584.628877] kthread+0x149/0x170 [ 584.643673] ? set_kthread_struct+0x40/0x40 [ 584.662909] ret_from_fork+0x22/0x30 [ 584.680002] ---[ end trace 53575ecefa942ece ]--- Link: https://lore.kernel.org/r/20220115040044.1013475-1-jmeneghi@redhat.com Fixes: 0cbf32e1681d ("[SCSI] bnx2fc: Avoid calling bnx2fc_if_destroy with unnecessary locks") Tested-by: Guangwu Zhang Co-developed-by: Maurizio Lombardi Signed-off-by: Maurizio Lombardi Signed-off-by: John Meneghini Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 71fa62bd3083..9be273c320e2 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -82,7 +82,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba); static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba); static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, struct device *parent, int npiv); -static void bnx2fc_destroy_work(struct work_struct *work); +static void bnx2fc_port_destroy(struct fcoe_port *port); static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev); static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device @@ -907,9 +907,6 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, __bnx2fc_destroy(interface); } mutex_unlock(&bnx2fc_dev_lock); - - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); return; default: @@ -1215,8 +1212,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport) mutex_unlock(&n_port->lp_mutex); bnx2fc_free_vport(interface->hba, port->lport); bnx2fc_port_shutdown(port->lport); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); return 0; } @@ -1525,7 +1522,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, port->lport = lport; port->priv = interface; port->get_netdev = bnx2fc_netdev; - INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); /* Configure fcoe_port */ rc = bnx2fc_lport_config(lport); @@ -1653,8 +1649,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface) bnx2fc_interface_cleanup(interface); bnx2fc_stop(interface); list_del(&interface->list); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); } /** @@ -1694,15 +1690,12 @@ netdev_err: return rc; } -static void bnx2fc_destroy_work(struct work_struct *work) +static void bnx2fc_port_destroy(struct fcoe_port *port) { - struct fcoe_port *port; struct fc_lport *lport; - port = container_of(work, struct fcoe_port, destroy_work); lport = port->lport; - - BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n"); + BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport); bnx2fc_if_destroy(lport); } @@ -2556,9 +2549,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev) __bnx2fc_destroy(interface); mutex_unlock(&bnx2fc_dev_lock); - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); - bnx2fc_ulp_stop(hba); /* unregister cnic device */ if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic)) From fb8d5ea8fd907faa3751a9e5df5d01b5f3803e35 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 15 Jan 2022 09:53:03 +0100 Subject: [PATCH 101/367] scsi: 3w-sas: Remove useless DMA-32 fallback configuration As stated in [1], dma_set_mask() with a 64-bit mask never fails if dev->dma_mask is non-NULL. So, if it fails, the 32-bit case will also fail for the same reason. Simplify code and remove some dead code accordingly. [1]: https://lore.kernel.org/linux-kernel/YL3vSPK5DXTNvgdx@infradead.org/#t Link: https://lore.kernel.org/r/dbbe8671ca760972d80f8d35f3170b4609bee368.1642236763.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/3w-sas.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index b9482da79512..3ebe66151dcb 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -1567,8 +1567,6 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) pci_try_set_mwi(pdev); retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (retval) - retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x18, "Failed to set dma mask"); retval = -ENODEV; @@ -1786,8 +1784,6 @@ static int __maybe_unused twl_resume(struct device *dev) pci_try_set_mwi(pdev); retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (retval) - retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x25, "Failed to set dma mask during resume"); retval = -ENODEV; From 8001fa240fc0af1c3538a9fbaccd2c345ff9ab62 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 15 Jan 2022 10:05:22 +0100 Subject: [PATCH 102/367] scsi: hisi_sas: Remove useless DMA-32 fallback configuration As stated in [1], dma_set_mask() with a 64-bit mask never fails if dev->dma_mask is non-NULL. So, if it fails, the 32-bit case will also fail for the same reason. Simplify code and remove some dead code accordingly. [1]: https://lore.kernel.org/linux-kernel/YL3vSPK5DXTNvgdx@infradead.org/#t Link: https://lore.kernel.org/r/1bf2d3660178b0e6f172e5208bc0bd68d31d9268.1642237482.git.christophe.jaillet@wanadoo.fr Acked-by: John Garry Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 3 --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index a05ec7aece5a..2f53a2ee024a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2666,9 +2666,6 @@ static struct Scsi_Host *hisi_sas_shost_alloc(struct platform_device *pdev, goto err_out; error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); - if (error) - error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); - if (error) { dev_err(dev, "No usable DMA addressing method\n"); goto err_out; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a45ef9a5e12e..a01a3a7b706b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4695,8 +4695,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_out; rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (rc) { dev_err(dev, "No usable DMA addressing method\n"); rc = -ENODEV; From 012d98dae453821ac31da25595ffa26d4ad49c8c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 15 Jan 2022 10:15:41 +0100 Subject: [PATCH 103/367] scsi: bfa: Remove useless DMA-32 fallback configuration As stated in [1], dma_set_mask() with a 64-bit mask never fails if dev->dma_mask is non-NULL. So, if it fails, the 32-bit case will also fail for the same reason. Simplify code and remove some dead code accordingly. [1]: https://lore.kernel.org/linux-kernel/YL3vSPK5DXTNvgdx@infradead.org/#t Link: https://lore.kernel.org/r/5663cef9b54004fa56cca7ce65f51eadfc3ecddb.1642238127.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfad.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 440ef32be048..e5aa982ffedc 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -732,9 +732,6 @@ bfad_pci_init(struct pci_dev *pdev, struct bfad_s *bfad) pci_set_master(pdev); rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (rc) { rc = -ENODEV; printk(KERN_ERR "dma_set_mask_and_coherent fail %p\n", pdev); @@ -1559,9 +1556,6 @@ bfad_pci_slot_reset(struct pci_dev *pdev) pci_set_master(pdev); rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, - DMA_BIT_MASK(32)); if (rc) goto out_disable_device; From ad6c8a426446873febc98140d81d5353f8c0825b Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Fri, 21 Jan 2022 14:33:02 +0900 Subject: [PATCH 104/367] scsi: ufs: Use generic error code in ufshcd_set_dev_pwr_mode() The return value of ufshcd_set_dev_pwr_mode() is passed to device PM core. However, the function currently returns a SCSI result which the PM core doesn't understand. This might lead to unexpected behaviors in userland; a platform reset was observed in Android. Use a generic error code for SSU failures. Link: https://lore.kernel.org/r/1642743182-54098-1-git-send-email-kwmad.kim@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 460d2b440d2e..50b12d60dc1b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8613,7 +8613,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) * @pwr_mode: device power mode to set * * Returns 0 if requested power mode is set successfully - * Returns non-zero if failed to set the requested power mode + * Returns < 0 if failed to set the requested power mode */ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, enum ufs_dev_pwr_mode pwr_mode) @@ -8667,8 +8667,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, sdev_printk(KERN_WARNING, sdp, "START_STOP failed for power mode: %d, result %x\n", pwr_mode, ret); - if (ret > 0 && scsi_sense_valid(&sshdr)) - scsi_print_sense_hdr(sdp, NULL, &sshdr); + if (ret > 0) { + if (scsi_sense_valid(&sshdr)) + scsi_print_sense_hdr(sdp, NULL, &sshdr); + ret = -EIO; + } } if (!ret) From c99b9b2301492b665b6e51ba6c06ec362eddcd10 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Fri, 21 Jan 2022 14:37:55 +0900 Subject: [PATCH 105/367] scsi: ufs: Treat link loss as fatal error This event is raised when link is lost as specified in UFSHCI spec and that means communication is not possible. Thus initializing UFS interface needs to be done. Make UFS driver considers Link Lost as fatal in the INT_FATAL_ERRORS mask. This will trigger a host reset whenever a link lost interrupt occurs. Link: https://lore.kernel.org/r/1642743475-54275-1-git-send-email-kwmad.kim@samsung.com Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 6a295c88d850..a7ff0e5b5494 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -142,7 +142,8 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) + CRYPTO_ENGINE_FATAL_ERROR |\ + UIC_LINK_LOST) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT 0x1 From efd7bb1d75cf6808d67c869a29245c88a990bdea Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 23 Jan 2022 17:55:30 +0000 Subject: [PATCH 106/367] scsi: 53c700: Remove redundant assignment to pointer SCp Pointer SCp is being re-assigned the same value that it was initialized to a few lines earlier, the assignment is redundant and can be removed. Link: https://lore.kernel.org/r/20220123175530.110462-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/53c700.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 3ad3ebaca8e9..ad4972c0fc53 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1507,7 +1507,6 @@ NCR_700_intr(int irq, void *dev_id) struct scsi_cmnd *SCp = hostdata->cmd; handled = 1; - SCp = hostdata->cmd; if(istat & SCSI_INT_PENDING) { udelay(10); From 4db09593af0b0b4d7d4805ebb3273df51d7cc30d Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 23 Jan 2022 14:57:17 -0800 Subject: [PATCH 107/367] scsi: myrs: Fix crash in error case In myrs_detect(), cs->disable_intr is NULL when privdata->hw_init() fails with non-zero. In this case, myrs_cleanup(cs) will call a NULL ptr and crash the kernel. [ 1.105606] myrs 0000:00:03.0: Unknown Initialization Error 5A [ 1.105872] myrs 0000:00:03.0: Failed to initialize Controller [ 1.106082] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 1.110774] Call Trace: [ 1.110950] myrs_cleanup+0xe4/0x150 [myrs] [ 1.111135] myrs_probe.cold+0x91/0x56a [myrs] [ 1.111302] ? DAC960_GEM_intr_handler+0x1f0/0x1f0 [myrs] [ 1.111500] local_pci_probe+0x48/0x90 Link: https://lore.kernel.org/r/20220123225717.1069538-1-ztong0001@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Tong Zhang Signed-off-by: Martin K. Petersen --- drivers/scsi/myrs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 253ceca54a84..7eb8c39da366 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -2267,7 +2267,8 @@ static void myrs_cleanup(struct myrs_hba *cs) myrs_unmap(cs); if (cs->mmio_base) { - cs->disable_intr(cs); + if (cs->disable_intr) + cs->disable_intr(cs); iounmap(cs->mmio_base); cs->mmio_base = NULL; } From 5ec1cebd59300ddd26dbaa96c17c508764eef911 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Mon, 4 Oct 2021 04:59:13 -0700 Subject: [PATCH 108/367] drm/atomic: Add the crtc to affected crtc only if uapi.enable = true MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of a modeset where a mode gets split across multiple CRTCs in the driver specific implementation (bigjoiner in i915) we wrongly count the affected CRTCs based on the drm_crtc_mask and indicate the stolen CRTC as an affected CRTC in atomic_check_only(). This triggers a warning since affected CRTCs doent match requested CRTC. To fix this in such bigjoiner configurations, we should only increment affected crtcs if that CRTC is enabled in UAPI not if it is just used internally in the driver to split the mode. v3: Add the same uapi crtc_state->enable check in requested crtc calc (Ville) Cc: Ville Syrjälä Cc: Simon Ser Cc: Pekka Paalanen Cc: Daniel Stone Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v5.11+ Fixes: 919c2299a893 ("drm/i915: Enable bigjoiner") Signed-off-by: Manasi Navare Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211004115913.23889-1-manasi.d.navare@intel.com --- drivers/gpu/drm/drm_atomic.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index ff1416cd609a..a1e4c7905ebb 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1310,8 +1310,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state) DRM_DEBUG_ATOMIC("checking %p\n", state); - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) - requested_crtc |= drm_crtc_mask(crtc); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->enable) + requested_crtc |= drm_crtc_mask(crtc); + } for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { ret = drm_atomic_plane_check(old_plane_state, new_plane_state); @@ -1360,8 +1362,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) - affected_crtc |= drm_crtc_mask(crtc); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->enable) + affected_crtc |= drm_crtc_mask(crtc); + } /* * For commits that allow modesets drivers can add other CRTCs to the From 22f7ff0dea9491e90b6fe808ed40c30bd791e5c2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 22 Jan 2022 20:55:30 +1000 Subject: [PATCH 109/367] KVM: PPC: Book3S HV Nested: Fix nested HFSCR being clobbered with multiple vCPUs The L0 is storing HFSCR requested by the L1 for the L2 in struct kvm_nested_guest when the L1 requests a vCPU enter L2. kvm_nested_guest is not a per-vCPU structure. Hilarity ensues. Fix it by moving the nested hfscr into the vCPU structure together with the other per-vCPU nested fields. Fixes: 8b210a880b35 ("KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220122105530.3477250-1-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 1 - arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/book3s_hv.c | 3 +-- arch/powerpc/kvm/book3s_hv_nested.c | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fe07558173ef..827038a33064 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -39,7 +39,6 @@ struct kvm_nested_guest { pgd_t *shadow_pgtable; /* our page table for this guest */ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */ u64 process_table; /* process table entry for this guest */ - u64 hfscr; /* HFSCR that the L1 requested for this nested guest */ long refcnt; /* number of pointers to this struct */ struct mutex tlb_lock; /* serialize page faults and tlbies */ struct kvm_nested_guest *next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a770443cd6e0..d9bf60bf0816 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -818,6 +818,7 @@ struct kvm_vcpu_arch { /* For support of nested guests */ struct kvm_nested_guest *nested; + u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */ u32 nested_vcpu_id; gpa_t nested_io_gpr; #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d1817cd9a691..84c89f08ae9a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1816,7 +1816,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { - struct kvm_nested_guest *nested = vcpu->arch.nested; int r; int srcu_idx; @@ -1922,7 +1921,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) * it into a HEAI. */ if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || - (nested->hfscr & (1UL << cause))) { + (vcpu->arch.nested_hfscr & (1UL << cause))) { vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 8f8daaeeb3b7..9d373f8963ee 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -363,7 +363,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* set L1 state to L2 state */ vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; - l2->hfscr = l2_hv.hfscr; + vcpu->arch.nested_hfscr = l2_hv.hfscr; vcpu->arch.regs = l2_regs; /* Guest must always run with ME enabled, HV disabled. */ From 8defc2a5dd8f4c0cb19ecbaca8d3e89ab98524da Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Jan 2022 00:39:28 +1000 Subject: [PATCH 110/367] powerpc/64s/interrupt: Fix decrementer storm The decrementer exception can fail to be cleared when the interrupt returns in the case where the decrementer wraps with the next timer still beyond decrementer_max. This results in a decrementer interrupt storm. This is triggerable with small decrementer system with hard and soft watchdogs disabled. Fix this by always programming the decrementer if there was no timer. Fixes: 0faf20a1ad16 ("powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless perf is in use") Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220124143930.3923442-1-npiggin@gmail.com --- arch/powerpc/kernel/time.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 62361cc7281c..cd0b8b71ecdd 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -649,8 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); __this_cpu_inc(irq_stat.timer_irqs_others); } From 5c89be1dd5cfb697614bc13626ba3bd0781aa160 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 24 Jan 2022 11:36:05 +0100 Subject: [PATCH 111/367] KVM: x86: Move CPUID.(EAX=0x12,ECX=1) mangling to __kvm_update_cpuid_runtime() Full equality check of CPUID data on update (kvm_cpuid_check_equal()) may fail for SGX enabled CPUs as CPUID.(EAX=0x12,ECX=1) is currently being mangled in kvm_vcpu_after_set_cpuid(). Move it to __kvm_update_cpuid_runtime() and split off cpuid_get_supported_xcr0() helper as 'vcpu->arch.guest_supported_xcr0' update needs (logically) to stay in kvm_vcpu_after_set_cpuid(). Cc: stable@vger.kernel.org Fixes: feb627e8d6f6 ("KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN") Signed-off-by: Vitaly Kuznetsov Message-Id: <20220124103606.2630588-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 54 +++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 3902c28fb6cb..89d7822a8f5b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -196,10 +196,26 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) vcpu->arch.pv_cpuid.features = best->eax; } +/* + * Calculate guest's supported XCR0 taking into account guest CPUID data and + * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0). + */ +static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent) +{ + struct kvm_cpuid_entry2 *best; + + best = cpuid_entry2_find(entries, nent, 0xd, 0); + if (!best) + return 0; + + return (best->eax | ((u64)best->edx << 32)) & supported_xcr0; +} + static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries, int nent) { struct kvm_cpuid_entry2 *best; + u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent); best = cpuid_entry2_find(entries, nent, 1, 0); if (best) { @@ -238,6 +254,21 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT); } + + /* + * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate + * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's + * requested XCR0 value. The enclave's XFRM must be a subset of XCRO + * at the time of EENTER, thus adjust the allowed XFRM by the guest's + * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to + * '1' even on CPUs that don't support XSAVE. + */ + best = cpuid_entry2_find(entries, nent, 0x12, 0x1); + if (best) { + best->ecx &= guest_supported_xcr0 & 0xffffffff; + best->edx &= guest_supported_xcr0 >> 32; + best->ecx |= XFEATURE_MASK_FPSSE; + } } void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) @@ -261,27 +292,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - best = kvm_find_cpuid_entry(vcpu, 0xD, 0); - if (!best) - vcpu->arch.guest_supported_xcr0 = 0; - else - vcpu->arch.guest_supported_xcr0 = - (best->eax | ((u64)best->edx << 32)) & supported_xcr0; - - /* - * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate - * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's - * requested XCR0 value. The enclave's XFRM must be a subset of XCRO - * at the time of EENTER, thus adjust the allowed XFRM by the guest's - * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to - * '1' even on CPUs that don't support XSAVE. - */ - best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1); - if (best) { - best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff; - best->edx &= vcpu->arch.guest_supported_xcr0 >> 32; - best->ecx |= XFEATURE_MASK_FPSSE; - } + vcpu->arch.guest_supported_xcr0 = + cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); kvm_update_pv_runtime(vcpu); From b9bed78e2fa9571b7c983b20666efa0009030c71 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 00:06:24 +0000 Subject: [PATCH 112/367] KVM: VMX: Set vmcs.PENDING_DBG.BS on #DB in STI/MOVSS blocking shadow Set vmcs.GUEST_PENDING_DBG_EXCEPTIONS.BS, a.k.a. the pending single-step breakpoint flag, when re-injecting a #DB with RFLAGS.TF=1, and STI or MOVSS blocking is active. Setting the flag is necessary to make VM-Entry consistency checks happy, as VMX has an invariant that if RFLAGS.TF is set and STI/MOVSS blocking is true, then the previous instruction must have been STI or MOV/POP, and therefore a single-step #DB must be pending since the RFLAGS.TF cannot have been set by the previous instruction, i.e. the one instruction delay after setting RFLAGS.TF must have already expired. Normally, the CPU sets vmcs.GUEST_PENDING_DBG_EXCEPTIONS.BS appropriately when recording guest state as part of a VM-Exit, but #DB VM-Exits intentionally do not treat the #DB as "guest state" as interception of the #DB effectively makes the #DB host-owned, thus KVM needs to manually set PENDING_DBG.BS when forwarding/re-injecting the #DB to the guest. Note, although this bug can be triggered by guest userspace, doing so requires IOPL=3, and guest userspace running with IOPL=3 has full access to all I/O ports (from the guest's perspective) and can crash/reboot the guest any number of ways. IOPL=3 is required because STI blocking kicks in if and only if RFLAGS.IF is toggled 0=>1, and if CPL>IOPL, STI either takes a #GP or modifies RFLAGS.VIF, not RFLAGS.IF. MOVSS blocking can be initiated by userspace, but can be coincident with a #DB if and only if DR7.GD=1 (General Detect enabled) and a MOV DR is executed in the MOVSS shadow. MOV DR #GPs at CPL>0, thus MOVSS blocking is problematic only for CPL0 (and only if the guest is crazy enough to access a DR in a MOVSS shadow). All other sources of #DBs are either suppressed by MOVSS blocking (single-step, code fetch, data, and I/O), are mutually exclusive with MOVSS blocking (T-bit task switch), or are already handled by KVM (ICEBP, a.k.a. INT1). This bug was originally found by running tests[1] created for XSA-308[2]. Note that Xen's userspace test emits ICEBP in the MOVSS shadow, which is presumably why the Xen bug was deemed to be an exploitable DOS from guest userspace. KVM already handles ICEBP by skipping the ICEBP instruction and thus clears MOVSS blocking as a side effect of its "emulation". [1] http://xenbits.xenproject.org/docs/xtf/xsa-308_2main_8c_source.html [2] https://xenbits.xen.org/xsa/advisory-308.html Reported-by: David Woodhouse Reported-by: Alexander Graf Signed-off-by: Sean Christopherson Message-Id: <20220120000624.655815-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 705e5c082738..2833b095a804 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4905,8 +4905,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) dr6 = vmx_get_exit_qual(vcpu); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + /* + * If the #DB was due to ICEBP, a.k.a. INT1, skip the + * instruction. ICEBP generates a trap-like #DB, but + * despite its interception control being tied to #DB, + * is an instruction intercept, i.e. the VM-Exit occurs + * on the ICEBP itself. Note, skipping ICEBP also + * clears STI and MOVSS blocking. + * + * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS + * if single-step is enabled in RFLAGS and STI or MOVSS + * blocking is active, as the CPU doesn't set the bit + * on VM-Exit due to #DB interception. VM-Entry has a + * consistency check that a single-step #DB is pending + * in this scenario as the previous instruction cannot + * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV + * don't modify RFLAGS), therefore the one instruction + * delay when activating single-step breakpoints must + * have already expired. Note, the CPU sets/clears BS + * as appropriate for all other VM-Exits types. + */ if (is_icebp(intr_info)) WARN_ON(!skip_emulated_instruction(vcpu)); + else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS))) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; From de1956f48543e90f94b1194395f33140898b39b2 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Thu, 20 Jan 2022 00:38:26 +0000 Subject: [PATCH 113/367] KVM: selftests: Re-enable access_tracking_perf_test This selftest was accidentally removed by commit 6a58150859fd ("selftest: KVM: Add intra host migration tests"). Add it back. Fixes: 6a58150859fd ("selftest: KVM: Add intra host migration tests") Signed-off-by: David Matlack Message-Id: <20220120003826.2805036-1-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 81ebf99d6ff0..0e4926bc9a58 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -85,6 +85,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/amx_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test From d081a343dd18f6733f2f4d9a2521db92de9f7b75 Mon Sep 17 00:00:00 2001 From: Quanfa Fu Date: Sun, 19 Dec 2021 17:14:46 +0800 Subject: [PATCH 114/367] KVM/X86: Make kvm_vcpu_reload_apic_access_page() static Make kvm_vcpu_reload_apic_access_page() static as it is no longer invoked directly by vmx and it is also no longer exported. No functional change intended. Signed-off-by: Quanfa Fu Message-Id: <20211219091446.174584-1-quanfafu@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 682ad02a4e58..9b2fffeeab16 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1856,7 +1856,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); -void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 55518b7d3b96..9ceeab1e5bdc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9753,7 +9753,7 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); } -void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) +static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { if (!lapic_in_kernel(vcpu)) return; From 167a668ab0edf92bfd043bafd24e7f895d074173 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 8 Jan 2022 10:09:10 -0800 Subject: [PATCH 115/367] drm/msm/gpu: Wait for idle before suspending System suspend uses pm_runtime_force_suspend(), which cheekily bypasses the runpm reference counts. This doesn't actually work so well when the GPU is active. So add a reasonable delay waiting for the GPU to become idle. Alternatively we could just return -EBUSY in this case, but that has the disadvantage of causing system suspend to fail. v2: s/ret/remaining [sboyd], and switch to using active_submits count to ensure we aren't racing with submit cleanup (and devfreq idle work getting scheduled, etc) v3: fix inverted logic Signed-off-by: Rob Clark Reviewed-by: Bjorn Andersson Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220108180913.814448-2-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_device.c | 18 ++++++++++++++++++ drivers/gpu/drm/msm/msm_gpu.c | 3 +++ drivers/gpu/drm/msm/msm_gpu.h | 3 +++ 3 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 93005839b5da..fb261930ad1c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -608,9 +608,27 @@ static int adreno_resume(struct device *dev) return gpu->funcs->pm_resume(gpu); } +static int active_submits(struct msm_gpu *gpu) +{ + int active_submits; + mutex_lock(&gpu->active_lock); + active_submits = gpu->active_submits; + mutex_unlock(&gpu->active_lock); + return active_submits; +} + static int adreno_suspend(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); + int remaining; + + remaining = wait_event_timeout(gpu->retire_event, + active_submits(gpu) == 0, + msecs_to_jiffies(1000)); + if (remaining == 0) { + dev_err(dev, "Timeout waiting for GPU to suspend\n"); + return -EBUSY; + } return gpu->funcs->pm_suspend(gpu); } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0f78c2615272..2c1049c0ea14 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -703,6 +703,8 @@ static void retire_submits(struct msm_gpu *gpu) } } } + + wake_up_all(&gpu->retire_event); } static void retire_worker(struct kthread_work *work) @@ -848,6 +850,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, INIT_LIST_HEAD(&gpu->active_list); mutex_init(&gpu->active_lock); mutex_init(&gpu->lock); + init_waitqueue_head(&gpu->retire_event); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); kthread_init_work(&gpu->fault_work, fault_worker); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 445c6bfd4b6b..92aa1e9196c6 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -230,6 +230,9 @@ struct msm_gpu { /* work for handling GPU recovery: */ struct kthread_work recover_work; + /** retire_event: notified when submits are retired: */ + wait_queue_head_t retire_event; + /* work for handling active-list retiring: */ struct kthread_work retire_work; From 6aa89ae1fb049614b7e03e24485bbfb96754a02b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 8 Jan 2022 10:09:11 -0800 Subject: [PATCH 116/367] drm/msm/gpu: Cancel idle/boost work on suspend With system suspend using pm_runtime_force_suspend() we can't rely on the pm_runtime_get_if_in_use() trick to deal with devfreq callbacks after (or racing with) suspend. So flush any pending idle or boost work in the suspend path. Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220108180913.814448-3-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu_devfreq.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 62405e980925..9bf319be11f6 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -133,6 +133,18 @@ void msm_devfreq_init(struct msm_gpu *gpu) CLOCK_MONOTONIC, HRTIMER_MODE_REL); } +static void cancel_idle_work(struct msm_gpu_devfreq *df) +{ + hrtimer_cancel(&df->idle_work.timer); + kthread_cancel_work_sync(&df->idle_work.work); +} + +static void cancel_boost_work(struct msm_gpu_devfreq *df) +{ + hrtimer_cancel(&df->boost_work.timer); + kthread_cancel_work_sync(&df->boost_work.work); +} + void msm_devfreq_cleanup(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; @@ -152,7 +164,12 @@ void msm_devfreq_resume(struct msm_gpu *gpu) void msm_devfreq_suspend(struct msm_gpu *gpu) { - devfreq_suspend_device(gpu->devfreq.devfreq); + struct msm_gpu_devfreq *df = &gpu->devfreq; + + devfreq_suspend_device(df->devfreq); + + cancel_idle_work(df); + cancel_boost_work(df); } static void msm_devfreq_boost_work(struct kthread_work *work) @@ -196,7 +213,7 @@ void msm_devfreq_active(struct msm_gpu *gpu) /* * Cancel any pending transition to idle frequency: */ - hrtimer_cancel(&df->idle_work.timer); + cancel_idle_work(df); idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time)); From c9d967b2ce40d71e968eb839f36c936b8a9cf1ea Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 13 Jan 2022 19:44:20 +0100 Subject: [PATCH 117/367] PM: wakeup: simplify the output logic of pm_show_wakelocks() The buffer handling in pm_show_wakelocks() is tricky, and hopefully correct. Ensure it really is correct by using sysfs_emit_at() which handles all of the tricky string handling logic in a PAGE_SIZE buffer for us automatically as this is a sysfs file being read from. Signed-off-by: Greg Kroah-Hartman Reviewed-by: Lee Jones Signed-off-by: Rafael J. Wysocki --- kernel/power/wakelock.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 105df4dfc783..52571dcad768 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) { struct rb_node *node; struct wakelock *wl; - char *str = buf; - char *end = buf + PAGE_SIZE; + int len = 0; mutex_lock(&wakelocks_lock); for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { wl = rb_entry(node, struct wakelock, node); if (wl->ws->active == show_active) - str += scnprintf(str, end - str, "%s ", wl->name); + len += sysfs_emit_at(buf, len, "%s ", wl->name); } - if (str > buf) - str--; - str += scnprintf(str, end - str, "\n"); + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); - return (str - buf); + return len; } #if CONFIG_PM_WAKELOCKS_LIMIT > 0 From 945c37ed564770c78dfe6b9f08bed57a1b4e60ef Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 10 Jan 2022 20:43:28 +0800 Subject: [PATCH 118/367] usb: roles: fix include/linux/usb/role.h compile issue when CONFIG_USB_ROLE_SWITCH is not defined, add usb_role_switch_find_by_fwnode() definition which return NULL. Fixes: c6919d5e0cd1 ("usb: roles: Add usb_role_switch_find_by_fwnode()") Signed-off-by: Linyu Yuan Link: https://lore.kernel.org/r/1641818608-25039-1-git-send-email-quic_linyyuan@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/role.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index 031f148ab373..b5deafd91f67 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -91,6 +91,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node) static inline void usb_role_switch_put(struct usb_role_switch *sw) { } +static inline struct usb_role_switch * +usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + return NULL; +} + static inline struct usb_role_switch * usb_role_switch_register(struct device *parent, const struct usb_role_switch_desc *desc) From 33569ef3c754a82010f266b7b938a66a3ccf90a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Wed, 19 Jan 2022 11:47:51 +0100 Subject: [PATCH 119/367] PM: hibernate: Remove register_nosave_region_late() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is an unused wrapper forcing kmalloc allocation for registering nosave regions. Also, rename __register_nosave_region() to register_nosave_region() now that there is no need for disambiguation. Signed-off-by: Amadeusz Sławiński Reviewed-by: Cezary Rojewski Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 11 +---------- kernel/power/snapshot.c | 21 +++++++-------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5785d909c321..3e8ecdebe601 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -430,15 +430,7 @@ struct platform_hibernation_ops { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ -extern void __register_nosave_region(unsigned long b, unsigned long e, int km); -static inline void __init register_nosave_region(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 0); -} -static inline void __init register_nosave_region_late(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 1); -} +extern void register_nosave_region(unsigned long b, unsigned long e); extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); @@ -458,7 +450,6 @@ int pfn_is_nosave(unsigned long pfn); int hibernate_quiet_exec(int (*func)(void *data), void *data); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f7a986078213..330d49937692 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -978,8 +978,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm) * Register a range of page frames the contents of which should not be saved * during hibernation (to be used in the early initialization code). */ -void __init __register_nosave_region(unsigned long start_pfn, - unsigned long end_pfn, int use_kmalloc) +void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) { struct nosave_region *region; @@ -995,18 +994,12 @@ void __init __register_nosave_region(unsigned long start_pfn, goto Report; } } - if (use_kmalloc) { - /* During init, this shouldn't fail */ - region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); - BUG_ON(!region); - } else { - /* This allocation cannot fail */ - region = memblock_alloc(sizeof(struct nosave_region), - SMP_CACHE_BYTES); - if (!region) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct nosave_region)); - } + /* This allocation cannot fail */ + region = memblock_alloc(sizeof(struct nosave_region), + SMP_CACHE_BYTES); + if (!region) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct nosave_region)); region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); From 5638b0dfb6921f69943c705383ff40fb64b987f2 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 13 Jan 2022 17:29:43 +0800 Subject: [PATCH 120/367] usb: typec: tcpci: don't touch CC line if it's Vconn source With the AMS and Collision Avoidance, tcpm often needs to change the CC's termination. When one CC line is sourcing Vconn, if we still change its termination, the voltage of the another CC line is likely to be fluctuant and unstable. Therefore, we should verify whether a CC line is sourcing Vconn before changing its termination and only change the termination that is not a Vconn line. This can be done by reading the Vconn Present bit of POWER_ STATUS register. To determine the polarity, we can read the Plug Orientation bit of TCPC_CONTROL register. Since Vconn can only be sourced if Plug Orientation is set. Fixes: 0908c5aca31e ("usb: typec: tcpm: AMS and Collision Avoidance") cc: Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20220113092943.752372-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci.c | 26 ++++++++++++++++++++++++++ drivers/usb/typec/tcpm/tcpci.h | 1 + 2 files changed, 27 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 35a1307349a2..e07d26a3cd8e 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -75,9 +75,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc) { struct tcpci *tcpci = tcpc_to_tcpci(tcpc); + bool vconn_pres; + enum typec_cc_polarity polarity = TYPEC_POLARITY_CC1; unsigned int reg; int ret; + ret = regmap_read(tcpci->regmap, TCPC_POWER_STATUS, ®); + if (ret < 0) + return ret; + + vconn_pres = !!(reg & TCPC_POWER_STATUS_VCONN_PRES); + if (vconn_pres) { + ret = regmap_read(tcpci->regmap, TCPC_TCPC_CTRL, ®); + if (ret < 0) + return ret; + + if (reg & TCPC_TCPC_CTRL_ORIENTATION) + polarity = TYPEC_POLARITY_CC2; + } + switch (cc) { case TYPEC_CC_RA: reg = (TCPC_ROLE_CTRL_CC_RA << TCPC_ROLE_CTRL_CC1_SHIFT) | @@ -112,6 +128,16 @@ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc) break; } + if (vconn_pres) { + if (polarity == TYPEC_POLARITY_CC2) { + reg &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT); + reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT); + } else { + reg &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT); + reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT); + } + } + ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg); if (ret < 0) return ret; diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h index 2be7a77d400e..b2edd45f13c6 100644 --- a/drivers/usb/typec/tcpm/tcpci.h +++ b/drivers/usb/typec/tcpm/tcpci.h @@ -98,6 +98,7 @@ #define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4) #define TCPC_POWER_STATUS_VBUS_DET BIT(3) #define TCPC_POWER_STATUS_VBUS_PRES BIT(2) +#define TCPC_POWER_STATUS_VCONN_PRES BIT(1) #define TCPC_POWER_STATUS_SINKING_VBUS BIT(0) #define TCPC_FAULT_STATUS 0x1f From 7817adb03cfb52ebb5bdb25fd9fc8f683a1a09d9 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 24 Jan 2022 12:02:27 +0300 Subject: [PATCH 121/367] usb: typec: Only attempt to link USB ports if there is fwnode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code that creates the links to the USB ports attached to a connector inside the system assumed that the ACPI nodes (fwnodes) always exist for the connectors, but it can not do that. There is no guarantee that every USB Type-C connector has ACPI device node representing it in the ACPI tables, and even if there are the nodes in the ACPI tables, the _STA method in those nodes may still return 0 (which means the device does not exist from ACPI PoW). This fixes NULL pointer dereference that happens if the nodes are missing. Fixes: 730b49aac426 ("usb: typec: port-mapper: Convert to the component framework") Reported-and-tested-by: Robert Święcki Reported-by: Mikhail Gavrilov Tested-by: Marc Zyngier Acked-by: Marc Zyngier Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220124090228.41396-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/port-mapper.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/port-mapper.c b/drivers/usb/typec/port-mapper.c index 07d307418b47..b6e0c6acc628 100644 --- a/drivers/usb/typec/port-mapper.c +++ b/drivers/usb/typec/port-mapper.c @@ -56,6 +56,9 @@ int typec_link_ports(struct typec_port *con) { struct each_port_arg arg = { .port = con, .match = NULL }; + if (!has_acpi_companion(&con->dev)) + return 0; + bus_for_each_dev(&acpi_bus_type, NULL, &arg, typec_port_match); /* @@ -74,5 +77,6 @@ int typec_link_ports(struct typec_port *con) void typec_unlink_ports(struct typec_port *con) { - component_master_del(&con->dev, &typec_aggregate_ops); + if (has_acpi_companion(&con->dev)) + component_master_del(&con->dev, &typec_aggregate_ops); } From 147ab5376f18045da9f22a8262185707745bbf77 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 24 Jan 2022 12:02:28 +0300 Subject: [PATCH 122/367] usb: typec: Don't try to register component master without components This fixes NULL pointer dereference that happens if component master is registered with empty component match list. Fixes: 730b49aac426 ("usb: typec: port-mapper: Convert to the component framework") Reported-by: Mikhail Gavrilov Tested-by: John Stultz Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220124090228.41396-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/port-mapper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/port-mapper.c b/drivers/usb/typec/port-mapper.c index b6e0c6acc628..a7d507802509 100644 --- a/drivers/usb/typec/port-mapper.c +++ b/drivers/usb/typec/port-mapper.c @@ -60,6 +60,8 @@ int typec_link_ports(struct typec_port *con) return 0; bus_for_each_dev(&acpi_bus_type, NULL, &arg, typec_port_match); + if (!arg.match) + return 0; /* * REVISIT: Now each connector can have only a single component master. From e464121f2d40eabc7d11823fb26db807ce945df4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 21 Jan 2022 09:47:38 -0800 Subject: [PATCH 123/367] x86/cpu: Add Xeon Icelake-D to list of CPUs that support PPIN Missed adding the Icelake-D CPU to the list. It uses the same MSRs to control and read the inventory number as all the other models. Fixes: dc6b025de95b ("x86/mce: Add Xeon Icelake to list of CPUs that support PPIN") Reported-by: Ailin Xu Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/20220121174743.1875294-2-tony.luck@intel.com --- arch/x86/kernel/cpu/mce/intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index bb9a46a804bf..baafbb37be67 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: From 90b8aa9f5b09edae6928c0561f933fec9f7a9987 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 21 Jan 2022 17:55:19 -0800 Subject: [PATCH 124/367] usb: typec: tcpm: Do not disconnect while receiving VBUS off With some chargers, vbus might momentarily raise above VSAFE5V and fall back to 0V before tcpm gets to read port->tcpc->get_vbus. This will will report a VBUS off event causing TCPM to transition to SNK_UNATTACHED where it should be waiting in either SNK_ATTACH_WAIT or SNK_DEBOUNCED state. This patch makes TCPM avoid vbus off events while in SNK_ATTACH_WAIT or SNK_DEBOUNCED state. Stub from the spec: "4.5.2.2.4.2 Exiting from AttachWait.SNK State A Sink shall transition to Unattached.SNK when the state of both the CC1 and CC2 pins is SNK.Open for at least tPDDebounce. A DRP shall transition to Unattached.SRC when the state of both the CC1 and CC2 pins is SNK.Open for at least tPDDebounce." [23.194131] CC1: 0 -> 0, CC2: 0 -> 5 [state SNK_UNATTACHED, polarity 0, connected] [23.201777] state change SNK_UNATTACHED -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [23.209949] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [23.300579] VBUS off [23.300668] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [rev3 NONE_AMS] [23.301014] VBUS VSAFE0V [23.301111] Start toggling Fixes: f0690a25a140b8 ("staging: typec: USB Type-C Port Manager (tcpm)") Cc: stable@vger.kernel.org Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20220122015520.332507-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 59d4fa2443f2..b8afe3d8c882 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5156,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port) case SNK_TRYWAIT_DEBOUNCE: break; case SNK_ATTACH_WAIT: - tcpm_set_state(port, SNK_UNATTACHED, 0); + case SNK_DEBOUNCED: + /* Do nothing, as TCPM is still waiting for vbus to reaach VSAFE5V to connect */ break; case SNK_NEGOTIATE_CAPABILITIES: From 746f96e7d6f7a276726860f696671766bfb24cf0 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 21 Jan 2022 17:55:20 -0800 Subject: [PATCH 125/367] usb: typec: tcpm: Do not disconnect when receiving VSAFE0V With some chargers, vbus might momentarily raise above VSAFE5V and fall back to 0V causing VSAFE0V to be triggered. This will will report a VBUS off event causing TCPM to transition to SNK_UNATTACHED state where it should be waiting in either SNK_ATTACH_WAIT or SNK_DEBOUNCED state. This patch makes TCPM avoid VSAFE0V events while in SNK_ATTACH_WAIT or SNK_DEBOUNCED state. Stub from the spec: "4.5.2.2.4.2 Exiting from AttachWait.SNK State A Sink shall transition to Unattached.SNK when the state of both the CC1 and CC2 pins is SNK.Open for at least tPDDebounce. A DRP shall transition to Unattached.SRC when the state of both the CC1 and CC2 pins is SNK.Open for at least tPDDebounce." [23.194131] CC1: 0 -> 0, CC2: 0 -> 5 [state SNK_UNATTACHED, polarity 0, connected] [23.201777] state change SNK_UNATTACHED -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [23.209949] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [23.300579] VBUS off [23.300668] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [rev3 NONE_AMS] [23.301014] VBUS VSAFE0V [23.301111] Start toggling Fixes: 28b43d3d746b8 ("usb: typec: tcpm: Introduce vsafe0v for vbus") Cc: stable@vger.kernel.org Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20220122015520.332507-2-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index b8afe3d8c882..5fce795b69c7 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5264,6 +5264,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port) case PR_SWAP_SNK_SRC_SOURCE_ON: /* Do nothing, vsafe0v is expected during transition */ break; + case SNK_ATTACH_WAIT: + case SNK_DEBOUNCED: + /*Do nothing, still waiting for VSAFE5V for connect */ + break; default: if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled) tcpm_set_state(port, SNK_UNATTACHED, 0); From 5b67b315037250a61861119683e7fcb509deea25 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 24 Jan 2022 15:14:40 -0500 Subject: [PATCH 126/367] usb-storage: Add unusual-devs entry for VL817 USB-SATA bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two people have reported (and mentioned numerous other reports on the web) that VIA's VL817 USB-SATA bridge does not work with the uas driver. Typical log messages are: [ 3606.232149] sd 14:0:0:0: [sdg] tag#2 uas_zap_pending 0 uas-tag 1 inflight: CMD [ 3606.232154] sd 14:0:0:0: [sdg] tag#2 CDB: Write(16) 8a 00 00 00 00 00 18 0c c9 80 00 00 00 80 00 00 [ 3606.306257] usb 4-4.4: reset SuperSpeed Plus Gen 2x1 USB device number 11 using xhci_hcd [ 3606.328584] scsi host14: uas_eh_device_reset_handler success Surprisingly, the devices do seem to work okay for some other people. The cause of the differing behaviors is not known. In the hope of getting the devices to work for the most users, even at the possible cost of degraded performance for some, this patch adds an unusual_devs entry for the VL817 to block it from binding to the uas driver by default. Users will be able to override this entry by means of a module parameter, if they want. CC: Reported-by: DocMAX Reported-and-tested-by: Thomas Weißschuh Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/Ye8IsK2sjlEv1rqU@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 29191d33c0e3..1a05e3dcfec8 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2301,6 +2301,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, US_FL_SCM_MULT_TARG ), +/* + * Reported by DocMAX + * and Thomas Weißschuh + */ +UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999, + "VIA Labs, Inc.", + "VL817 SATA Bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001, "ST", "2A", From 26fbe9772b8c459687930511444ce443011f86bf Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 24 Jan 2022 15:23:45 -0500 Subject: [PATCH 127/367] USB: core: Fix hang in usb_kill_urb by adding memory barriers The syzbot fuzzer has identified a bug in which processes hang waiting for usb_kill_urb() to return. It turns out the issue is not unlinking the URB; that works just fine. Rather, the problem arises when the wakeup notification that the URB has completed is not received. The reason is memory-access ordering on SMP systems. In outline form, usb_kill_urb() and __usb_hcd_giveback_urb() operating concurrently on different CPUs perform the following actions: CPU 0 CPU 1 ---------------------------- --------------------------------- usb_kill_urb(): __usb_hcd_giveback_urb(): ... ... atomic_inc(&urb->reject); atomic_dec(&urb->use_count); ... ... wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); if (atomic_read(&urb->reject)) wake_up(&usb_kill_urb_queue); Confining your attention to urb->reject and urb->use_count, you can see that the overall pattern of accesses on CPU 0 is: write urb->reject, then read urb->use_count; whereas the overall pattern of accesses on CPU 1 is: write urb->use_count, then read urb->reject. This pattern is referred to in memory-model circles as SB (for "Store Buffering"), and it is well known that without suitable enforcement of the desired order of accesses -- in the form of memory barriers -- it is entirely possible for one or both CPUs to execute their reads ahead of their writes. The end result will be that sometimes CPU 0 sees the old un-decremented value of urb->use_count while CPU 1 sees the old un-incremented value of urb->reject. Consequently CPU 0 ends up on the wait queue and never gets woken up, leading to the observed hang in usb_kill_urb(). The same pattern of accesses occurs in usb_poison_urb() and the failure pathway of usb_hcd_submit_urb(). The problem is fixed by adding suitable memory barriers. To provide proper memory-access ordering in the SB pattern, a full barrier is required on both CPUs. The atomic_inc() and atomic_dec() accesses themselves don't provide any memory ordering, but since they are present, we can use the optimized smp_mb__after_atomic() memory barrier in the various routines to obtain the desired effect. This patch adds the necessary memory barriers. CC: Reported-and-tested-by: syzbot+76629376e06e2c2ad626@syzkaller.appspotmail.com Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/Ye8K0QYee0Q0Nna2@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 14 ++++++++++++++ drivers/usb/core/urb.c | 12 ++++++++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 3e01dd6e509b..d9712c2602af 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1563,6 +1563,13 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags) urb->hcpriv = NULL; INIT_LIST_HEAD(&urb->urb_list); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + atomic_dec(&urb->dev->urbnum); if (atomic_read(&urb->reject)) wake_up(&usb_kill_urb_queue); @@ -1665,6 +1672,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb) usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + if (unlikely(atomic_read(&urb->reject))) wake_up(&usb_kill_urb_queue); usb_put_urb(urb); diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 30727729a44c..33d62d7e3929 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -715,6 +715,12 @@ void usb_kill_urb(struct urb *urb) if (!(urb && urb->dev && urb->ep)) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); @@ -756,6 +762,12 @@ void usb_poison_urb(struct urb *urb) if (!urb) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); if (!urb->dev || !urb->ep) return; From e3d26528e083e612314d4dcd713f3d5a26143ddc Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 6 Jan 2022 19:10:21 +0100 Subject: [PATCH 128/367] drm/etnaviv: relax submit size limits While all userspace tried to limit commandstreams to 64K in size, a bug in the Mesa driver lead to command streams of up to 128K being submitted. Allow those to avoid breaking existing userspace. Fixes: 6dfa2fab8ddd ("drm/etnaviv: limit submit sizes") Cc: stable@vger.kernel.org Signed-off-by: Lucas Stach Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index b03c20c14ca1..a17313282e8b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -469,8 +469,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, return -EINVAL; } - if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K || - args->nr_bos > SZ_64K || args->nr_pmrs > 128) { + if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K || + args->nr_bos > SZ_128K || args->nr_pmrs > 128) { DRM_ERROR("submit arguments out of size limits\n"); return -EINVAL; } From f26d04331360d42dbd6b58448bd98e4edbfbe1c5 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 13 Jan 2022 18:54:38 -0500 Subject: [PATCH 129/367] audit: improve audit queue handling when "audit=1" on cmdline When an admin enables audit at early boot via the "audit=1" kernel command line the audit queue behavior is slightly different; the audit subsystem goes to greater lengths to avoid dropping records, which unfortunately can result in problems when the audit daemon is forcibly stopped for an extended period of time. This patch makes a number of changes designed to improve the audit queuing behavior so that leaving the audit daemon in a stopped state for an extended period does not cause a significant impact to the system. - kauditd_send_queue() is now limited to looping through the passed queue only once per call. This not only prevents the function from looping indefinitely when records are returned to the current queue, it also allows any recovery handling in kauditd_thread() to take place when kauditd_send_queue() returns. - Transient netlink send errors seen as -EAGAIN now cause the record to be returned to the retry queue instead of going to the hold queue. The intention of the hold queue is to store, perhaps for an extended period of time, the events which led up to the audit daemon going offline. The retry queue remains a temporary queue intended to protect against transient issues between the kernel and the audit daemon. - The retry queue is now limited by the audit_backlog_limit setting, the same as the other queues. This allows admins to bound the size of all of the audit queues on the system. - kauditd_rehold_skb() now returns records to the end of the hold queue to ensure ordering is preserved in the face of recent changes to kauditd_send_queue(). Cc: stable@vger.kernel.org Fixes: 5b52330bbfe63 ("audit: fix auditd/kernel connection state tracking") Fixes: f4b3ee3c85551 ("audit: improve robustness of the audit queue handling") Reported-by: Gaosheng Cui Tested-by: Gaosheng Cui Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.c | 62 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index e4bbe2c70c26..7690c29d4ee4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb) /** * kauditd_rehold_skb - Handle a audit record send failure in the hold queue * @skb: audit record + * @error: error code (unused) * * Description: * This should only be used by the kauditd_thread when it fails to flush the * hold queue. */ -static void kauditd_rehold_skb(struct sk_buff *skb) +static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) { - /* put the record back in the queue at the same place */ - skb_queue_head(&audit_hold_queue, skb); + /* put the record back in the queue */ + skb_queue_tail(&audit_hold_queue, skb); } /** * kauditd_hold_skb - Queue an audit record, waiting for auditd * @skb: audit record + * @error: error code * * Description: * Queue the audit record, waiting for an instance of auditd. When this @@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb) * and queue it, if we have room. If we want to hold on to the record, but we * don't have room, record a record lost message. */ -static void kauditd_hold_skb(struct sk_buff *skb) +static void kauditd_hold_skb(struct sk_buff *skb, int error) { /* at this point it is uncertain if we will ever send this to auditd so * try to send the message via printk before we go any further */ kauditd_printk_skb(skb); /* can we just silently drop the message? */ - if (!audit_default) { - kfree_skb(skb); - return; + if (!audit_default) + goto drop; + + /* the hold queue is only for when the daemon goes away completely, + * not -EAGAIN failures; if we are in a -EAGAIN state requeue the + * record on the retry queue unless it's full, in which case drop it + */ + if (error == -EAGAIN) { + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + audit_log_lost("kauditd retry queue overflow"); + goto drop; } - /* if we have room, queue the message */ + /* if we have room in the hold queue, queue the message */ if (!audit_backlog_limit || skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { skb_queue_tail(&audit_hold_queue, skb); @@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb) /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); +drop: kfree_skb(skb); } /** * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd * @skb: audit record + * @error: error code (unused) * * Description: * Not as serious as kauditd_hold_skb() as we still have a connected auditd, * but for some reason we are having problems sending it audit records so * queue the given record and attempt to resend. */ -static void kauditd_retry_skb(struct sk_buff *skb) +static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) { - /* NOTE: because records should only live in the retry queue for a - * short period of time, before either being sent or moved to the hold - * queue, we don't currently enforce a limit on this queue */ - skb_queue_tail(&audit_retry_queue, skb); + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + + /* we have to drop the record, send it via printk as a last effort */ + kauditd_printk_skb(skb); + audit_log_lost("kauditd retry queue overflow"); + kfree_skb(skb); } /** @@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac) /* flush the retry queue to the hold queue, but don't touch the main * queue since we need to process that normally for multicast */ while ((skb = skb_dequeue(&audit_retry_queue))) - kauditd_hold_skb(skb); + kauditd_hold_skb(skb, -ECONNREFUSED); } /** @@ -714,16 +736,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, struct sk_buff_head *queue, unsigned int retry_limit, void (*skb_hook)(struct sk_buff *skb), - void (*err_hook)(struct sk_buff *skb)) + void (*err_hook)(struct sk_buff *skb, int error)) { int rc = 0; - struct sk_buff *skb; + struct sk_buff *skb = NULL; + struct sk_buff *skb_tail; unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ - while ((skb = skb_dequeue(queue))) { + skb_tail = skb_peek_tail(queue); + while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { /* call the skb_hook for each skb we touch */ if (skb_hook) (*skb_hook)(skb); @@ -731,7 +755,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, /* can we send to anyone via unicast? */ if (!sk) { if (err_hook) - (*err_hook)(skb); + (*err_hook)(skb, -ECONNREFUSED); continue; } @@ -745,7 +769,7 @@ retry: rc == -ECONNREFUSED || rc == -EPERM) { sk = NULL; if (err_hook) - (*err_hook)(skb); + (*err_hook)(skb, rc); if (rc == -EAGAIN) rc = 0; /* continue to drain the queue */ From 235528072f28b3b0a1446279b7eaddda36dbf743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Thu, 13 Jan 2022 00:36:57 +0100 Subject: [PATCH 130/367] kunit: tool: Import missing importlib.abc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python 3.10.0 contains: 9e09849d20 ("bpo-41006: importlib.util no longer imports typing (GH-20938)") It causes importlib.util to no longer import importlib.abs, which leads to the following error when trying to use kunit with qemu: AttributeError: module 'importlib' has no attribute 'abc'. Did you mean: '_abc'? Add the missing import. Signed-off-by: Michał Winiarski Reviewed-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 44bbe54f25f1..3c4196cef3ed 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,6 +6,7 @@ # Author: Felix Guo # Author: Brendan Higgins +import importlib.abc import importlib.util import logging import subprocess From f034cc1301e7d83d4ec428dd6b8ffb57ca446efb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 12 Jan 2022 14:41:42 -0500 Subject: [PATCH 131/367] selftests: rtc: Increase test timeout so that all tests run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The timeout setting for the rtc kselftest is currently 90 seconds. This setting is used by the kselftest runner to stop running a test if it takes longer than the assigned value. However, two of the test cases inside rtc set alarms. These alarms are set to the next beginning of the minute, so each of these test cases may take up to, in the worst case, 60 seconds. In order to allow for all test cases in rtc to run, even in the worst case, when using the kselftest runner, the timeout value should be increased to at least 120. Set it to 180, so there's some additional slack. Correct operation can be tested by running the following command right after the start of a minute (low second count), and checking that all test cases run: ./run_kselftest.sh -c rtc Signed-off-by: Nícolas F. R. A. Prado Acked-by: Alexandre Belloni Signed-off-by: Shuah Khan --- tools/testing/selftests/rtc/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings index ba4d85f74cd6..a953c96aa16e 100644 --- a/tools/testing/selftests/rtc/settings +++ b/tools/testing/selftests/rtc/settings @@ -1 +1 @@ -timeout=90 +timeout=180 From 40d70d4d60974c28054a60316f2aec8810833526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Fri, 14 Jan 2022 18:21:26 -0500 Subject: [PATCH 132/367] selftests: cpufreq: Write test output to stdout as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use 'tee' to send the test output to stdout in addition to the current output file. This makes the output easier to handle in automated test systems and is superior to only later dumping the output file contents to stdout, since this way the test output can be interleaved with other log messages, like from the kernel, so that chronology is preserved, making it easier to detect issues. Signed-off-by: Nícolas F. R. A. Prado Acked-by: Viresh Kumar Signed-off-by: Shuah Khan --- tools/testing/selftests/cpufreq/main.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh index 31f8c9a76c5f..60ce18ed0666 100755 --- a/tools/testing/selftests/cpufreq/main.sh +++ b/tools/testing/selftests/cpufreq/main.sh @@ -194,5 +194,5 @@ prerequisite # Run requested functions clear_dumps $OUTFILE -do_test >> $OUTFILE.txt +do_test | tee -a $OUTFILE.txt dmesg_dumps $OUTFILE From 92d25637a3a45904292c93f1863c6bbda4e3e38f Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 17 Dec 2021 17:29:55 +0800 Subject: [PATCH 133/367] kselftest: signal all child processes We have some many cases that will create child process as well, such as pidfd_wait. Previously, we will signal/kill the parent process when it is time out, but this signal will not be sent to its child process. In such case, if child process doesn't terminate itself, ksefltest framework will hang forever. Here we group all its child processes so that kill() can signal all of them in timeout. Fixed change log: Shuah Khan Suggested-by: yang xu Signed-off-by: Li Zhijian Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 471eaa7b3a3f..11779405dc80 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -877,7 +877,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true; - kill(t->pid, SIGKILL); + // signal process group + kill(-(t->pid), SIGKILL); } void __wait_for_test(struct __test_metadata *t) @@ -987,6 +988,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { + setpgrp(); t->fn(t, variant); if (t->skip) _exit(255); From 809232619f5b15e31fb3563985e705454f32621f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 17 Jan 2022 15:30:10 -0500 Subject: [PATCH 134/367] sched/membarrier: Fix membarrier-rseq fence command missing from query bitmask The membarrier command MEMBARRIER_CMD_QUERY allows querying the available membarrier commands. When the membarrier-rseq fence commands were added, a new MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK was introduced with the intent to expose them with the MEMBARRIER_CMD_QUERY command, the but it was never added to MEMBARRIER_CMD_BITMASK. The membarrier-rseq fence commands are therefore not wired up with the query command. Rename MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK to MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK (the bitmask is not a command per-se), and change the erroneous MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK (which does not actually exist) to MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ. Wire up MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK in MEMBARRIER_CMD_BITMASK. Fixing this allows discovering availability of the membarrier-rseq fence feature. Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ") Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Cc: # 5.10+ Link: https://lkml.kernel.org/r/20220117203010.30129-1-mathieu.desnoyers@efficios.com --- kernel/sched/membarrier.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index b5add64d9698..3d2825408e3a 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -147,11 +147,11 @@ #endif #ifdef CONFIG_RSEQ -#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \ +#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \ (MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \ - | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK) + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ) #else -#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0 +#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0 #endif #define MEMBARRIER_CMD_BITMASK \ @@ -159,7 +159,8 @@ | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \ | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \ - | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK) + | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ + | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK) static void ipi_mb(void *info) { From 5f0c749158158f89eba7647bdc4e8096979de981 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Mon, 15 Nov 2021 01:51:37 -0500 Subject: [PATCH 135/367] drm/amd/display: Fix for otg synchronization logic [Why] During otg sync trigger, plane states are used to decide whether the otg is already synchronized or not. There are scenarions when otgs are disabled without plane state getting disabled and in such case the otg is excluded from synchronization. [How] Introduced pipe_idx_syncd in pipe_ctx that tracks each otgs master pipe. When a otg is disabled/enabled, pipe_idx_syncd is reset to itself. On sync trigger, pipe_idx_syncd is checked to decide whether a otg is already synchronized and the otg is further included or excluded from synchronization. v2: Don't drop is_blanked logic Reviewed-by: Jun Lei Reviewed-by: Mustapha Ghaddar Acked-by: Bhawanpreet Lakha Signed-off-by: meenakshikumar somasundaram Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Harry Wentland Cc: torvalds@linux-foundation.org Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 40 +++++++++----- .../gpu/drm/amd/display/dc/core/dc_resource.c | 54 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../display/dc/dce110/dce110_hw_sequencer.c | 8 +++ .../drm/amd/display/dc/dcn31/dcn31_resource.c | 3 ++ .../gpu/drm/amd/display/dc/inc/core_types.h | 1 + drivers/gpu/drm/amd/display/dc/inc/resource.h | 11 ++++ 7 files changed, 105 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 01c8849b9db2..6f5528d34093 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1404,20 +1404,34 @@ static void program_timing_sync( status->timing_sync_info.master = false; } - /* remove any other unblanked pipes as they have already been synced */ - for (j = j + 1; j < group_size; j++) { - bool is_blanked; - if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) - is_blanked = - pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); - else - is_blanked = - pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); - if (!is_blanked) { - group_size--; - pipe_set[j] = pipe_set[group_size]; - j--; + /* remove any other pipes that are already been synced */ + if (dc->config.use_pipe_ctx_sync_logic) { + /* check pipe's syncd to decide which pipe to be removed */ + for (j = 1; j < group_size; j++) { + if (pipe_set[j]->pipe_idx_syncd == pipe_set[0]->pipe_idx_syncd) { + group_size--; + pipe_set[j] = pipe_set[group_size]; + j--; + } else + /* link slave pipe's syncd with master pipe */ + pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd; + } + } else { + for (j = j + 1; j < group_size; j++) { + bool is_blanked; + + if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) + is_blanked = + pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); + else + is_blanked = + pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); + if (!is_blanked) { + group_size--; + pipe_set[j] = pipe_set[group_size]; + j--; + } } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d4ff6cc6b8d9..b3912ff9dc91 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3217,6 +3217,60 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt( } #endif +void reset_syncd_pipes_from_disabled_pipes(struct dc *dc, + struct dc_state *context) +{ + int i, j; + struct pipe_ctx *pipe_ctx_old, *pipe_ctx, *pipe_ctx_syncd; + + /* If pipe backend is reset, need to reset pipe syncd status */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx_old = &dc->current_state->res_ctx.pipe_ctx[i]; + pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx_old->stream) + continue; + + if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe) + continue; + + if (!pipe_ctx->stream || + pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { + + /* Reset all the syncd pipes from the disabled pipe */ + for (j = 0; j < dc->res_pool->pipe_count; j++) { + pipe_ctx_syncd = &context->res_ctx.pipe_ctx[j]; + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_syncd) == pipe_ctx_old->pipe_idx) || + !IS_PIPE_SYNCD_VALID(pipe_ctx_syncd)) + SET_PIPE_SYNCD_TO_PIPE(pipe_ctx_syncd, j); + } + } + } +} + +void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc, + struct dc_state *context, + uint8_t disabled_master_pipe_idx) +{ + int i; + struct pipe_ctx *pipe_ctx, *pipe_ctx_check; + + pipe_ctx = &context->res_ctx.pipe_ctx[disabled_master_pipe_idx]; + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx) != disabled_master_pipe_idx) || + !IS_PIPE_SYNCD_VALID(pipe_ctx)) + SET_PIPE_SYNCD_TO_PIPE(pipe_ctx, disabled_master_pipe_idx); + + /* for the pipe disabled, check if any slave pipe exists and assert */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx_check = &context->res_ctx.pipe_ctx[i]; + + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == disabled_master_pipe_idx) && + IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != disabled_master_pipe_idx)) + DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled master pipe_idx[%d]\n", + i, disabled_master_pipe_idx); + } +} + uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter) { /* TODO - get transmitter to phy idx mapping from DMUB */ diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index da2c78ce14d6..288e7b01f561 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -344,6 +344,7 @@ struct dc_config { uint8_t vblank_alignment_max_frame_time_diff; bool is_asymmetric_memory; bool is_single_rank_dimm; + bool use_pipe_ctx_sync_logic; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 78192ecba102..f1593186e964 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1566,6 +1566,10 @@ static enum dc_status apply_single_controller_ctx_to_hw( &pipe_ctx->stream->audio_info); } + /* make sure no pipes syncd to the pipe being enabled */ + if (!pipe_ctx->stream->apply_seamless_boot_optimization && dc->config.use_pipe_ctx_sync_logic) + check_syncd_pipes_for_disabled_master_pipe(dc, context, pipe_ctx->pipe_idx); + #if defined(CONFIG_DRM_AMD_DC_DCN) /* DCN3.1 FPGA Workaround * Need to enable HPO DP Stream Encoder before setting OTG master enable. @@ -2297,6 +2301,10 @@ enum dc_status dce110_apply_ctx_to_hw( enum dc_status status; int i; + /* reset syncd pipes from disabled pipes */ + if (dc->config.use_pipe_ctx_sync_logic) + reset_syncd_pipes_from_disabled_pipes(dc, context); + /* Reset old context */ /* look up the targets that have been removed since last commit */ hws->funcs.reset_hw_ctx_wrap(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 42ed47e8133d..8d64187478e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2260,6 +2260,9 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 890280026e69..943240e2809e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -382,6 +382,7 @@ struct pipe_ctx { struct pll_settings pll_settings; uint8_t pipe_idx; + uint8_t pipe_idx_syncd; struct pipe_ctx *top_pipe; struct pipe_ctx *bottom_pipe; diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 4249bf306e09..dbfe6690ded8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -34,6 +34,10 @@ #define MEMORY_TYPE_HBM 2 +#define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0) +#define GET_PIPE_SYNCD_FROM_PIPE(pipe) ((pipe)->pipe_idx_syncd & 0x7F) +#define SET_PIPE_SYNCD_TO_PIPE(pipe, pipe_syncd) ((pipe)->pipe_idx_syncd = (0x80 | pipe_syncd)) + enum dce_version resource_parse_asic_id( struct hw_asic_id asic_id); @@ -208,6 +212,13 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt( const struct dc_link *link); #endif +void reset_syncd_pipes_from_disabled_pipes(struct dc *dc, + struct dc_state *context); + +void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc, + struct dc_state *context, + uint8_t disabled_master_pipe_idx); + uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ From ac46d93235074a6c5d280d35771c23fd8620e7d9 Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Wed, 19 Jan 2022 16:55:16 -0500 Subject: [PATCH 136/367] drm/amd/display: Correct MPC split policy for DCN301 [Why] DCN301 has seamless boot enabled. With MPC split enabled at the same time, system will hang. [How] Revert MPC split policy back to "MPC_SPLIT_AVOID". Since we have ODM combine enabled on DCN301, pipe split is not necessary here. Signed-off-by: Zhan Liu Reviewed-by: Charlene Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index c1c6e602b06c..b4001233867c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_clock_gate = true, .disable_pplib_clock_request = true, .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, From 7e38ac562b820915faa33a5077ca9bccf42d39d2 Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Wed, 19 Jan 2022 17:07:53 -0500 Subject: [PATCH 137/367] drm/amd/display: change FIFO reset condition to embedded display only [Why] FIFO reset is only necessary for fast boot sequence, where otg is disabled and dig fe is enabled when changing dispclk. Fast boot is only enabled on embedded displays. [How] Change FIFO reset condition to "embedded display only". Signed-off-by: Zhan Liu Reviewed-by: Charlene Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index f1593186e964..f3ff141b706a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1608,7 +1608,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); - if (dc_is_dp_signal(pipe_ctx->stream->signal) && + if (dc_is_embedded_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc->funcs->reset_fifo) pipe_ctx->stream_res.stream_enc->funcs->reset_fifo( pipe_ctx->stream_res.stream_enc); From 9e5a14bce2402e84251a10269df0235cd7ce9234 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Jan 2022 12:17:07 -0500 Subject: [PATCH 138/367] drm/amdgpu: filter out radeon secondary ids as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Older radeon boards (r2xx-r5xx) had secondary PCI functions which we solely there for supporting multi-head on OSs with special requirements. Add them to the unsupported list as well so we don't attempt to bind to them. The driver would fail to bind to them anyway, but this does so in a cleaner way that should not confuse the user. Cc: stable@vger.kernel.org Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 81 +++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b21bcdc97460..4c83f1db8a24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1525,6 +1525,87 @@ static const u16 amdgpu_unsupported_pciidlist[] = { 0x99A0, 0x99A2, 0x99A4, + /* radeon secondary ids */ + 0x3171, + 0x3e70, + 0x4164, + 0x4165, + 0x4166, + 0x4168, + 0x4170, + 0x4171, + 0x4172, + 0x4173, + 0x496e, + 0x4a69, + 0x4a6a, + 0x4a6b, + 0x4a70, + 0x4a74, + 0x4b69, + 0x4b6b, + 0x4b6c, + 0x4c6e, + 0x4e64, + 0x4e65, + 0x4e66, + 0x4e67, + 0x4e68, + 0x4e69, + 0x4e6a, + 0x4e71, + 0x4f73, + 0x5569, + 0x556b, + 0x556d, + 0x556f, + 0x5571, + 0x5854, + 0x5874, + 0x5940, + 0x5941, + 0x5b72, + 0x5b73, + 0x5b74, + 0x5b75, + 0x5d44, + 0x5d45, + 0x5d6d, + 0x5d6f, + 0x5d72, + 0x5d77, + 0x5e6b, + 0x5e6d, + 0x7120, + 0x7124, + 0x7129, + 0x712e, + 0x712f, + 0x7162, + 0x7163, + 0x7166, + 0x7167, + 0x7172, + 0x7173, + 0x71a0, + 0x71a1, + 0x71a3, + 0x71a7, + 0x71bb, + 0x71e0, + 0x71e1, + 0x71e2, + 0x71e6, + 0x71e7, + 0x71f2, + 0x7269, + 0x726b, + 0x726e, + 0x72a0, + 0x72a8, + 0x72b1, + 0x72b3, + 0x793f, }; static const struct pci_device_id pciidlist[] = { From dc919d670c6fd1ac81ebf31625cd19579f7b3d4c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Jan 2022 12:52:13 -0500 Subject: [PATCH 139/367] drm/amdgpu/display: adjust msleep limit in dp_wait_for_training_aux_rd_interval Some architectures (e.g., ARM) have relatively low udelay limits. On most architectures, anything longer than 2000us is not recommended. Change the check to align with other similar checks in DC. Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 05e216524370..a012899e9dd4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -202,7 +202,7 @@ void dp_wait_for_training_aux_rd_interval( uint32_t wait_in_micro_secs) { #if defined(CONFIG_DRM_AMD_DC_DCN) - if (wait_in_micro_secs > 16000) + if (wait_in_micro_secs > 1000) msleep(wait_in_micro_secs/1000); else udelay(wait_in_micro_secs); From 98fdcacb45f7cd2092151d6af2e60152811eb79c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Jan 2022 12:57:33 -0500 Subject: [PATCH 140/367] drm/amdgpu/display: use msleep rather than udelay for long delays Some architectures (e.g., ARM) throw an compilation error if the udelay is too long. In general udelays of longer than 2000us are not recommended on any architecture. Switch to msleep in these cases. Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index a012899e9dd4..4c3ab2575e4b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -6935,7 +6935,7 @@ bool dpcd_write_128b_132b_sst_payload_allocation_table( } } retries++; - udelay(5000); + msleep(5); } if (!result && retries == max_retries) { @@ -6987,7 +6987,7 @@ bool dpcd_poll_for_allocation_change_trigger(struct dc_link *link) break; } - udelay(5000); + msleep(5); } if (result == ACT_FAILED) { From ebc77bcc6e1660a011483c035d53c461c8dcc4f5 Mon Sep 17 00:00:00 2001 From: Zhou Qingyang Date: Tue, 25 Jan 2022 00:55:51 +0800 Subject: [PATCH 141/367] drm/amd/display/dc/calcs/dce_calcs: Fix a memleak in calculate_bandwidth() In calculate_bandwidth(), the tag free_sclk and free_yclk are reversed, which could lead to a memory leak of yclk. Fix this bug by changing the location of free_sclk and free_yclk. This bug was found by a static analyzer. Builds with 'make allyesconfig' show no new warnings, and our static analyzer no longer warns about this code. Fixes: 2be8989d0fc2 ("drm/amd/display/dc/calcs/dce_calcs: Move some large variables from the stack to the heap") Signed-off-by: Zhou Qingyang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index ff5bb152ef49..e6ef36de0825 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -2033,10 +2033,10 @@ static void calculate_bandwidth( kfree(surface_type); free_tiling_mode: kfree(tiling_mode); -free_yclk: - kfree(yclk); free_sclk: kfree(sclk); +free_yclk: + kfree(yclk); } /******************************************************************************* From 72a8d87b87270bff0c0b2fed4d59c48d0dd840d7 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 24 Jan 2022 01:23:35 +0100 Subject: [PATCH 142/367] drm/amd/display: Fix FP start/end for dcn30_internal_validate_bw. It calls populate_dml_pipes which uses doubles to initialize the scale_ratio_depth params. Mirrors the dcn20 logic. Cc: stable@vger.kernel.org Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 602ec9a08549..8ca26383b568 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1878,7 +1878,6 @@ noinline bool dcn30_internal_validate_bw( dc->res_pool->funcs->update_soc_for_wm_a(dc, context); pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - DC_FP_START(); if (!pipe_cnt) { out = true; goto validate_out; @@ -2104,7 +2103,6 @@ validate_fail: out = false; validate_out: - DC_FP_END(); return out; } @@ -2306,7 +2304,9 @@ bool dcn30_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); if (pipe_cnt == 0) goto validate_out; From 25f1488bdbba63415239ff301fe61a8546140d9f Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 24 Jan 2022 01:23:36 +0100 Subject: [PATCH 143/367] drm/amd/display: Wrap dcn301_calculate_wm_and_dlg for FPU. Mirrors the logic for dcn30. Cue lots of WARNs and some kernel panics without this fix. Cc: stable@vger.kernel.org Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 11 +++++++++++ .../gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c | 2 +- .../gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index b4001233867c..5d9637b07429 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1380,6 +1380,17 @@ static void set_wm_ranges( pp_smu->nv_funcs.set_wm_ranges(&pp_smu->nv_funcs.pp_smu, &ranges); } +static void dcn301_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + DC_FP_START(); + dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); +} + static struct resource_funcs dcn301_res_pool_funcs = { .destroy = dcn301_destroy_resource_pool, .link_enc_create = dcn301_link_encoder_create, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 94c32832a0e7..0a7a33864973 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -327,7 +327,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info) dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; } -void dcn301_calculate_wm_and_dlg(struct dc *dc, +void dcn301_calculate_wm_and_dlg_fp(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h index fc7065d17842..774b0fdfc80b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h @@ -34,7 +34,7 @@ void dcn301_fpu_set_wm_ranges(int i, void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info); -void dcn301_calculate_wm_and_dlg(struct dc *dc, +void dcn301_calculate_wm_and_dlg_fp(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, From 2a807341ed1074ab83638f2fab08dffaa373f6b8 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sun, 23 Jan 2022 03:38:28 +0100 Subject: [PATCH 144/367] drm/amdgpu/display: Remove t_srx_delay_us. Unused. Convert the divisions into asserts on the divisor, to debug why it is zero. The divide by zero is suspected of causing kernel panics. While I have no idea where the zero is coming from I think this patch is a positive either way. Cc: stable@vger.kernel.org Reviewed-by: Harry Wentland Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 1 - .../gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c | 2 -- .../drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 2 -- .../gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c | 2 -- .../gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c | 2 -- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 - drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c | 3 --- drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c | 4 ---- 8 files changed, 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index ec19678a0702..e447c74be713 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -503,7 +503,6 @@ static void dcn_bw_calc_rq_dlg_ttu( //input[in_idx].dout.output_standard; /*todo: soc->sr_enter_plus_exit_time??*/ - dlg_sys_param->t_srx_delay_us = dc->dcn_ip->dcfclk_cstate_latency / v->dcf_clk_deep_sleep; dml1_rq_dlg_get_rq_params(dml, rq_param, &input->pipe.src); dml1_extract_rq_regs(dml, rq_regs, rq_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 246071c72f6b..548cdef8a8ad 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -1576,8 +1576,6 @@ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 015e7f2c0b16..0fc9f3e3ffae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -1577,8 +1577,6 @@ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 8bc27de4c104..618f4b682ab1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -1688,8 +1688,6 @@ void dml21_rq_dlg_get_dlg_reg( mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index aef854270054..747167083dea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -1858,8 +1858,6 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index d46a2733024c..8f9f1d607f7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -546,7 +546,6 @@ struct _vcs_dpi_display_dlg_sys_params_st { double t_sr_wm_us; double t_extra_us; double mem_trip_us; - double t_srx_delay_us; double deepsleep_dcfclk_mhz; double total_flip_bw; unsigned int total_flip_bytes; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c index 71ea503cb32f..412e75eb4704 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c @@ -141,9 +141,6 @@ void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _v dml_print("DML_RQ_DLG_CALC: t_urg_wm_us = %3.2f\n", dlg_sys_param->t_urg_wm_us); dml_print("DML_RQ_DLG_CALC: t_sr_wm_us = %3.2f\n", dlg_sys_param->t_sr_wm_us); dml_print("DML_RQ_DLG_CALC: t_extra_us = %3.2f\n", dlg_sys_param->t_extra_us); - dml_print( - "DML_RQ_DLG_CALC: t_srx_delay_us = %3.2f\n", - dlg_sys_param->t_srx_delay_us); dml_print( "DML_RQ_DLG_CALC: deepsleep_dcfclk_mhz = %3.2f\n", dlg_sys_param->deepsleep_dcfclk_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index 59dc2c5b58dd..3df559c591f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -1331,10 +1331,6 @@ void dml1_rq_dlg_get_dlg_params( if (dual_plane) DTRACE("DLG: %s: swath_height_c = %d", __func__, swath_height_c); - DTRACE( - "DLG: %s: t_srx_delay_us = %3.2f", - __func__, - (double) dlg_sys_param->t_srx_delay_us); DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, (double) line_time_in_us); DTRACE("DLG: %s: vupdate_offset = %d", __func__, vupdate_offset); DTRACE("DLG: %s: vupdate_width = %d", __func__, vupdate_width); From 4b77e4abb32cddb9e666e2ac411b0b0d6b8331dd Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Mon, 24 Jan 2022 12:30:02 +0100 Subject: [PATCH 145/367] PCI: mt7621: Drop of_match_ptr() to avoid unused variable We have stubs for most OF interfaces even when CONFIG_OF is not set, so we allow building of pcie-mt7621.c in that case for compile testing. When CONFIG_OF is not set, "of_match_ptr(mt7621_pcie_ids)" compiles to NULL, which leaves mt7621_pcie_ids unused: $ make W=1 drivers/pci/controller/pcie-mt7621.c:549:34: warning: unused variable 'mt7621_pcie_ids' [-Wunused-const-variable] Drop of_match_ptr() to avoid the unused variable warning. [bhelgaas: commit log] Fixes: 2bdd5238e756 ("PCI: mt7621: Add MediaTek MT7621 PCIe host controller driver") Link: https://lore.kernel.org/r/20220124113003.406224-2-sergio.paracuellos@gmail.com Link: https://lore.kernel.org/r/202201241754.igtHzgHv-lkp@intel.com Reported-by: kernel test robot Signed-off-by: Sergio Paracuellos Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-mt7621.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c index 3824862ea144..f2e567282d3e 100644 --- a/drivers/pci/controller/pcie-mt7621.c +++ b/drivers/pci/controller/pcie-mt7621.c @@ -557,7 +557,7 @@ static struct platform_driver mt7621_pcie_driver = { .remove = mt7621_pcie_remove, .driver = { .name = "mt7621-pci", - .of_match_table = of_match_ptr(mt7621_pcie_ids), + .of_match_table = mt7621_pcie_ids, }, }; builtin_platform_driver(mt7621_pcie_driver); From c035366d9c9fe48d947ee6c43465ab43d42e20f2 Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Mon, 24 Jan 2022 12:30:03 +0100 Subject: [PATCH 146/367] PCI: mt7621: Remove unused function pcie_rmw() Function pcie_rmw() is not being used at all and can be deleted. Hence get rid of it, which fixes this warning: drivers/pci/controller/pcie-mt7621.c:112:20: warning: unused function 'pcie_rmw' [-Wunused-function] Fixes: 2bdd5238e756 ("PCI: mt7621: Add MediaTek MT7621 PCIe host controller driver") Link: https://lore.kernel.org/r/20220124113003.406224-3-sergio.paracuellos@gmail.com Link: https://lore.kernel.org/all/202201241754.igtHzgHv-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Sergio Paracuellos Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-mt7621.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c index f2e567282d3e..33eb37a2225c 100644 --- a/drivers/pci/controller/pcie-mt7621.c +++ b/drivers/pci/controller/pcie-mt7621.c @@ -109,15 +109,6 @@ static inline void pcie_write(struct mt7621_pcie *pcie, u32 val, u32 reg) writel_relaxed(val, pcie->base + reg); } -static inline void pcie_rmw(struct mt7621_pcie *pcie, u32 reg, u32 clr, u32 set) -{ - u32 val = readl_relaxed(pcie->base + reg); - - val &= ~clr; - val |= set; - writel_relaxed(val, pcie->base + reg); -} - static inline u32 pcie_port_read(struct mt7621_pcie_port *port, u32 reg) { return readl_relaxed(port->base + reg); From 0e3135d3bfa5dfb658145238d2bc723a8e30c3a3 Mon Sep 17 00:00:00 2001 From: He Fengqing Date: Sat, 22 Jan 2022 10:29:36 +0000 Subject: [PATCH 147/367] bpf: Fix possible race in inc_misses_counter It seems inc_misses_counter() suffers from same issue fixed in the commit d979617aa84d ("bpf: Fixes possible race in update_prog_stats() for 32bit arches"): As it can run while interrupts are enabled, it could be re-entered and the u64_stats syncp could be mangled. Fixes: 9ed9e9ba2337 ("bpf: Count the number of times recursion was prevented") Signed-off-by: He Fengqing Acked-by: John Fastabend Link: https://lore.kernel.org/r/20220122102936.1219518-1-hefengqing@huawei.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/trampoline.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 4b6974a195c1..5e7edf913060 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -550,11 +550,12 @@ static __always_inline u64 notrace bpf_prog_start_time(void) static void notrace inc_misses_counter(struct bpf_prog *prog) { struct bpf_prog_stats *stats; + unsigned int flags; stats = this_cpu_ptr(prog->stats); - u64_stats_update_begin(&stats->syncp); + flags = u64_stats_update_begin_irqsave(&stats->syncp); u64_stats_inc(&stats->misses); - u64_stats_update_end(&stats->syncp); + u64_stats_update_end_irqrestore(&stats->syncp, flags); } /* The logic is similar to bpf_prog_run(), but with an explicit From e2bcbd7769ee8f05e1b3d10848aace98973844e4 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 24 Jan 2022 15:30:28 +0000 Subject: [PATCH 148/367] tools headers UAPI: remove stale lirc.h The lirc.h file is an old copy of lirc.h from the kernel sources. It is out of date, and the bpf lirc tests don't need a new copy anyway. As long as /usr/include/linux/lirc.h is from kernel v5.2 or newer, the tests will compile fine. Signed-off-by: Sean Young Reviewed-by: Shuah Khan Link: https://lore.kernel.org/r/20220124153028.394409-1-sean@mess.org Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/lirc.h | 229 ------------------ .../selftests/bpf/test_lirc_mode2_user.c | 1 - 2 files changed, 230 deletions(-) delete mode 100644 tools/include/uapi/linux/lirc.h diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h deleted file mode 100644 index 45fcbf99d72e..000000000000 --- a/tools/include/uapi/linux/lirc.h +++ /dev/null @@ -1,229 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * lirc.h - linux infrared remote control header file - * last modified 2010/07/13 by Jarod Wilson - */ - -#ifndef _LINUX_LIRC_H -#define _LINUX_LIRC_H - -#include -#include - -#define PULSE_BIT 0x01000000 -#define PULSE_MASK 0x00FFFFFF - -#define LIRC_MODE2_SPACE 0x00000000 -#define LIRC_MODE2_PULSE 0x01000000 -#define LIRC_MODE2_FREQUENCY 0x02000000 -#define LIRC_MODE2_TIMEOUT 0x03000000 - -#define LIRC_VALUE_MASK 0x00FFFFFF -#define LIRC_MODE2_MASK 0xFF000000 - -#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) -#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) -#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) -#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) - -#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) -#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) - -#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE) -#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) -#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) -#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) - -/* used heavily by lirc userspace */ -#define lirc_t int - -/*** lirc compatible hardware features ***/ - -#define LIRC_MODE2SEND(x) (x) -#define LIRC_SEND2MODE(x) (x) -#define LIRC_MODE2REC(x) ((x) << 16) -#define LIRC_REC2MODE(x) ((x) >> 16) - -#define LIRC_MODE_RAW 0x00000001 -#define LIRC_MODE_PULSE 0x00000002 -#define LIRC_MODE_MODE2 0x00000004 -#define LIRC_MODE_SCANCODE 0x00000008 -#define LIRC_MODE_LIRCCODE 0x00000010 - - -#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW) -#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE) -#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2) -#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE) - -#define LIRC_CAN_SEND_MASK 0x0000003f - -#define LIRC_CAN_SET_SEND_CARRIER 0x00000100 -#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200 -#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400 - -#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW) -#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE) -#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2) -#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE) -#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE) - -#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) - -#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) - -#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 -#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 -#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 -#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 -#define LIRC_CAN_SET_REC_FILTER 0x08000000 - -#define LIRC_CAN_MEASURE_CARRIER 0x02000000 -#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 - -#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) -#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) - -#define LIRC_CAN_NOTIFY_DECODE 0x01000000 - -/*** IOCTL commands for lirc driver ***/ - -#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) - -#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32) -#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32) -#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32) - -#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32) -#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32) - -/* code length in bits, currently only for LIRC_MODE_LIRCCODE */ -#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32) - -#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32) -#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32) -/* Note: these can reset the according pulse_width */ -#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32) -#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32) -#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32) -#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32) - -/* - * when a timeout != 0 is set the driver will send a - * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is - * never sent, timeout is disabled by default - */ -#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32) - -/* 1 enables, 0 disables timeout reports in MODE2 */ -#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32) - -/* - * if enabled from the next key press on the driver will send - * LIRC_MODE2_FREQUENCY packets - */ -#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32) - -/* - * to set a range use LIRC_SET_REC_CARRIER_RANGE with the - * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound - */ -#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32) - -#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32) - -/* - * Return the recording timeout, which is either set by - * the ioctl LIRC_SET_REC_TIMEOUT or by the kernel after setting the protocols. - */ -#define LIRC_GET_REC_TIMEOUT _IOR('i', 0x00000024, __u32) - -/* - * struct lirc_scancode - decoded scancode with protocol for use with - * LIRC_MODE_SCANCODE - * - * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR - * was decoded. - * @flags: should be 0 for transmit. When receiving scancodes, - * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set - * depending on the protocol - * @rc_proto: see enum rc_proto - * @keycode: the translated keycode. Set to 0 for transmit. - * @scancode: the scancode received or to be sent - */ -struct lirc_scancode { - __u64 timestamp; - __u16 flags; - __u16 rc_proto; - __u32 keycode; - __u64 scancode; -}; - -/* Set if the toggle bit of rc-5 or rc-6 is enabled */ -#define LIRC_SCANCODE_FLAG_TOGGLE 1 -/* Set if this is a nec or sanyo repeat */ -#define LIRC_SCANCODE_FLAG_REPEAT 2 - -/** - * enum rc_proto - the Remote Controller protocol - * - * @RC_PROTO_UNKNOWN: Protocol not known - * @RC_PROTO_OTHER: Protocol known but proprietary - * @RC_PROTO_RC5: Philips RC5 protocol - * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol - * @RC_PROTO_RC5_SZ: StreamZap variant of RC5 - * @RC_PROTO_JVC: JVC protocol - * @RC_PROTO_SONY12: Sony 12 bit protocol - * @RC_PROTO_SONY15: Sony 15 bit protocol - * @RC_PROTO_SONY20: Sony 20 bit protocol - * @RC_PROTO_NEC: NEC protocol - * @RC_PROTO_NECX: Extended NEC protocol - * @RC_PROTO_NEC32: NEC 32 bit protocol - * @RC_PROTO_SANYO: Sanyo protocol - * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard - * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse - * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol - * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol - * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol - * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol - * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol - * @RC_PROTO_SHARP: Sharp protocol - * @RC_PROTO_XMP: XMP protocol - * @RC_PROTO_CEC: CEC protocol - * @RC_PROTO_IMON: iMon Pad protocol - * @RC_PROTO_RCMM12: RC-MM protocol 12 bits - * @RC_PROTO_RCMM24: RC-MM protocol 24 bits - * @RC_PROTO_RCMM32: RC-MM protocol 32 bits - */ -enum rc_proto { - RC_PROTO_UNKNOWN = 0, - RC_PROTO_OTHER = 1, - RC_PROTO_RC5 = 2, - RC_PROTO_RC5X_20 = 3, - RC_PROTO_RC5_SZ = 4, - RC_PROTO_JVC = 5, - RC_PROTO_SONY12 = 6, - RC_PROTO_SONY15 = 7, - RC_PROTO_SONY20 = 8, - RC_PROTO_NEC = 9, - RC_PROTO_NECX = 10, - RC_PROTO_NEC32 = 11, - RC_PROTO_SANYO = 12, - RC_PROTO_MCIR2_KBD = 13, - RC_PROTO_MCIR2_MSE = 14, - RC_PROTO_RC6_0 = 15, - RC_PROTO_RC6_6A_20 = 16, - RC_PROTO_RC6_6A_24 = 17, - RC_PROTO_RC6_6A_32 = 18, - RC_PROTO_RC6_MCE = 19, - RC_PROTO_SHARP = 20, - RC_PROTO_XMP = 21, - RC_PROTO_CEC = 22, - RC_PROTO_IMON = 23, - RC_PROTO_RCMM12 = 24, - RC_PROTO_RCMM24 = 25, - RC_PROTO_RCMM32 = 26, -}; - -#endif diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index ebf68dce5504..2893e9f2f1e0 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -28,7 +28,6 @@ // 5. We can read keycode from same /dev/lirc device #include -#include #include #include #include From 76cea3d95513fe40000d06a3719c4bb6b53275e2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Jan 2022 14:05:27 +1000 Subject: [PATCH 149/367] Revert "drm/ast: Support 1600x900 with 108MHz PCLK" This reverts commit 9bb7b689274b67ecb3641e399e76f84adc627df1. This caused a regression reported to Red Hat. Fixes: 9bb7b689274b ("drm/ast: Support 1600x900 with 108MHz PCLK") Signed-off-by: Dave Airlie Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220120040527.552068-1-airlied@gmail.com --- drivers/gpu/drm/ast/ast_tables.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h index d9eb353a4bf0..dbe1cc620f6e 100644 --- a/drivers/gpu/drm/ast/ast_tables.h +++ b/drivers/gpu/drm/ast/ast_tables.h @@ -282,8 +282,6 @@ static const struct ast_vbios_enhtable res_1360x768[] = { }; static const struct ast_vbios_enhtable res_1600x900[] = { - {1800, 1600, 24, 80, 1000, 900, 1, 3, VCLK108, /* 60Hz */ - (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 3, 0x3A }, {1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | AST2500PreCatchCRT), 60, 1, 0x3A }, From c733ebb7cb67dfb146a07c0ae329a0de9ec52f36 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 24 Jan 2022 13:38:09 +0000 Subject: [PATCH 150/367] irqchip/gic-v3-its: Reset each ITS's BASERn register before probe A recent bug report outlined that the way GICv4.1 is handled across kexec is pretty bad. We can end-up in a situation where ITSs share memory (this is the case when SVPET==1) and reprogram the base registers, creating a situation where ITSs that are part of a given affinity group see different pointers. Which is illegal. Boo. In order to restore some sanity, reset the BASERn registers to 0 *before* probing any ITS. Although this isn't optimised at all, this is only a once-per-boot cost, which shouldn't show up on anyone's radar. Cc: Jay Chen Signed-off-by: Marc Zyngier Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20211216190315.GA14220@lpieralisi Link: https://lore.kernel.org/r/20220124133809.1291195-1-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 120 +++++++++++++++++++++++++------ 1 file changed, 99 insertions(+), 21 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7b8f1ec0ff78..220fa45c22f4 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4856,6 +4856,38 @@ static struct syscore_ops its_syscore_ops = { .resume = its_restore_enable, }; +static void __init __iomem *its_map_one(struct resource *res, int *err) +{ + void __iomem *its_base; + u32 val; + + its_base = ioremap(res->start, SZ_64K); + if (!its_base) { + pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start); + *err = -ENOMEM; + return NULL; + } + + val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK; + if (val != 0x30 && val != 0x40) { + pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start); + *err = -ENODEV; + goto out_unmap; + } + + *err = its_force_quiescent(its_base); + if (*err) { + pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start); + goto out_unmap; + } + + return its_base; + +out_unmap: + iounmap(its_base); + return NULL; +} + static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) { struct irq_domain *inner_domain; @@ -4963,29 +4995,14 @@ static int __init its_probe_one(struct resource *res, { struct its_node *its; void __iomem *its_base; - u32 val, ctlr; u64 baser, tmp, typer; struct page *page; + u32 ctlr; int err; - its_base = ioremap(res->start, SZ_64K); - if (!its_base) { - pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start); - return -ENOMEM; - } - - val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK; - if (val != 0x30 && val != 0x40) { - pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start); - err = -ENODEV; - goto out_unmap; - } - - err = its_force_quiescent(its_base); - if (err) { - pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start); - goto out_unmap; - } + its_base = its_map_one(res, &err); + if (!its_base) + return err; pr_info("ITS %pR\n", res); @@ -5249,6 +5266,23 @@ out: return ret; } +/* Mark all the BASER registers as invalid before they get reprogrammed */ +static int __init its_reset_one(struct resource *res) +{ + void __iomem *its_base; + int err, i; + + its_base = its_map_one(res, &err); + if (!its_base) + return err; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) + gits_write_baser(0, its_base + GITS_BASER + (i << 3)); + + iounmap(its_base); + return 0; +} + static const struct of_device_id its_device_id[] = { { .compatible = "arm,gic-v3-its", }, {}, @@ -5259,6 +5293,26 @@ static int __init its_of_probe(struct device_node *node) struct device_node *np; struct resource res; + /* + * Make sure *all* the ITS are reset before we probe any, as + * they may be sharing memory. If any of the ITS fails to + * reset, don't even try to go any further, as this could + * result in something even worse. + */ + for (np = of_find_matching_node(node, its_device_id); np; + np = of_find_matching_node(np, its_device_id)) { + int err; + + if (!of_device_is_available(np) || + !of_property_read_bool(np, "msi-controller") || + of_address_to_resource(np, 0, &res)) + continue; + + err = its_reset_one(&res); + if (err) + return err; + } + for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { if (!of_device_is_available(np)) @@ -5421,11 +5475,35 @@ dom_err: return err; } +static int __init its_acpi_reset(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_generic_translator *its_entry; + struct resource res; + + its_entry = (struct acpi_madt_generic_translator *)header; + res = (struct resource) { + .start = its_entry->base_address, + .end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1, + .flags = IORESOURCE_MEM, + }; + + return its_reset_one(&res); +} + static void __init its_acpi_probe(void) { acpi_table_parse_srat_its(); - acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, - gic_acpi_parse_madt_its, 0); + /* + * Make sure *all* the ITS are reset before we probe any, as + * they may be sharing memory. If any of the ITS fails to + * reset, don't even try to go any further, as this could + * result in something even worse. + */ + if (acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, + its_acpi_reset, 0) > 0) + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, + gic_acpi_parse_madt_its, 0); acpi_its_srat_maps_free(); } #else From 825911492eb15bf8bb7fb94bc0c0421fe7a6327d Mon Sep 17 00:00:00 2001 From: Sing-Han Chen Date: Wed, 12 Jan 2022 17:41:43 +0800 Subject: [PATCH 151/367] ucsi_ccg: Check DEV_INT bit only when starting CCG4 CCGx clears Bit 0:Device Interrupt in the INTR_REG if CCGx is reset successfully. However, there might be a chance that other bits in INTR_REG are not cleared due to internal data queued in PPM. This case misleads the driver that CCGx reset failed. The commit checks bit 0 in INTR_REG and ignores other bits. The ucsi driver would reset PPM later. Fixes: 247c554a14aa ("usb: typec: ucsi: add support for Cypress CCGx") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Sing-Han Chen Signed-off-by: Wayne Chang Link: https://lore.kernel.org/r/20220112094143.628610-1-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index bff96d64dddf..6db7c8ddd51c 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -325,7 +325,7 @@ static int ucsi_ccg_init(struct ucsi_ccg *uc) if (status < 0) return status; - if (!data) + if (!(data & DEV_INT)) return 0; status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); From 904edf8aeb459697129be5fde847e2a502f41fd9 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Sat, 22 Jan 2022 08:33:22 +0530 Subject: [PATCH 152/367] usb: gadget: f_sourcesink: Fix isoc transfer for USB_SPEED_SUPER_PLUS Currently when gadget enumerates in super speed plus, the isoc endpoint request buffer size is not calculated correctly. Fix this by checking the gadget speed against USB_SPEED_SUPER_PLUS and update the request buffer size. Fixes: 90c4d05780d4 ("usb: fix various gadgets null ptr deref on 10gbps cabling.") Cc: stable Signed-off-by: Pavankumar Kondeti Link: https://lore.kernel.org/r/1642820602-20619-1-git-send-email-quic_pkondeti@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_sourcesink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 1abf08e5164a..6803cd60cc6d 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -584,6 +584,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in, if (is_iso) { switch (speed) { + case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: size = ss->isoc_maxpacket * (ss->isoc_mult + 1) * From 2e3dd4a6246945bf84ea6f478365d116e661554c Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 17 Jan 2022 15:00:39 +0000 Subject: [PATCH 153/367] usb: common: ulpi: Fix crash in ulpi_match() Commit 7495af930835 ("ARM: multi_v7_defconfig: Enable drivers for DragonBoard 410c") enables the CONFIG_PHY_QCOM_USB_HS for the ARM multi_v7_defconfig. Enabling this Kconfig is causing the kernel to crash on the Tegra20 Ventana platform in the ulpi_match() function. The Qualcomm USB HS PHY driver that is enabled by CONFIG_PHY_QCOM_USB_HS, registers a ulpi_driver but this driver does not provide an 'id_table', so when ulpi_match() is called on the Tegra20 Ventana platform, it crashes when attempting to deference the id_table pointer which is not valid. The Qualcomm USB HS PHY driver uses device-tree for matching the ULPI driver with the device and so fix this crash by using device-tree for matching if the id_table is not valid. Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT") Cc: stable Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20220117150039.44058-1-jonathanh@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 4169cf40a03b..8f8405b0d608 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver) struct ulpi *ulpi = to_ulpi_dev(dev); const struct ulpi_device_id *id; - /* Some ULPI devices don't have a vendor id so rely on OF match */ - if (ulpi->id.vendor == 0) + /* + * Some ULPI devices don't have a vendor id + * or provide an id_table so rely on OF match. + */ + if (ulpi->id.vendor == 0 || !drv->id_table) return of_driver_match_device(dev, driver); for (id = drv->id_table; id->vendor; id++) From 9df478463d9feb90dae24f183383961cf123a0ec Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 10 Jan 2022 11:27:38 -0600 Subject: [PATCH 154/367] usb: xhci-plat: fix crash when suspend if remote wake enable Crashed at i.mx8qm platform when suspend if enable remote wakeup Internal error: synchronous external abort: 96000210 [#1] PREEMPT SMP Modules linked in: CPU: 2 PID: 244 Comm: kworker/u12:6 Not tainted 5.15.5-dirty #12 Hardware name: Freescale i.MX8QM MEK (DT) Workqueue: events_unbound async_run_entry_fn pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : xhci_disable_hub_port_wake.isra.62+0x60/0xf8 lr : xhci_disable_hub_port_wake.isra.62+0x34/0xf8 sp : ffff80001394bbf0 x29: ffff80001394bbf0 x28: 0000000000000000 x27: ffff00081193b578 x26: ffff00081193b570 x25: 0000000000000000 x24: 0000000000000000 x23: ffff00081193a29c x22: 0000000000020001 x21: 0000000000000001 x20: 0000000000000000 x19: ffff800014e90490 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000002 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000960 x9 : ffff80001394baa0 x8 : ffff0008145d1780 x7 : ffff0008f95b8e80 x6 : 000000001853b453 x5 : 0000000000000496 x4 : 0000000000000000 x3 : ffff00081193a29c x2 : 0000000000000001 x1 : 0000000000000000 x0 : ffff000814591620 Call trace: xhci_disable_hub_port_wake.isra.62+0x60/0xf8 xhci_suspend+0x58/0x510 xhci_plat_suspend+0x50/0x78 platform_pm_suspend+0x2c/0x78 dpm_run_callback.isra.25+0x50/0xe8 __device_suspend+0x108/0x3c0 The basic flow: 1. run time suspend call xhci_suspend, xhci parent devices gate the clock. 2. echo mem >/sys/power/state, system _device_suspend call xhci_suspend 3. xhci_suspend call xhci_disable_hub_port_wake, which access register, but clock already gated by run time suspend. This problem was hidden by power domain driver, which call run time resume before it. But the below commit remove it and make this issue happen. commit c1df456d0f06e ("PM: domains: Don't runtime resume devices at genpd_prepare()") This patch call run time resume before suspend to make sure clock is on before access register. Reviewed-by: Peter Chen Cc: stable Signed-off-by: Frank Li Testeb-by: Abel Vesa Link: https://lore.kernel.org/r/20220110172738.31686-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index c1edcc9b13ce..dc570ce4e831 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -437,6 +437,9 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); int ret; + if (pm_runtime_suspended(dev)) + pm_runtime_resume(dev); + ret = xhci_priv_suspend_quirk(hcd); if (ret) return ret; From 9678f3361afc27a3124cd2824aec0227739986fb Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 25 Jan 2022 18:02:50 -0600 Subject: [PATCH 155/367] usb: dwc3: xilinx: Skip resets and USB3 register settings for USB2.0 mode It appears that the PIPE clock should not be selected when only USB 2.0 is being used in the design and no USB 3.0 reference clock is used. Also, the core resets are not required if a USB3 PHY is not in use, and will break things if USB3 is actually used but the PHY entry is not listed in the device tree. Skip core resets and register settings that are only required for USB3 mode when no USB3 PHY is specified in the device tree. Fixes: 84770f028fab ("usb: dwc3: Add driver for Xilinx platforms") Cc: stable Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20220126000253.1586760-2-robert.hancock@calian.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-xilinx.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index 9cc3ad701a29..06b591b14b09 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -110,6 +110,18 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) usb3_phy = NULL; } + /* + * The following core resets are not required unless a USB3 PHY + * is used, and the subsequent register settings are not required + * unless a core reset is performed (they should be set properly + * by the first-stage boot loader, but may be reverted by a core + * reset). They may also break the configuration if USB3 is actually + * in use but the usb3-phy entry is missing from the device tree. + * Therefore, skip these operations in this case. + */ + if (!usb3_phy) + goto skip_usb3_phy; + crst = devm_reset_control_get_exclusive(dev, "usb_crst"); if (IS_ERR(crst)) { ret = PTR_ERR(crst); @@ -188,6 +200,7 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) goto err; } +skip_usb3_phy: /* * This routes the USB DMA traffic to go through FPD path instead * of reaching DDR directly. This traffic routing is needed to From 2cc9b1c93b1c4caa2d971856c0780fb5f7d04692 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 25 Jan 2022 18:02:51 -0600 Subject: [PATCH 156/367] usb: dwc3: xilinx: Fix error handling when getting USB3 PHY The code that looked up the USB3 PHY was ignoring all errors other than EPROBE_DEFER in an attempt to handle the PHY not being present. Fix and simplify the code by using devm_phy_optional_get and dev_err_probe so that a missing PHY is not treated as an error and unexpected errors are handled properly. Fixes: 84770f028fab ("usb: dwc3: Add driver for Xilinx platforms") Cc: stable Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20220126000253.1586760-3-robert.hancock@calian.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-xilinx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index 06b591b14b09..e14ac15e24c3 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -102,12 +102,12 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) int ret; u32 reg; - usb3_phy = devm_phy_get(dev, "usb3-phy"); - if (PTR_ERR(usb3_phy) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; + usb3_phy = devm_phy_optional_get(dev, "usb3-phy"); + if (IS_ERR(usb3_phy)) { + ret = PTR_ERR(usb3_phy); + dev_err_probe(dev, ret, + "failed to get USB3 PHY\n"); goto err; - } else if (IS_ERR(usb3_phy)) { - usb3_phy = NULL; } /* From 26d81b29249273d39e753cc0c7b0ca62c6a6283f Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Wed, 19 Jan 2022 10:08:49 +0800 Subject: [PATCH 157/367] usb: gadget: at91_udc: fix incorrect print type Fix a build error observed with ARCH=arm DEFCONFIG=allmodconfig build. drivers/usb/gadget/udc/at91_udc.h:174:42: error: format '%d' expects argument of type 'int', but argument 3 has type 'struct gpio_desc *' [-Werror=format=] Fixes: 4a555f2b8d31 ("usb: gadget: at91_udc: Convert to GPIO descriptors") Reviewed-by: Macpaul Lin Signed-off-by: Miles Chen Link: https://lore.kernel.org/r/20220119020849.25732-1-miles.chen@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/at91_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c index dd0819df096e..9040a0561466 100644 --- a/drivers/usb/gadget/udc/at91_udc.c +++ b/drivers/usb/gadget/udc/at91_udc.c @@ -1895,7 +1895,7 @@ static int at91udc_probe(struct platform_device *pdev) at91_vbus_irq, 0, driver_name, udc); if (retval) { DBG("request vbus irq %d failed\n", - udc->board.vbus_pin); + desc_to_gpio(udc->board.vbus_pin)); goto err_unprepare_iclk; } } From ac55d163855924aa5af9f1560977da8f346963c8 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Tue, 7 Dec 2021 14:01:01 +0100 Subject: [PATCH 158/367] usb: dwc2: gadget: don't try to disable ep0 in dwc2_hsotg_suspend Calling dwc2_hsotg_ep_disable on ep0 (in/out) will lead to the following logs before returning -EINVAL: dwc2 49000000.usb-otg: dwc2_hsotg_ep_disable: called for ep0 dwc2 49000000.usb-otg: dwc2_hsotg_ep_disable: called for ep0 To avoid these two logs while suspending, start disabling the endpoint from the index 1, as done in dwc2_hsotg_udc_stop: /* all endpoints should be shutdown */ for (ep = 1; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep); } Acked-by: Minas Harutyunyan Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211207130101.270314-1-amelie.delaunay@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 2bc03f41c70a..eee3504397e6 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -5097,7 +5097,7 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg) hsotg->gadget.speed = USB_SPEED_UNKNOWN; spin_unlock_irqrestore(&hsotg->lock, flags); - for (ep = 0; ep < hsotg->num_of_eps; ep++) { + for (ep = 1; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) From 90cafce461de108bfb07c06148395dc86c3fcd23 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Mon, 6 Dec 2021 18:19:31 +0800 Subject: [PATCH 159/367] spi: change clk_disable_unprepare to clk_unprepare The corresponding API for clk_prepare is clk_unprepare, other than clk_disable_unprepare. Fix this by changing clk_disable_unprepare to clk_unprepare. Fixes: 5762ab71eb24 ("spi: Add support for Armada 3700 SPI Controller") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20211206101931.2816597-1-mudongliangabcd@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 46feafe4e201..d8cc4b270644 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -901,7 +901,7 @@ static int a3700_spi_probe(struct platform_device *pdev) return 0; error_clk: - clk_disable_unprepare(spi->clk); + clk_unprepare(spi->clk); error: spi_master_put(master); out: From 23e3404de1aecc62c14ac96d4b63403c3e0f52d5 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Wed, 22 Dec 2021 13:48:12 +0900 Subject: [PATCH 160/367] spi: uniphier: Fix a bug that doesn't point to private data correctly In uniphier_spi_remove(), there is a wrong code to get private data from the platform device, so the driver can't be removed properly. The driver should get spi_master from the platform device and retrieve the private data from it. Cc: Fixes: 5ba155a4d4cc ("spi: add SPI controller driver for UniPhier SoC") Signed-off-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/1640148492-32178-1-git-send-email-hayashi.kunihiko@socionext.com Signed-off-by: Mark Brown --- drivers/spi/spi-uniphier.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c index 8900e51e1a1c..342ee8d2c476 100644 --- a/drivers/spi/spi-uniphier.c +++ b/drivers/spi/spi-uniphier.c @@ -767,12 +767,13 @@ out_master_put: static int uniphier_spi_remove(struct platform_device *pdev) { - struct uniphier_spi_priv *priv = platform_get_drvdata(pdev); + struct spi_master *master = platform_get_drvdata(pdev); + struct uniphier_spi_priv *priv = spi_master_get_devdata(master); - if (priv->master->dma_tx) - dma_release_channel(priv->master->dma_tx); - if (priv->master->dma_rx) - dma_release_channel(priv->master->dma_rx); + if (master->dma_tx) + dma_release_channel(master->dma_tx); + if (master->dma_rx) + dma_release_channel(master->dma_rx); clk_disable_unprepare(priv->clk); From 79aa3e19fe8f5be30e846df8a436bfe306e8b1a6 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 11 Jan 2022 10:07:37 +0100 Subject: [PATCH 161/367] usb: cdnsp: Fix segmentation fault in cdns_lost_power function CDNSP driver read not initialized cdns->otg_v0_regs which lead to segmentation fault. Patch fixes this issue. Fixes: 2cf2581cd229 ("usb: cdns3: add power lost support for system resume") cc: Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20220111090737.10345-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/drd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index 55c73b1d8704..d00ff98dffab 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -483,11 +483,11 @@ int cdns_drd_exit(struct cdns *cdns) /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { - if (cdns->version == CDNS3_CONTROLLER_V1) { - if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0))) + if (cdns->version == CDNS3_CONTROLLER_V0) { + if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0))) return true; } else { - if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0))) + if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0))) return true; } return false; From e937440f7fc444a3e3f1fb75ea65292d6f433a44 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 26 Jan 2022 11:04:47 +0000 Subject: [PATCH 162/367] spi: meson-spicc: add IRQ check in meson_spicc_probe This check misses checking for platform_get_irq()'s call and may passes the negative error codes to devm_request_irq(), which takes unsigned IRQ #, causing it to fail with -EINVAL, overriding an original error code. Stop calling devm_request_irq() with invalid IRQ #s. Fixes: 454fa271bc4e ("spi: Add Meson SPICC driver") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220126110447.24549-1-linmq006@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-meson-spicc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index c208efeadd18..0bc7daa7afc8 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -693,6 +693,11 @@ static int meson_spicc_probe(struct platform_device *pdev) writel_relaxed(0, spicc->base + SPICC_INTREG); irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto out_master; + } + ret = devm_request_irq(&pdev->dev, irq, meson_spicc_irq, 0, NULL, spicc); if (ret) { From 6a7b9f002eca6788d346c16a6ff0c218b41f8d1d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 26 Jan 2022 14:33:58 +0100 Subject: [PATCH 163/367] Revert "tty: serial: Use fifo in 8250 console driver" This reverts commit 5021d709b31b8a14317998a33cbc78be0de9ab30. The patch is still a bit buggy, and this breaks some other hardware types. It needs to be resubmitted in a non-buggy way, and make sure the other hardware types also continue to work properly. Fixes: 5021d709b31b ("tty: serial: Use fifo in 8250 console driver") Reported-by: Sebastian Andrzej Siewior Reported-by: Jon Hunter Link: https://lore.kernel.org/r/Ye/1+Z8mEzbKbrqG@linutronix.de Link: https://lore.kernel.org/r/a1ac6254-f79e-d131-fa2a-c7ad714c6d4a@nvidia.com Cc: Wander Lairson Costa Cc: Andy Shevchenko Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 61 +++-------------------------- 1 file changed, 6 insertions(+), 55 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2abb3de11a48..3b12bfc1ed67 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2056,7 +2056,10 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state) serial8250_rpm_put(up); } -static void wait_for_lsr(struct uart_8250_port *up, int bits) +/* + * Wait for transmitter & holding register to empty + */ +static void wait_for_xmitr(struct uart_8250_port *up, int bits) { unsigned int status, tmout = 10000; @@ -2073,16 +2076,6 @@ static void wait_for_lsr(struct uart_8250_port *up, int bits) udelay(1); touch_nmi_watchdog(); } -} - -/* - * Wait for transmitter & holding register to empty - */ -static void wait_for_xmitr(struct uart_8250_port *up, int bits) -{ - unsigned int tmout; - - wait_for_lsr(up, bits); /* Wait up to 1s for flow control if necessary */ if (up->port.flags & UPF_CONS_FLOW) { @@ -3332,35 +3325,6 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } -/* - * Print a string to the serial port using the device FIFO - * - * It sends fifosize bytes and then waits for the fifo - * to get empty. - */ -static void serial8250_console_fifo_write(struct uart_8250_port *up, - const char *s, unsigned int count) -{ - int i; - const char *end = s + count; - unsigned int fifosize = up->port.fifosize; - bool cr_sent = false; - - while (s != end) { - wait_for_lsr(up, UART_LSR_THRE); - - for (i = 0; i < fifosize && s != end; ++i) { - if (*s == '\n' && !cr_sent) { - serial_out(up, UART_TX, '\r'); - cr_sent = true; - } else { - serial_out(up, UART_TX, *s++); - cr_sent = false; - } - } - } -} - /* * Print a string to the serial port trying not to disturb * any possible real use of the port... @@ -3376,7 +3340,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, struct uart_8250_em485 *em485 = up->em485; struct uart_port *port = &up->port; unsigned long flags; - unsigned int ier, use_fifo; + unsigned int ier; int locked = 1; touch_nmi_watchdog(); @@ -3408,20 +3372,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, mdelay(port->rs485.delay_rts_before_send); } - use_fifo = (up->capabilities & UART_CAP_FIFO) && - port->fifosize > 1 && - (serial_port_in(port, UART_FCR) & UART_FCR_ENABLE_FIFO) && - /* - * After we put a data in the fifo, the controller will send - * it regardless of the CTS state. Therefore, only use fifo - * if we don't use control flow. - */ - !(up->port.flags & UPF_CONS_FLOW); - - if (likely(use_fifo)) - serial8250_console_fifo_write(up, s, count); - else - uart_console_write(port, s, count, serial8250_console_putchar); + uart_console_write(port, s, count, serial8250_console_putchar); /* * Finally, wait for transmitter to become empty From 592ee1197f78b30bd60c87db9b6c8c045c8d8314 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 26 Jan 2022 09:21:32 +0800 Subject: [PATCH 164/367] blk-mq: fix missing blk_account_io_done() in error path If blk_mq_request_issue_directly() failed from blk_insert_cloned_request(), the request will be accounted start. Currently, blk_insert_cloned_request() is only called by dm, and such request won't be accounted done by dm. In normal path, io will be accounted start from blk_mq_bio_to_request(), when the request is allocated, and such io will be accounted done from __blk_mq_end_request_acct() whether it succeeded or failed. Thus add blk_account_io_done() to fix the problem. Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220126012132.3111551-1-yukuai3@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index f3bf3358a3bb..1adfe4824ef5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2922,6 +2922,8 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * */ blk_mq_run_dispatch_ops(rq->q, ret = blk_mq_request_issue_directly(rq, true)); + if (ret) + blk_account_io_done(rq, ktime_get_ns()); return ret; } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); From 152d1afa834c84530828ee031cf07a00e0fc0b8c Mon Sep 17 00:00:00 2001 From: Cameron Williams Date: Mon, 24 Jan 2022 09:42:23 +0000 Subject: [PATCH 165/367] tty: Add support for Brainboxes UC cards. This commit adds support for the some of the Brainboxes PCI range of cards, including the UC-101, UC-235/246, UC-257, UC-268, UC-275/279, UC-302, UC-310, UC-313, UC-320/324, UC-346, UC-357, UC-368 and UC-420/431. Signed-off-by: Cameron Williams Cc: stable Link: https://lore.kernel.org/r/AM5PR0202MB2564688493F7DD9B9C610827C45E9@AM5PR0202MB2564.eurprd02.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 100 ++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index e8b5469e9dfa..e17e97ea86fa 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4779,8 +4779,30 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */ pbn_b2_4_115200 }, + /* Brainboxes Devices */ /* - * BrainBoxes UC-260 + * Brainboxes UC-101 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0BA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-235/246 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0AA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-257 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0861, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-260/271/701/756 */ { PCI_VENDOR_ID_INTASHIELD, 0x0D21, PCI_ANY_ID, PCI_ANY_ID, @@ -4788,7 +4810,81 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_b2_4_115200 }, { PCI_VENDOR_ID_INTASHIELD, 0x0E34, PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-268 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0841, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-275/279 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0881, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_8_115200 }, + /* + * Brainboxes UC-302 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08E1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-310 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08C1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-313 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08A3, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-320/324 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A61, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-346 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0B02, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-357 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A81, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0A83, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-368 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0C41, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-420/431 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0921, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b2_4_115200 }, /* * Perle PCI-RAS cards From f23653fe64479d96910bfda2b700b1af17c991ac Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 26 Jan 2022 09:22:54 +0000 Subject: [PATCH 166/367] tty: Partially revert the removal of the Cyclades public API Fix a user API regression introduced with commit f76edd8f7ce0 ("tty: cyclades, remove this orphan"), which removed a part of the API and caused compilation errors for user programs using said part, such as GCC 9 in its libsanitizer component[1]: .../libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc:160:10: fatal error: linux/cyclades.h: No such file or directory 160 | #include | ^~~~~~~~~~~~~~~~~~ compilation terminated. make[4]: *** [Makefile:664: sanitizer_platform_limits_posix.lo] Error 1 As the absolute minimum required bring `struct cyclades_monitor' and ioctl numbers back then so as to make the library build again. Add a preprocessor warning as to the obsolescence of the features provided. References: [1] GCC PR sanitizer/100379, "cyclades.h is removed from linux kernel header files", Fixes: f76edd8f7ce0 ("tty: cyclades, remove this orphan") Cc: stable@vger.kernel.org # v5.13+ Reviewed-by: Christoph Hellwig Signed-off-by: Maciej W. Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.20.2201260733430.11348@tpp.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/cyclades.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/uapi/linux/cyclades.h diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h new file mode 100644 index 000000000000..6225c5aebe06 --- /dev/null +++ b/include/uapi/linux/cyclades.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_CYCLADES_H +#define _UAPI_LINUX_CYCLADES_H + +#warning "Support for features provided by this header has been removed" +#warning "Please consider updating your code" + +struct cyclades_monitor { + unsigned long int_count; + unsigned long char_count; + unsigned long char_max; + unsigned long char_last; +}; + +#define CYGETMON 0x435901 +#define CYGETTHRESH 0x435902 +#define CYSETTHRESH 0x435903 +#define CYGETDEFTHRESH 0x435904 +#define CYSETDEFTHRESH 0x435905 +#define CYGETTIMEOUT 0x435906 +#define CYSETTIMEOUT 0x435907 +#define CYGETDEFTIMEOUT 0x435908 +#define CYSETDEFTIMEOUT 0x435909 +#define CYSETRFLOW 0x43590a +#define CYGETRFLOW 0x43590b +#define CYSETRTSDTR_INV 0x43590c +#define CYGETRTSDTR_INV 0x43590d +#define CYZSETPOLLCYCLE 0x43590e +#define CYZGETPOLLCYCLE 0x43590f +#define CYGETCD1400VER 0x435910 +#define CYSETWAIT 0x435912 +#define CYGETWAIT 0x435913 + +#endif /* _UAPI_LINUX_CYCLADES_H */ From db7f19c0aa0abcb751ff0ed694a071363f702b1d Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Tue, 4 Jan 2022 17:35:45 +0100 Subject: [PATCH 167/367] tty: rpmsg: Fix race condition releasing tty port The tty_port struct is part of the rpmsg_tty_port structure. The issue is that the rpmsg_tty_port structure is freed on rpmsg_tty_remove while it is still referenced in the tty_struct. Its release is not predictable due to workqueues. For instance following ftrace shows that rpmsg_tty_close is called after rpmsg_tty_release_cport: nr_test.sh-389 [000] ..... 212.093752: rpmsg_tty_remove <-rpmsg_dev_ remove cat-1191 [001] ..... 212.095697: tty_release <-__fput nr_test.sh-389 [000] ..... 212.099166: rpmsg_tty_release_cport <-rpm sg_tty_remove cat-1191 [001] ..... 212.115352: rpmsg_tty_close <-tty_release cat-1191 [001] ..... 212.115371: release_tty <-tty_release_str As consequence, the port must be free only when user has released the TTY interface. This path : - Introduce the .destruct port tty ops function to release the allocated rpmsg_tty_port structure. - Introduce the .hangup tty ops function to call tty_port_hangup. - Manages the tty port refcounting to trig the .destruct port ops, - Introduces the rpmsg_tty_cleanup function to ensure that the TTY is removed before decreasing the port refcount. Fixes: 7c0408d80579 ("tty: add rpmsg driver") Cc: stable Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20220104163545.34710-1-arnaud.pouliquen@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/rpmsg_tty.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/tty/rpmsg_tty.c b/drivers/tty/rpmsg_tty.c index dae2a4e44f38..29db413bbc03 100644 --- a/drivers/tty/rpmsg_tty.c +++ b/drivers/tty/rpmsg_tty.c @@ -50,10 +50,17 @@ static int rpmsg_tty_cb(struct rpmsg_device *rpdev, void *data, int len, void *p static int rpmsg_tty_install(struct tty_driver *driver, struct tty_struct *tty) { struct rpmsg_tty_port *cport = idr_find(&tty_idr, tty->index); + struct tty_port *port; tty->driver_data = cport; - return tty_port_install(&cport->port, driver, tty); + port = tty_port_get(&cport->port); + return tty_port_install(port, driver, tty); +} + +static void rpmsg_tty_cleanup(struct tty_struct *tty) +{ + tty_port_put(tty->port); } static int rpmsg_tty_open(struct tty_struct *tty, struct file *filp) @@ -106,12 +113,19 @@ static unsigned int rpmsg_tty_write_room(struct tty_struct *tty) return size; } +static void rpmsg_tty_hangup(struct tty_struct *tty) +{ + tty_port_hangup(tty->port); +} + static const struct tty_operations rpmsg_tty_ops = { .install = rpmsg_tty_install, .open = rpmsg_tty_open, .close = rpmsg_tty_close, .write = rpmsg_tty_write, .write_room = rpmsg_tty_write_room, + .hangup = rpmsg_tty_hangup, + .cleanup = rpmsg_tty_cleanup, }; static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void) @@ -137,8 +151,10 @@ static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void) return cport; } -static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport) +static void rpmsg_tty_destruct_port(struct tty_port *port) { + struct rpmsg_tty_port *cport = container_of(port, struct rpmsg_tty_port, port); + mutex_lock(&idr_lock); idr_remove(&tty_idr, cport->id); mutex_unlock(&idr_lock); @@ -146,7 +162,10 @@ static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport) kfree(cport); } -static const struct tty_port_operations rpmsg_tty_port_ops = { }; +static const struct tty_port_operations rpmsg_tty_port_ops = { + .destruct = rpmsg_tty_destruct_port, +}; + static int rpmsg_tty_probe(struct rpmsg_device *rpdev) { @@ -166,7 +185,8 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev) cport->id, dev); if (IS_ERR(tty_dev)) { ret = dev_err_probe(dev, PTR_ERR(tty_dev), "Failed to register tty port\n"); - goto err_destroy; + tty_port_put(&cport->port); + return ret; } cport->rpdev = rpdev; @@ -177,12 +197,6 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev) rpdev->src, rpdev->dst, cport->id); return 0; - -err_destroy: - tty_port_destroy(&cport->port); - rpmsg_tty_release_cport(cport); - - return ret; } static void rpmsg_tty_remove(struct rpmsg_device *rpdev) @@ -192,13 +206,11 @@ static void rpmsg_tty_remove(struct rpmsg_device *rpdev) dev_dbg(&rpdev->dev, "Removing rpmsg tty device %d\n", cport->id); /* User hang up to release the tty */ - if (tty_port_initialized(&cport->port)) - tty_port_tty_hangup(&cport->port, false); + tty_port_tty_hangup(&cport->port, false); tty_unregister_device(rpmsg_tty_driver, cport->id); - tty_port_destroy(&cport->port); - rpmsg_tty_release_cport(cport); + tty_port_put(&cport->port); } static struct rpmsg_device_id rpmsg_driver_tty_id_table[] = { From d06b1cf28297e27127d3da54753a3a01a2fa2f28 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Wed, 12 Jan 2022 13:42:14 -0600 Subject: [PATCH 168/367] serial: 8250: of: Fix mapped region size when using reg-offset property 8250_of supports a reg-offset property which is intended to handle cases where the device registers start at an offset inside the region of memory allocated to the device. The Xilinx 16550 UART, for which this support was initially added, requires this. However, the code did not adjust the overall size of the mapped region accordingly, causing the driver to request an area of memory past the end of the device's allocation. For example, if the UART was allocated an address of 0xb0130000, size of 0x10000 and reg-offset of 0x1000 in the device tree, the region of memory reserved was b0131000-b0140fff, which caused the driver for the region starting at b0140000 to fail to probe. Fix this by subtracting reg-offset from the mapped region size. Fixes: b912b5e2cfb3 ([POWERPC] Xilinx: of_serial support for Xilinx uart 16550.) Cc: stable Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20220112194214.881844-1-robert.hancock@calian.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_of.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index bce28729dd7b..be8626234627 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -83,8 +83,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev, port->mapsize = resource_size(&resource); /* Check for shifted address mapping */ - if (of_property_read_u32(np, "reg-offset", &prop) == 0) + if (of_property_read_u32(np, "reg-offset", &prop) == 0) { + if (prop >= port->mapsize) { + dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n", + prop, &port->mapsize); + ret = -EINVAL; + goto err_unprepare; + } + port->mapbase += prop; + port->mapsize -= prop; + } port->iotype = UPIO_MEM; if (of_property_read_u32(np, "reg-io-width", &prop) == 0) { From 8838b2af23caf1ff0610caef2795d6668a013b2d Mon Sep 17 00:00:00 2001 From: "daniel.starke@siemens.com" Date: Thu, 20 Jan 2022 02:18:57 -0800 Subject: [PATCH 169/367] tty: n_gsm: fix SW flow control encoding/handling n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.2.7.3 states that DC1 (XON) and DC3 (XOFF) are the control characters defined in ISO/IEC 646. These shall be quoted if seen in the data stream to avoid interpretation as flow control characters. ISO/IEC 646 refers to the set of ISO standards described as the ISO 7-bit coded character set for information interchange. Its final version is also known as ITU T.50. See https://www.itu.int/rec/T-REC-T.50-199209-I/en To abide the standard it is needed to quote DC1 and DC3 correctly if these are seen as data bytes and not as control characters. The current implementation already tries to enforce this but fails to catch all defined cases. 3GPP 27.010 chapter 5.2.7.3 clearly states that the most significant bit shall be ignored for DC1 and DC3 handling. The current implementation handles only the case with the most significant bit set 0. Cases in which DC1 and DC3 have the most significant bit set 1 are left unhandled. This patch fixes this by masking the data bytes with ISO_IEC_646_MASK (only the 7 least significant bits set 1) before comparing them with XON (a.k.a. DC1) and XOFF (a.k.a. DC3) when testing which byte values need quotation via byte stuffing. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220120101857.2509-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index ba27b274c967..0b1808e3a912 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -322,6 +322,7 @@ static int addr_cnt; #define GSM1_ESCAPE_BITS 0x20 #define XON 0x11 #define XOFF 0x13 +#define ISO_IEC_646_MASK 0x7F static const struct tty_port_operations gsm_port_ops; @@ -531,7 +532,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) int olen = 0; while (len--) { if (*input == GSM1_SOF || *input == GSM1_ESCAPE - || *input == XON || *input == XOFF) { + || (*input & ISO_IEC_646_MASK) == XON + || (*input & ISO_IEC_646_MASK) == XOFF) { *output++ = GSM1_ESCAPE; *output++ = *input++ ^ GSM1_ESCAPE_BITS; olen++; From d3d079bde07e1b7deaeb57506dc0b86010121d17 Mon Sep 17 00:00:00 2001 From: Valentin Caron Date: Tue, 11 Jan 2022 17:44:40 +0100 Subject: [PATCH 170/367] serial: stm32: prevent TDR register overwrite when sending x_char When sending x_char in stm32_usart_transmit_chars(), driver can overwrite the value of TDR register by the value of x_char. If this happens, the previous value that was present in TDR register will not be sent through uart. This code checks if the previous value in TDR register is sent before writing the x_char value into register. Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") Cc: stable Signed-off-by: Valentin Caron Link: https://lore.kernel.org/r/20220111164441.6178-2-valentin.caron@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 1f89ab0e49ac..c1b8828451c8 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -550,11 +550,23 @@ static void stm32_usart_transmit_chars(struct uart_port *port) struct stm32_port *stm32_port = to_stm32_port(port); const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; struct circ_buf *xmit = &port->state->xmit; + u32 isr; + int ret; if (port->x_char) { if (stm32_usart_tx_dma_started(stm32_port) && stm32_usart_tx_dma_enabled(stm32_port)) stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT); + + /* Check that TDR is empty before filling FIFO */ + ret = + readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr, + isr, + (isr & USART_SR_TXE), + 10, 1000); + if (ret) + dev_warn(port->dev, "1 character may be erased\n"); + writel_relaxed(port->x_char, port->membase + ofs->tdr); port->x_char = 0; port->icount.tx++; From 037b91ec7729524107982e36ec4b40f9b174f7a2 Mon Sep 17 00:00:00 2001 From: Valentin Caron Date: Tue, 11 Jan 2022 17:44:41 +0100 Subject: [PATCH 171/367] serial: stm32: fix software flow control transfer x_char is ignored by stm32_usart_start_tx() when xmit buffer is empty. Fix start_tx condition to allow x_char to be sent. Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") Cc: stable Signed-off-by: Erwan Le Ray Signed-off-by: Valentin Caron Link: https://lore.kernel.org/r/20220111164441.6178-3-valentin.caron@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index c1b8828451c8..9570002d07e7 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -742,7 +742,7 @@ static void stm32_usart_start_tx(struct uart_port *port) struct serial_rs485 *rs485conf = &port->rs485; struct circ_buf *xmit = &port->state->xmit; - if (uart_circ_empty(xmit)) + if (uart_circ_empty(xmit) && !port->x_char) return; if (rs485conf->flags & SER_RS485_ENABLED) { From 62f676ff7898f6c1bd26ce014564773a3dc00601 Mon Sep 17 00:00:00 2001 From: Jochen Mades Date: Sat, 23 Jan 2021 05:10:14 +0100 Subject: [PATCH 172/367] serial: pl011: Fix incorrect rs485 RTS polarity on set_mctrl Commit 8d479237727c ("serial: amba-pl011: add RS485 support") sought to keep RTS deasserted on set_mctrl if rs485 is enabled. However it did so only if deasserted RTS polarity is high. Fix it in case it's low. Fixes: 8d479237727c ("serial: amba-pl011: add RS485 support") Cc: stable@vger.kernel.org # v5.15+ Cc: Lino Sanfilippo Signed-off-by: Jochen Mades [lukas: copyedit commit message, add stable designation] Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/85fa3323ba8c307943969b7343e23f34c3e652ba.1642909284.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 1f1df46242f9..f64c475a1379 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1582,8 +1582,12 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) container_of(port, struct uart_amba_port, port); unsigned int cr; - if (port->rs485.flags & SER_RS485_ENABLED) - mctrl &= ~TIOCM_RTS; + if (port->rs485.flags & SER_RS485_ENABLED) { + if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND) + mctrl &= ~TIOCM_RTS; + else + mctrl |= TIOCM_RTS; + } cr = pl011_read(uap, REG_CR); From 2dd8a74fddd21b95dcc60a2d3c9eaec993419d69 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 23 Jan 2022 05:21:14 +0100 Subject: [PATCH 173/367] serial: core: Initialize rs485 RTS polarity already on probe RTS polarity of rs485-enabled ports is currently initialized on uart open via: tty_port_open() tty_port_block_til_ready() tty_port_raise_dtr_rts() # if (C_BAUD(tty)) uart_dtr_rts() uart_port_dtr_rts() There's at least three problems here: First, if no baud rate is set, RTS polarity is not initialized. That's the right thing to do for rs232, but not for rs485, which requires that RTS is deasserted unconditionally. Second, if the DeviceTree property "linux,rs485-enabled-at-boot-time" is present, RTS should be deasserted as early as possible, i.e. on probe. Otherwise it may remain asserted until first open. Third, even though RTS is deasserted on open and close, it may subsequently be asserted by uart_throttle(), uart_unthrottle() or uart_set_termios() because those functions aren't rs485-aware. (Only uart_tiocmset() is.) To address these issues, move RTS initialization from uart_port_dtr_rts() to uart_configure_port(). Prevent subsequent modification of RTS polarity by moving the existing rs485 check from uart_tiocmget() to uart_update_mctrl(). That way, RTS is initialized on probe and then remains unmodified unless the uart transmits data. If rs485 is enabled at runtime (instead of at boot) through a TIOCSRS485 ioctl(), RTS is initialized by the uart driver's ->rs485_config() callback and then likewise remains unmodified. The PL011 driver initializes RTS on uart open and prevents subsequent modification in its ->set_mctrl() callback. That code is obsoleted by the present commit, so drop it. Cc: Jan Kiszka Cc: Su Bao Cheng Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/2d2acaf3a69e89b7bf687c912022b11fd29dfa1e.1642909284.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 15 +------------- drivers/tty/serial/serial_core.c | 34 +++++++++++--------------------- 2 files changed, 13 insertions(+), 36 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index f64c475a1379..ba053a68529f 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1582,13 +1582,6 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) container_of(port, struct uart_amba_port, port); unsigned int cr; - if (port->rs485.flags & SER_RS485_ENABLED) { - if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND) - mctrl &= ~TIOCM_RTS; - else - mctrl |= TIOCM_RTS; - } - cr = pl011_read(uap, REG_CR); #define TIOCMBIT(tiocmbit, uartbit) \ @@ -1812,14 +1805,8 @@ static int pl011_startup(struct uart_port *port) cr &= UART011_CR_RTS | UART011_CR_DTR; cr |= UART01x_CR_UARTEN | UART011_CR_RXE; - if (port->rs485.flags & SER_RS485_ENABLED) { - if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - } else { + if (!(port->rs485.flags & SER_RS485_ENABLED)) cr |= UART011_CR_TXE; - } pl011_write(cr, uap, REG_CR); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index dc40c4155356..0db90be4c3bc 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -144,6 +144,11 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) unsigned long flags; unsigned int old; + if (port->rs485.flags & SER_RS485_ENABLED) { + set &= ~TIOCM_RTS; + clear &= ~TIOCM_RTS; + } + spin_lock_irqsave(&port->lock, flags); old = port->mctrl; port->mctrl = (old & ~clear) | set; @@ -157,23 +162,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) static void uart_port_dtr_rts(struct uart_port *uport, int raise) { - int rs485_on = uport->rs485_config && - (uport->rs485.flags & SER_RS485_ENABLED); - int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND); - - if (raise) { - if (rs485_on && RTS_after_send) { - uart_set_mctrl(uport, TIOCM_DTR); - uart_clear_mctrl(uport, TIOCM_RTS); - } else { - uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); - } - } else { - unsigned int clear = TIOCM_DTR; - - clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0; - uart_clear_mctrl(uport, clear); - } + if (raise) + uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); + else + uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS); } /* @@ -1075,11 +1067,6 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) goto out; if (!tty_io_error(tty)) { - if (uport->rs485.flags & SER_RS485_ENABLED) { - set &= ~TIOCM_RTS; - clear &= ~TIOCM_RTS; - } - uart_update_mctrl(uport, set, clear); ret = 0; } @@ -2390,6 +2377,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, */ spin_lock_irqsave(&port->lock, flags); port->mctrl &= TIOCM_DTR; + if (port->rs485.flags & SER_RS485_ENABLED && + !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND)) + port->mctrl |= TIOCM_RTS; port->ops->set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); From 961c39121759ad09a89598ec4ccdd34ae0468a19 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 6 Dec 2021 11:38:40 +0000 Subject: [PATCH 174/367] perf: Always wake the parent event When using per-process mode and event inheritance is set to true, forked processes will create a new perf events via inherit_event() -> perf_event_alloc(). But these events will not have ring buffers assigned to them. Any call to wakeup will be dropped if it's called on an event with no ring buffer assigned because that's the object that holds the wakeup list. If the child event is disabled due to a call to perf_aux_output_begin() or perf_aux_output_end(), the wakeup is dropped leaving userspace hanging forever on the poll. Normally the event is explicitly re-enabled by userspace after it wakes up to read the aux data, but in this case it does not get woken up so the event remains disabled. This can be reproduced when using Arm SPE and 'stress' which forks once before running the workload. By looking at the list of aux buffers read, it's apparent that they stop after the fork: perf record -e arm_spe// -vvv -- stress -c 1 With this patch applied they continue to be printed. This behaviour doesn't happen when using systemwide or per-cpu mode. Reported-by: Ruben Ayrapetyan Signed-off-by: James Clark Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211206113840.130802-2-james.clark@arm.com --- kernel/events/core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 479c9e672ec4..b1c1928c0e7c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5985,6 +5985,8 @@ static void ring_buffer_attach(struct perf_event *event, struct perf_buffer *old_rb = NULL; unsigned long flags; + WARN_ON_ONCE(event->parent); + if (event->rb) { /* * Should be impossible, we set this when removing @@ -6042,6 +6044,9 @@ static void ring_buffer_wakeup(struct perf_event *event) { struct perf_buffer *rb; + if (event->parent) + event = event->parent; + rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { @@ -6055,6 +6060,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event) { struct perf_buffer *rb; + if (event->parent) + event = event->parent; + rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { @@ -6763,7 +6771,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event, if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id())) goto out; - rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); + rb = ring_buffer_get(sampler); if (!rb) goto out; @@ -6829,7 +6837,7 @@ static void perf_aux_sample_output(struct perf_event *event, if (WARN_ON_ONCE(!sampler || !data->aux_size)) return; - rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); + rb = ring_buffer_get(sampler); if (!rb) return; From c5de60cd622a2607c043ba65e25a6e9998a369f9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Jan 2022 11:58:08 -0800 Subject: [PATCH 175/367] perf/core: Fix cgroup event list management The active cgroup events are managed in the per-cpu cgrp_cpuctx_list. This list is only accessed from current cpu and not protected by any locks. But from the commit ef54c1a476ae ("perf: Rework perf_event_exit_event()"), it's possible to access (actually modify) the list from another cpu. In the perf_remove_from_context(), it can remove an event from the context without an IPI when the context is not active. This is not safe with cgroup events which can have some active events in the context even if ctx->is_active is 0 at the moment. The target cpu might be in the middle of list iteration at the same time. If the event is enabled when it's about to be closed, it might call perf_cgroup_event_disable() and list_del() with the cgrp_cpuctx_list on a different cpu. This resulted in a crash due to an invalid list pointer access during the cgroup list traversal on the cpu which the event belongs to. Let's fallback to IPI to access the cgrp_cpuctx_list from that cpu. Similarly, perf_install_in_context() should use IPI for the cgroup events too. Fixes: ef54c1a476ae ("perf: Rework perf_event_exit_event()") Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124195808.2252071-1-namhyung@kernel.org --- kernel/events/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b1c1928c0e7c..76c754e45d01 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2462,7 +2462,11 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla * event_function_call() user. */ raw_spin_lock_irq(&ctx->lock); - if (!ctx->is_active) { + /* + * Cgroup events are per-cpu events, and must IPI because of + * cgrp_cpuctx_list. + */ + if (!ctx->is_active && !is_cgroup_event(event)) { __perf_remove_from_context(event, __get_cpu_context(ctx), ctx, (void *)flags); raw_spin_unlock_irq(&ctx->lock); @@ -2895,11 +2899,14 @@ perf_install_in_context(struct perf_event_context *ctx, * perf_event_attr::disabled events will not run and can be initialized * without IPI. Except when this is the first event for the context, in * that case we need the magic of the IPI to set ctx->is_active. + * Similarly, cgroup events for the context also needs the IPI to + * manipulate the cgrp_cpuctx_list. * * The IOC_ENABLE that is sure to follow the creation of a disabled * event will issue the IPI and reprogram the hardware. */ - if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) { + if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && + ctx->nr_events && !is_cgroup_event(event)) { raw_spin_lock_irq(&ctx->lock); if (ctx->task == TASK_TOMBSTONE) { raw_spin_unlock_irq(&ctx->lock); From 7fde14d705985dd933a3d916d39daa72b1668098 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 23 Jan 2022 01:10:04 -0800 Subject: [PATCH 176/367] drm/privacy-screen: honor acpi=off in detect_thinkpad_privacy_screen when acpi=off is provided in bootarg, kernel crash with [ 1.252739] BUG: kernel NULL pointer dereference, address: 0000000000000018 [ 1.258308] Call Trace: [ 1.258490] ? acpi_walk_namespace+0x147/0x147 [ 1.258770] acpi_get_devices+0xe4/0x137 [ 1.258921] ? drm_core_init+0xc0/0xc0 [drm] [ 1.259108] detect_thinkpad_privacy_screen+0x5e/0xa8 [drm] [ 1.259337] drm_privacy_screen_lookup_init+0xe/0xe85 [drm] The reason is that acpi_walk_namespace expects acpi related stuff initialized but in fact it wouldn't when acpi is set to off. In this case we should honor acpi=off in detect_thinkpad_privacy_screen(). Signed-off-by: Tong Zhang Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20220123091004.763775-1-ztong0001@gmail.com --- drivers/gpu/drm/drm_privacy_screen_x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_privacy_screen_x86.c b/drivers/gpu/drm/drm_privacy_screen_x86.c index a2cafb294ca6..e7aa74ad0b24 100644 --- a/drivers/gpu/drm/drm_privacy_screen_x86.c +++ b/drivers/gpu/drm/drm_privacy_screen_x86.c @@ -33,6 +33,9 @@ static bool __init detect_thinkpad_privacy_screen(void) unsigned long long output; acpi_status status; + if (acpi_disabled) + return false; + /* Get embedded-controller handle */ status = acpi_get_devices("PNP0C09", acpi_set_handle, NULL, &ec_handle); if (ACPI_FAILURE(status) || !ec_handle) From 37c2c83ca4f1ef4b6908181ac98e18360af89b42 Mon Sep 17 00:00:00 2001 From: Xin Xiong Date: Tue, 25 Jan 2022 18:12:15 +0800 Subject: [PATCH 177/367] spi: uniphier: fix reference count leak in uniphier_spi_probe() The issue happens in several error paths in uniphier_spi_probe(). When either dma_get_slave_caps() or devm_spi_register_master() returns an error code, the function forgets to decrease the refcount of both `dma_rx` and `dma_tx` objects, which may lead to refcount leaks. Fix it by decrementing the reference count of specific objects in those error paths. Signed-off-by: Xin Xiong Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: Kunihiko Hayashi Fixes: 28d1dddc59f6 ("spi: uniphier: Add DMA transfer mode support") Link: https://lore.kernel.org/r/20220125101214.35677-1-xiongx18@fudan.edu.cn Signed-off-by: Mark Brown --- drivers/spi/spi-uniphier.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c index 342ee8d2c476..cc0da4822231 100644 --- a/drivers/spi/spi-uniphier.c +++ b/drivers/spi/spi-uniphier.c @@ -726,7 +726,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "failed to get TX DMA capacities: %d\n", ret); - goto out_disable_clk; + goto out_release_dma; } dma_tx_burst = caps.max_burst; } @@ -735,7 +735,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (IS_ERR_OR_NULL(master->dma_rx)) { if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; - goto out_disable_clk; + goto out_release_dma; } master->dma_rx = NULL; dma_rx_burst = INT_MAX; @@ -744,7 +744,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "failed to get RX DMA capacities: %d\n", ret); - goto out_disable_clk; + goto out_release_dma; } dma_rx_burst = caps.max_burst; } @@ -753,10 +753,20 @@ static int uniphier_spi_probe(struct platform_device *pdev) ret = devm_spi_register_master(&pdev->dev, master); if (ret) - goto out_disable_clk; + goto out_release_dma; return 0; +out_release_dma: + if (!IS_ERR_OR_NULL(master->dma_rx)) { + dma_release_channel(master->dma_rx); + master->dma_rx = NULL; + } + if (!IS_ERR_OR_NULL(master->dma_tx)) { + dma_release_channel(master->dma_tx); + master->dma_tx = NULL; + } + out_disable_clk: clk_disable_unprepare(priv->clk); From 66d28b21fe6b3da8d1e9f0a7ba38bc61b6c547e1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 26 Jan 2022 09:40:01 -0600 Subject: [PATCH 178/367] PCI/sysfs: Find shadow ROM before static attribute initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville reported that the sysfs "rom" file for VGA devices disappeared after 527139d738d7 ("PCI/sysfs: Convert "rom" to static attribute"). Prior to 527139d738d7, FINAL fixups, including pci_fixup_video() where we find shadow ROMs, were run before pci_create_sysfs_dev_files() created the sysfs "rom" file. After 527139d738d7, "rom" is a static attribute and is created before FINAL fixups are run, so we didn't create "rom" files for shadow ROMs: acpi_pci_root_add ... pci_scan_single_device pci_device_add pci_fixup_video # <-- new HEADER fixup device_add ... if (grp->is_visible()) pci_dev_rom_attr_is_visible # after 527139d738d7 pci_bus_add_devices pci_bus_add_device pci_fixup_device(pci_fixup_final) pci_fixup_video # <-- previous FINAL fixup pci_create_sysfs_dev_files if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) sysfs_create_bin_file("rom") # before 527139d738d7 Change pci_fixup_video() to be a HEADER fixup so it runs before sysfs static attributes are initialized. Rename the Loongson pci_fixup_radeon() to pci_fixup_video() and make its dmesg logging identical to the others since it is doing the same job. Link: https://lore.kernel.org/r/YbxqIyrkv3GhZVxx@intel.com Fixes: 527139d738d7 ("PCI/sysfs: Convert "rom" to static attribute") Link: https://lore.kernel.org/r/20220126154001.16895-1-helgaas@kernel.org Reported-by: Ville Syrjälä Tested-by: Ville Syrjälä Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v5.13+ Cc: Huacai Chen Cc: Jiaxun Yang Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Krzysztof Wilczyński --- arch/ia64/pci/fixup.c | 4 ++-- arch/mips/loongson64/vbios_quirk.c | 9 ++++----- arch/x86/pci/fixup.c | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index acb55a41260d..2bcdd7d3a1ad 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -76,5 +76,5 @@ static void pci_fixup_video(struct pci_dev *pdev) } } } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c index 9a29e94d3db1..3115d4de982c 100644 --- a/arch/mips/loongson64/vbios_quirk.c +++ b/arch/mips/loongson64/vbios_quirk.c @@ -3,7 +3,7 @@ #include #include -static void pci_fixup_radeon(struct pci_dev *pdev) +static void pci_fixup_video(struct pci_dev *pdev) { struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; @@ -22,8 +22,7 @@ static void pci_fixup_radeon(struct pci_dev *pdev) res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | IORESOURCE_PCI_FIXED; - dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n", - PCI_ROM_RESOURCE, res); + dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", res); } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, 0x9615, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 2edd86649468..615a76d70019 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -353,8 +353,8 @@ static void pci_fixup_video(struct pci_dev *pdev) } } } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); static const struct dmi_system_id msi_k8t_dmi_table[] = { From c80d401c52a2d1baf2a5afeb06f0ffe678e56d23 Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Tue, 18 Jan 2022 18:05:18 +0800 Subject: [PATCH 179/367] cpuset: Fix the bug that subpart_cpus updated wrongly in update_cpumask() subparts_cpus should be limited as a subset of cpus_allowed, but it is updated wrongly by using cpumask_andnot(). Use cpumask_and() instead to fix it. Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag") Signed-off-by: Tianchen Ding Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index bb3531e7fda7..804ff5738c5f 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1635,8 +1635,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Make sure that subparts_cpus is a subset of cpus_allowed. */ if (cs->nr_subparts_cpus) { - cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus, - cs->cpus_allowed); + cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } spin_unlock_irq(&callback_lock); From 519669cc58368385db93fd1560c09bf3334a6ecc Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 24 Jan 2022 16:43:59 -0800 Subject: [PATCH 180/367] KVM: VMX: Remove vmcs_config.order The maximum size of a VMCS (or VMXON region) is 4096. By definition, these are order 0 allocations. Signed-off-by: Jim Mattson Message-Id: <20220125004359.147600-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/capabilities.h | 1 - arch/x86/kvm/vmx/vmx.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 959b59d13b5a..3f430e218375 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -54,7 +54,6 @@ struct nested_vmx_msrs { struct vmcs_config { int size; - int order; u32 basic_cap; u32 revision_id; u32 pin_based_exec_ctrl; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2833b095a804..a6446eb935df 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2603,7 +2603,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, return -EIO; vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->order = get_order(vmcs_conf->size); vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; vmcs_conf->revision_id = vmx_msr_low; @@ -2628,7 +2627,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) struct page *pages; struct vmcs *vmcs; - pages = __alloc_pages_node(node, flags, vmcs_config.order); + pages = __alloc_pages_node(node, flags, 0); if (!pages) return NULL; vmcs = page_address(pages); @@ -2647,7 +2646,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) void free_vmcs(struct vmcs *vmcs) { - free_pages((unsigned long)vmcs, vmcs_config.order); + free_page((unsigned long)vmcs); } /* From 35fe7cfbab2e81f1afb23fc4212210b1de6d9633 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 25 Jan 2022 01:17:00 -0800 Subject: [PATCH 181/367] KVM: LAPIC: Also cancel preemption timer during SET_LAPIC The below warning is splatting during guest reboot. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1931 at arch/x86/kvm/x86.c:10322 kvm_arch_vcpu_ioctl_run+0x874/0x880 [kvm] CPU: 0 PID: 1931 Comm: qemu-system-x86 Tainted: G I 5.17.0-rc1+ #5 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x874/0x880 [kvm] Call Trace: kvm_vcpu_ioctl+0x279/0x710 [kvm] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fd39797350b This can be triggered by not exposing tsc-deadline mode and doing a reboot in the guest. The lapic_shutdown() function which is called in sys_reboot path will not disarm the flying timer, it just masks LVTT. lapic_shutdown() clears APIC state w/ LVT_MASKED and timer-mode bit is 0, this can trigger timer-mode switch between tsc-deadline and oneshot/periodic, which can result in preemption timer be cancelled in apic_update_lvtt(). However, We can't depend on this when not exposing tsc-deadline mode and oneshot/periodic modes emulated by preemption timer. Qemu will synchronise states around reset, let's cancel preemption timer under KVM_SET_LAPIC. Signed-off-by: Wanpeng Li Message-Id: <1643102220-35667-1-git-send-email-wanpengli@tencent.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index baca9fa37a91..4662469240bc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2629,7 +2629,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_apic_set_version(vcpu); apic_update_ppr(apic); - hrtimer_cancel(&apic->lapic_timer.timer); + cancel_apic_timer(apic); apic->lapic_timer.expired_tscdeadline = 0; apic_update_lvtt(apic); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); From 1ffce0924a8c86cf0590c039cd5f5c9375d32e9b Mon Sep 17 00:00:00 2001 From: Like Xu Date: Tue, 25 Jan 2022 19:52:23 +0800 Subject: [PATCH 182/367] KVM: x86/cpuid: Exclude unpermitted xfeatures sizes at KVM_GET_SUPPORTED_CPUID With the help of xstate_get_guest_group_perm(), KVM can exclude unpermitted xfeatures in cpuid.0xd.0.eax, in which case the corresponding xfeatures sizes should also be matched to the permitted xfeatures. To fix this inconsistency, the permitted_xcr0 and permitted_xss are defined consistently, which implies 'supported' plus certain permissions for this task, and it also fixes cpuid.0xd.1.ebx and later leaf-by-leaf queries. Fixes: 445ecdf79be0 ("kvm: x86: Exclude unpermitted xfeatures at KVM_GET_SUPPORTED_CPUID") Signed-off-by: Like Xu Message-Id: <20220125115223.33707-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 89d7822a8f5b..8e2704b50e47 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -899,13 +899,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } break; case 0xd: { - u64 guest_perm = xstate_get_guest_group_perm(); + u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm(); + u64 permitted_xss = supported_xss; - entry->eax &= supported_xcr0 & guest_perm; - entry->ebx = xstate_required_size(supported_xcr0, false); + entry->eax &= permitted_xcr0; + entry->ebx = xstate_required_size(permitted_xcr0, false); entry->ecx = entry->ebx; - entry->edx &= (supported_xcr0 & guest_perm) >> 32; - if (!supported_xcr0) + entry->edx &= permitted_xcr0 >> 32; + if (!permitted_xcr0) break; entry = do_host_cpuid(array, function, 1); @@ -914,20 +915,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) cpuid_entry_override(entry, CPUID_D_1_EAX); if (entry->eax & (F(XSAVES)|F(XSAVEC))) - entry->ebx = xstate_required_size(supported_xcr0 | supported_xss, + entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss, true); else { - WARN_ON_ONCE(supported_xss != 0); + WARN_ON_ONCE(permitted_xss != 0); entry->ebx = 0; } - entry->ecx &= supported_xss; - entry->edx &= supported_xss >> 32; + entry->ecx &= permitted_xss; + entry->edx &= permitted_xss >> 32; for (i = 2; i < 64; ++i) { bool s_state; - if (supported_xcr0 & BIT_ULL(i)) + if (permitted_xcr0 & BIT_ULL(i)) s_state = false; - else if (supported_xss & BIT_ULL(i)) + else if (permitted_xss & BIT_ULL(i)) s_state = true; else continue; @@ -941,7 +942,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * invalid sub-leafs. Only valid sub-leafs should * reach this point, and they should have a non-zero * save state size. Furthermore, check whether the - * processor agrees with supported_xcr0/supported_xss + * processor agrees with permitted_xcr0/permitted_xss * on whether this is an XCR0- or IA32_XSS-managed area. */ if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) { From 47c28d436f409f5b009dc82bd82d4971088aa391 Mon Sep 17 00:00:00 2001 From: Denis Valeev Date: Sat, 22 Jan 2022 23:13:57 +0300 Subject: [PATCH 183/367] KVM: x86: nSVM: skip eax alignment check for non-SVM instructions The bug occurs on #GP triggered by VMware backdoor when eax value is unaligned. eax alignment check should not be applied to non-SVM instructions because it leads to incorrect omission of the instructions emulation. Apply the alignment check only to SVM instructions to fix. Fixes: d1cba6c92237 ("KVM: x86: nSVM: test eax for 4K alignment for GP errata workaround") Signed-off-by: Denis Valeev Message-Id: Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6d31d357a83b..ec05f8bfc21b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2091,10 +2091,6 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (error_code) goto reinject; - /* All SVM instructions expect page aligned RAX */ - if (svm->vmcb->save.rax & ~PAGE_MASK) - goto reinject; - /* Decode the instruction for usage later */ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) goto reinject; @@ -2112,8 +2108,13 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu)) return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE); - } else + } else { + /* All SVM instructions expect page aligned RAX */ + if (svm->vmcb->save.rax & ~PAGE_MASK) + goto reinject; + return emulate_svm_instr(vcpu, opcode); + } reinject: kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); From 55467fcd55b89c622e62b4afe60ac0eb2fae91f2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:11 +0000 Subject: [PATCH 184/367] KVM: SVM: Never reject emulation due to SMAP errata for !SEV guests Always signal that emulation is possible for !SEV guests regardless of whether or not the CPU provided a valid instruction byte stream. KVM can read all guest state (memory and registers) for !SEV guests, i.e. can fetch the code stream from memory even if the CPU failed to do so because of the SMAP errata. Fixes: 05d5a4863525 ("KVM: SVM: Workaround errata#1096 (insn_len maybe zero on SMAP violation)") Cc: stable@vger.kernel.org Cc: Tom Lendacky Cc: Brijesh Singh Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ec05f8bfc21b..a46e1c6abf0e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4258,8 +4258,13 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i bool smep, smap, is_user; unsigned long cr4; + /* Emulation is always possible when KVM has access to all guest state. */ + if (!sev_guest(vcpu->kvm)) + return true; + /* - * When the guest is an SEV-ES guest, emulation is not possible. + * Emulation is impossible for SEV-ES guests as KVM doesn't have access + * to guest register state. */ if (sev_es_guest(vcpu->kvm)) return false; @@ -4319,9 +4324,6 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i smap = cr4 & X86_CR4_SMAP; is_user = svm_get_cpl(vcpu) == 3; if (smap && (!smep || is_user)) { - if (!sev_guest(vcpu->kvm)) - return true; - pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } From 31c25585695abdf03d6160aa6d829e855b256329 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:12 +0000 Subject: [PATCH 185/367] Revert "KVM: SVM: avoid infinite loop on NPF from bad address" Revert a completely broken check on an "invalid" RIP in SVM's workaround for the DecodeAssists SMAP errata. kvm_vcpu_gfn_to_memslot() obviously expects a gfn, i.e. operates in the guest physical address space, whereas RIP is a virtual (not even linear) address. The "fix" worked for the problematic KVM selftest because the test identity mapped RIP. Fully revert the hack instead of trying to translate RIP to a GPA, as the non-SEV case is now handled earlier, and KVM cannot access guest page tables to translate RIP. This reverts commit e72436bc3a5206f95bb384e741154166ddb3202e. Fixes: e72436bc3a52 ("KVM: SVM: avoid infinite loop on NPF from bad address") Reported-by: Liam Merwick Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 ------- virt/kvm/kvm_main.c | 1 - 2 files changed, 8 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a46e1c6abf0e..37eb3168e0ea 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4312,13 +4312,6 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i if (likely(!insn || insn_len)) return true; - /* - * If RIP is invalid, go ahead with emulation which will cause an - * internal error exit. - */ - if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) - return true; - cr4 = kvm_read_cr4(vcpu); smep = cr4 & X86_CR4_SMEP; smap = cr4 & X86_CR4_SMAP; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0bbb3cbf6014..2755ba4177d6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2248,7 +2248,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn return NULL; } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { From 0b0be065b7563ac708aaa9f69dd4941c80b3446d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:13 +0000 Subject: [PATCH 186/367] KVM: SVM: Don't intercept #GP for SEV guests Never intercept #GP for SEV guests as reading SEV guest private memory will return cyphertext, i.e. emulating on #GP can't work as intended. Cc: stable@vger.kernel.org Cc: Tom Lendacky Cc: Brijesh Singh Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 37eb3168e0ea..defc91a8c04c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -312,7 +312,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return ret; } - if (svm_gp_erratum_intercept) + /* + * Never intercept #GP for SEV guests, KVM can't + * decrypt guest memory to workaround the erratum. + */ + if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm)) set_exception_intercept(svm, GP_VECTOR); } } @@ -1010,9 +1014,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu) * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway - * as VMware does. + * as VMware does. Don't intercept #GP for SEV guests as KVM can't + * decrypt guest memory to decode the faulting instruction. */ - if (enable_vmware_backdoor) + if (enable_vmware_backdoor && !sev_guest(vcpu->kvm)) set_exception_intercept(svm, GP_VECTOR); svm_set_intercept(svm, INTERCEPT_INTR); From c532f2903b69b775d27016511fbe29a14a098f95 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:14 +0000 Subject: [PATCH 187/367] KVM: SVM: Explicitly require DECODEASSISTS to enable SEV support Add a sanity check on DECODEASSIST being support if SEV is supported, as KVM cannot read guest private memory and thus relies on the CPU to provide the instruction byte stream on #NPF for emulation. The intent of the check is to document the dependency, it should never fail in practice as producing hardware that supports SEV but not DECODEASSISTS would be non-sensical. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6a22798eaaee..17b53457d866 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2100,8 +2100,13 @@ void __init sev_hardware_setup(void) if (!sev_enabled || !npt_enabled) goto out; - /* Does the CPU support SEV? */ - if (!boot_cpu_has(X86_FEATURE_SEV)) + /* + * SEV must obviously be supported in hardware. Sanity check that the + * CPU supports decode assists, which is mandatory for SEV guests to + * support instruction emulation. + */ + if (!boot_cpu_has(X86_FEATURE_SEV) || + WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS))) goto out; /* Retrieve SEV CPUID information */ From 4d31d9eff244e2631f028d658979ccbbdbcb423b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:15 +0000 Subject: [PATCH 188/367] KVM: x86: Pass emulation type to can_emulate_instruction() Pass the emulation type to kvm_x86_ops.can_emulate_insutrction() so that a future commit can harden KVM's SEV support to WARN on emulation scenarios that should never happen. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/svm/svm.c | 3 ++- arch/x86/kvm/vmx/vmx.c | 7 ++++--- arch/x86/kvm/x86.c | 11 +++++++++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9b2fffeeab16..2fef4ef62061 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1482,7 +1482,8 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len); + bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index defc91a8c04c..115743e250bb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4258,7 +4258,8 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu) } } -static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) +static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { bool smep, smap, is_user; unsigned long cr4; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a6446eb935df..b1165bb13a5a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1487,11 +1487,12 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) +static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { /* * Emulation of instructions in SGX enclaves is impossible as RIP does - * not point tthe failing instruction, and even if it did, the code + * not point at the failing instruction, and even if it did, the code * stream is inaccessible. Inject #UD instead of exiting to userspace * so that guest userspace can't DoS the guest simply by triggering * emulation (enclaves are CPL3 only). @@ -5425,7 +5426,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { gpa_t gpa; - if (!vmx_can_emulate_instruction(vcpu, NULL, 0)) + if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0)) return 1; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9ceeab1e5bdc..c8f3d3ea8a96 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6810,6 +6810,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) +{ + return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type, + insn, insn_len); +} + int handle_ud(struct kvm_vcpu *vcpu) { static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX }; @@ -6817,7 +6824,7 @@ int handle_ud(struct kvm_vcpu *vcpu) char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; - if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0))) + if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0))) return 1; if (force_emulation_prefix && @@ -8193,7 +8200,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool writeback = true; bool write_fault_to_spt; - if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len))) + if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len))) return 1; vcpu->arch.l1tf_flush_l1d = true; From 132627c64d94b1561ba5a444824e46c9f84c3d5b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:16 +0000 Subject: [PATCH 189/367] KVM: SVM: WARN if KVM attempts emulation on #UD or #GP for SEV guests WARN if KVM attempts to emulate in response to #UD or #GP for SEV guests, i.e. if KVM intercepts #UD or #GP, as emulation on any fault except #NPF is impossible since KVM cannot read guest private memory to get the code stream, and the CPU's DecodeAssists feature only provides the instruction bytes on #NPF. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-7-seanjc@google.com> [Warn on EMULTYPE_TRAP_UD_FORCED according to Liam Merwick's review. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 115743e250bb..85bbfba1fa07 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4268,6 +4268,11 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, if (!sev_guest(vcpu->kvm)) return true; + /* #UD and #GP should never be intercepted for SEV guests. */ + WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD | + EMULTYPE_TRAP_UD_FORCED | + EMULTYPE_VMWARE_GP)); + /* * Emulation is impossible for SEV-ES guests as KVM doesn't have access * to guest register state. From 04c40f344defdbd842d8a64fcfb47ef74b39ef4e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:17 +0000 Subject: [PATCH 190/367] KVM: SVM: Inject #UD on attempted emulation for SEV guest w/o insn buffer Inject #UD if KVM attempts emulation for an SEV guests without an insn buffer and instruction decoding is required. The previous behavior of allowing emulation if there is no insn buffer is undesirable as doing so means KVM is reading guest private memory and thus decoding cyphertext, i.e. is emulating garbage. The check was previously necessary as the emulation type was not provided, i.e. SVM needed to allow emulation to handle completion of emulation after exiting to userspace to handle I/O. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 89 ++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 85bbfba1fa07..fb65bfabea25 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4280,49 +4280,70 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, if (sev_es_guest(vcpu->kvm)) return false; + /* + * Emulation is possible if the instruction is already decoded, e.g. + * when completing I/O after returning from userspace. + */ + if (emul_type & EMULTYPE_NO_DECODE) + return true; + + /* + * Emulation is possible for SEV guests if and only if a prefilled + * buffer containing the bytes of the intercepted instruction is + * available. SEV guest memory is encrypted with a guest specific key + * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and + * decode garbage. + * + * Inject #UD if KVM reached this point without an instruction buffer. + * In practice, this path should never be hit by a well-behaved guest, + * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path + * is still theoretically reachable, e.g. via unaccelerated fault-like + * AVIC access, and needs to be handled by KVM to avoid putting the + * guest into an infinite loop. Injecting #UD is somewhat arbitrary, + * but its the least awful option given lack of insight into the guest. + */ + if (unlikely(!insn)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return false; + } + + /* + * Emulate for SEV guests if the insn buffer is not empty. The buffer + * will be empty if the DecodeAssist microcode cannot fetch bytes for + * the faulting instruction because the code fetch itself faulted, e.g. + * the guest attempted to fetch from emulated MMIO or a guest page + * table used to translate CS:RIP resides in emulated MMIO. + */ + if (likely(insn_len)) + return true; + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * * Errata: - * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is - * possible that CPU microcode implementing DecodeAssist will fail - * to read bytes of instruction which caused #NPF. In this case, - * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly - * return 0 instead of the correct guest instruction bytes. - * - * This happens because CPU microcode reading instruction bytes - * uses a special opcode which attempts to read data using CPL=0 - * privileges. The microcode reads CS:RIP and if it hits a SMAP - * fault, it gives up and returns no instruction bytes. + * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is + * possible that CPU microcode implementing DecodeAssist will fail to + * read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly + * be '0'. This happens because microcode reads CS:RIP using a _data_ + * loap uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode + * gives up and does not fill the instruction bytes buffer. * * Detection: - * We reach here in case CPU supports DecodeAssist, raised #NPF and - * returned 0 in GuestIntrBytes field of the VMCB. - * First, errata can only be triggered in case vCPU CR4.SMAP=1. - * Second, if vCPU CR4.SMEP=1, errata could only be triggered - * in case vCPU CPL==3 (Because otherwise guest would have triggered - * a SMEP fault instead of #NPF). - * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL. - * As most guests enable SMAP if they have also enabled SMEP, use above - * logic in order to attempt minimize false-positive of detecting errata - * while still preserving all cases semantic correctness. + * KVM reaches this point if the VM is an SEV guest, the CPU supports + * DecodeAssist, a #NPF was raised, KVM's page fault handler triggered + * emulation (e.g. for MMIO), and the CPU returned 0 in GuestIntrBytes + * field of the VMCB. * - * Workaround: - * To determine what instruction the guest was executing, the hypervisor - * will have to decode the instruction at the instruction pointer. + * This does _not_ mean that the erratum has been encountered, as the + * DecodeAssist will also fail if the load for CS:RIP hits a legitimate + * #PF, e.g. if the guest attempt to execute from emulated MMIO and + * encountered a reserved/not-present #PF. * - * In non SEV guest, hypervisor will be able to read the guest - * memory to decode the instruction pointer when insn_len is zero - * so we return true to indicate that decoding is possible. - * - * But in the SEV guest, the guest memory is encrypted with the - * guest specific key and hypervisor will not be able to decode the - * instruction pointer so we will not able to workaround it. Lets - * print the error and request to kill the guest. + * To reduce the likelihood of false positives, take action if and only + * if CR4.SMAP=1 (obviously required to hit the erratum) and CR4.SMEP=0 + * or CPL=3. If SMEP=1 and CPL!=3, the erratum cannot have been hit as + * the guest would have encountered a SMEP violation #PF, not a #NPF. */ - if (likely(!insn || insn_len)) - return true; - cr4 = kvm_read_cr4(vcpu); smep = cr4 & X86_CR4_SMEP; smap = cr4 & X86_CR4_SMAP; From 3280cc22aea74d78ebbea277ff8bc8d593582de3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:18 +0000 Subject: [PATCH 191/367] KVM: SVM: Don't apply SEV+SMAP workaround on code fetch or PT access Resume the guest instead of synthesizing a triple fault shutdown if the instruction bytes buffer is empty due to the #NPF being on the code fetch itself or on a page table access. The SMAP errata applies if and only if the code fetch was successful and ucode's subsequent data read from the code page encountered a SMAP violation. In practice, the guest is likely hosed either way, but crashing the guest on a code fetch to emulated MMIO is technically wrong according to the behavior described in the APM. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 43 +++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index fb65bfabea25..be25831830b0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4263,6 +4263,7 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, { bool smep, smap, is_user; unsigned long cr4; + u64 error_code; /* Emulation is always possible when KVM has access to all guest state. */ if (!sev_guest(vcpu->kvm)) @@ -4328,22 +4329,31 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * loap uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode * gives up and does not fill the instruction bytes buffer. * - * Detection: - * KVM reaches this point if the VM is an SEV guest, the CPU supports - * DecodeAssist, a #NPF was raised, KVM's page fault handler triggered - * emulation (e.g. for MMIO), and the CPU returned 0 in GuestIntrBytes - * field of the VMCB. + * As above, KVM reaches this point iff the VM is an SEV guest, the CPU + * supports DecodeAssist, a #NPF was raised, KVM's page fault handler + * triggered emulation (e.g. for MMIO), and the CPU returned 0 in the + * GuestIntrBytes field of the VMCB. * * This does _not_ mean that the erratum has been encountered, as the * DecodeAssist will also fail if the load for CS:RIP hits a legitimate * #PF, e.g. if the guest attempt to execute from emulated MMIO and * encountered a reserved/not-present #PF. * - * To reduce the likelihood of false positives, take action if and only - * if CR4.SMAP=1 (obviously required to hit the erratum) and CR4.SMEP=0 - * or CPL=3. If SMEP=1 and CPL!=3, the erratum cannot have been hit as - * the guest would have encountered a SMEP violation #PF, not a #NPF. + * To hit the erratum, the following conditions must be true: + * 1. CR4.SMAP=1 (obviously). + * 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot + * have been hit as the guest would have encountered a SMEP + * violation #PF, not a #NPF. + * 3. The #NPF is not due to a code fetch, in which case failure to + * retrieve the instruction bytes is legitimate (see abvoe). + * + * In addition, don't apply the erratum workaround if the #NPF occurred + * while translating guest page tables (see below). */ + error_code = to_svm(vcpu)->vmcb->control.exit_info_1; + if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK)) + goto resume_guest; + cr4 = kvm_read_cr4(vcpu); smep = cr4 & X86_CR4_SMEP; smap = cr4 & X86_CR4_SMAP; @@ -4353,6 +4363,21 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } +resume_guest: + /* + * If the erratum was not hit, simply resume the guest and let it fault + * again. While awful, e.g. the vCPU may get stuck in an infinite loop + * if the fault is at CPL=0, it's the lesser of all evils. Exiting to + * userspace will kill the guest, and letting the emulator read garbage + * will yield random behavior and potentially corrupt the guest. + * + * Simply resuming the guest is technically not a violation of the SEV + * architecture. AMD's APM states that all code fetches and page table + * accesses for SEV guest are encrypted, regardless of the C-Bit. The + * APM also states that encrypted accesses to MMIO are "ignored", but + * doesn't explicitly define "ignored", i.e. doing nothing and letting + * the guest spin is technically "ignoring" the access. + */ return false; } From cdf85e0c5dc766fc7fc779466280e454a6d04f87 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:19 +0000 Subject: [PATCH 192/367] KVM: SVM: Don't kill SEV guest if SMAP erratum triggers in usermode Inject a #GP instead of synthesizing triple fault to try to avoid killing the guest if emulation of an SEV guest fails due to encountering the SMAP erratum. The injected #GP may still be fatal to the guest, e.g. if the userspace process is providing critical functionality, but KVM should make every attempt to keep the guest alive. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index be25831830b0..f14f4ab97a80 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4360,7 +4360,21 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, is_user = svm_get_cpl(vcpu) == 3; if (smap && (!smep || is_user)) { pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + + /* + * If the fault occurred in userspace, arbitrarily inject #GP + * to avoid killing the guest and to hopefully avoid confusing + * the guest kernel too much, e.g. injecting #PF would not be + * coherent with respect to the guest's page tables. Request + * triple fault if the fault occurred in the kernel as there's + * no fault that KVM can inject without confusing the guest. + * In practice, the triple fault is moot as no sane SEV kernel + * will execute from user memory while also running with SMAP=1. + */ + if (is_user) + kvm_inject_gp(vcpu, 0); + else + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } resume_guest: From 38dfa8308cfc43f671a74c753302fec26808edc0 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 20 Dec 2021 16:21:36 +0100 Subject: [PATCH 193/367] KVM: SVM: hyper-v: Enable Enlightened MSR-Bitmap support for real Commit c4327f15dfc7 ("KVM: SVM: hyper-v: Enlightened MSR-Bitmap support") introduced enlightened MSR-Bitmap support for KVM-on-Hyper-V but it didn't actually enable the support. Similar to enlightened NPT TLB flush and direct TLB flush features, the guest (KVM) has to tell L0 (Hyper-V) that it's using the feature by setting the appropriate feature fit in VMCB control area (sw reserved fields). Fixes: c4327f15dfc7 ("KVM: SVM: hyper-v: Enlightened MSR-Bitmap support") Signed-off-by: Vitaly Kuznetsov Message-Id: <20211220152139.418372-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm_onhyperv.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index c53b8bf8d013..1ad43e997053 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -46,6 +46,9 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb) if (npt_enabled && ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) hve->hv_enlightenments_control.enlightened_npt_tlb = 1; + + if (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP) + hve->hv_enlightenments_control.msr_bitmap = 1; } static inline void svm_hv_hardware_setup(void) From aa3b39f38c7a5dfdd10b3f61a0d055b85aa85451 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 20 Dec 2021 16:21:35 +0100 Subject: [PATCH 194/367] KVM: SVM: drop unnecessary code in svm_hv_vmcb_dirty_nested_enlightenments() Commit 3fa5e8fd0a0e4 ("KVM: SVM: delay svm_vcpu_init_msrpm after svm->vmcb is initialized") re-arranged svm_vcpu_init_msrpm() call in svm_create_vcpu(), thus making the comment about vmcb being NULL obsolete. Drop it. While on it, drop superfluous vmcb_is_clean() check: vmcb_mark_dirty() is a bit flip, an extra check is unlikely to bring any performance gain. Drop now-unneeded vmcb_is_clean() helper as well. Fixes: 3fa5e8fd0a0e4 ("KVM: SVM: delay svm_vcpu_init_msrpm after svm->vmcb is initialized") Signed-off-by: Vitaly Kuznetsov Message-Id: <20211220152139.418372-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.h | 5 ----- arch/x86/kvm/svm/svm_onhyperv.h | 9 +-------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 47ef8f4a9358..1160faaf6ef3 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -304,11 +304,6 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb) & ~VMCB_ALWAYS_DIRTY_MASK; } -static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit) -{ - return (vmcb->control.clean & (1 << bit)); -} - static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit) { vmcb->control.clean &= ~(1 << bit); diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 1ad43e997053..489ca56212c6 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -86,14 +86,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( struct hv_enlightenments *hve = (struct hv_enlightenments *)vmcb->control.reserved_sw; - /* - * vmcb can be NULL if called during early vcpu init. - * And its okay not to mark vmcb dirty during vcpu init - * as we mark it dirty unconditionally towards end of vcpu - * init phase. - */ - if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && - hve->hv_enlightenments_control.msr_bitmap) + if (hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); } From f7e570780efc5cec9b2ed1e0472a7da14e864fdb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 25 Jan 2022 22:03:58 +0000 Subject: [PATCH 195/367] KVM: x86: Forcibly leave nested virt when SMM state is toggled Forcibly leave nested virtualization operation if userspace toggles SMM state via KVM_SET_VCPU_EVENTS or KVM_SYNC_X86_EVENTS. If userspace forces the vCPU out of SMM while it's post-VMXON and then injects an SMI, vmx_enter_smm() will overwrite vmx->nested.smm.vmxon and end up with both vmxon=false and smm.vmxon=false, but all other nVMX state allocated. Don't attempt to gracefully handle the transition as (a) most transitions are nonsencial, e.g. forcing SMM while L2 is running, (b) there isn't sufficient information to handle all transitions, e.g. SVM wants access to the SMRAM save state, and (c) KVM_SET_VCPU_EVENTS must precede KVM_SET_NESTED_STATE during state restore as the latter disallows putting the vCPU into L2 if SMM is active, and disallows tagging the vCPU as being post-VMXON in SMM if SMM is not active. Abuse of KVM_SET_VCPU_EVENTS manifests as a WARN and memory leak in nVMX due to failure to free vmcs01's shadow VMCS, but the bug goes far beyond just a memory leak, e.g. toggling SMM on while L2 is active puts the vCPU in an architecturally impossible state. WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline] WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656 Modules linked in: CPU: 1 PID: 3606 Comm: syz-executor725 Not tainted 5.17.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline] RIP: 0010:free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656 Code: <0f> 0b eb b3 e8 8f 4d 9f 00 e9 f7 fe ff ff 48 89 df e8 92 4d 9f 00 Call Trace: kvm_arch_vcpu_destroy+0x72/0x2f0 arch/x86/kvm/x86.c:11123 kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline] kvm_destroy_vcpus+0x11f/0x290 arch/x86/kvm/../../../virt/kvm/kvm_main.c:460 kvm_free_vcpus arch/x86/kvm/x86.c:11564 [inline] kvm_arch_destroy_vm+0x2e8/0x470 arch/x86/kvm/x86.c:11676 kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1217 [inline] kvm_put_kvm+0x4fa/0xb00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1250 kvm_vm_release+0x3f/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1273 __fput+0x286/0x9f0 fs/file_table.c:311 task_work_run+0xdd/0x1a0 kernel/task_work.c:164 exit_task_work include/linux/task_work.h:32 [inline] do_exit+0xb29/0x2a30 kernel/exit.c:806 do_group_exit+0xd2/0x2f0 kernel/exit.c:935 get_signal+0x4b0/0x28c0 kernel/signal.c:2862 arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:868 handle_signal_work kernel/entry/common.c:148 [inline] exit_to_user_mode_loop kernel/entry/common.c:172 [inline] exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae Cc: stable@vger.kernel.org Reported-by: syzbot+8112db3ab20e70d50c31@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20220125220358.2091737-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/nested.c | 9 +++++---- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/svm/svm.h | 2 +- arch/x86/kvm/vmx/nested.c | 1 + arch/x86/kvm/x86.c | 4 +++- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2fef4ef62061..efee29d5ad4f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1496,6 +1496,7 @@ struct kvm_x86_ops { }; struct kvm_x86_nested_ops { + void (*leave_nested)(struct kvm_vcpu *vcpu); int (*check_events)(struct kvm_vcpu *vcpu); bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); void (*triple_fault)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cf206855ebf0..1218b5a342fc 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -983,9 +983,9 @@ void svm_free_nested(struct vcpu_svm *svm) /* * Forcibly leave nested mode in order to be able to reset the VCPU later on. */ -void svm_leave_nested(struct vcpu_svm *svm) +void svm_leave_nested(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = &svm->vcpu; + struct vcpu_svm *svm = to_svm(vcpu); if (is_guest_mode(vcpu)) { svm->nested.nested_run_pending = 0; @@ -1411,7 +1411,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); return 0; } @@ -1478,7 +1478,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, */ if (is_guest_mode(vcpu)) - svm_leave_nested(svm); + svm_leave_nested(vcpu); else svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; @@ -1532,6 +1532,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) } struct kvm_x86_nested_ops svm_nested_ops = { + .leave_nested = svm_leave_nested, .check_events = svm_check_nested_events, .triple_fault = nested_svm_triple_fault, .get_nested_state_pages = svm_get_nested_state_pages, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f14f4ab97a80..9cef8e4598df 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -290,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { if (!(efer & EFER_SVME)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, true); /* #GP intercept is still needed for vmware backdoor */ if (!enable_vmware_backdoor) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 1160faaf6ef3..73525353e424 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -520,7 +520,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun); -void svm_leave_nested(struct vcpu_svm *svm); +void svm_leave_nested(struct kvm_vcpu *vcpu); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f235f77cbc03..7eebfdf7204f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6771,6 +6771,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) } struct kvm_x86_nested_ops vmx_nested_ops = { + .leave_nested = vmx_leave_nested, .check_events = vmx_check_nested_events, .hv_timer_pending = nested_vmx_preemption_timer_pending, .triple_fault = nested_vmx_triple_fault, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8f3d3ea8a96..a50baf4c5bff 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4860,8 +4860,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { - if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) + if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { + kvm_x86_ops.nested_ops->leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); + } vcpu->arch.smi_pending = events->smi.pending; From 033a3ea59a19df63edb4db6bfdbb357cd028258a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 26 Jan 2022 14:18:04 +0100 Subject: [PATCH 196/367] KVM: x86: Check .flags in kvm_cpuid_check_equal() too kvm_cpuid_check_equal() checks for the (full) equality of the supplied CPUID data so .flags need to be checked too. Reported-by: Sean Christopherson Fixes: c6617c61e8fe ("KVM: x86: Partially allow KVM_SET_CPUID{,2} after KVM_RUN") Signed-off-by: Vitaly Kuznetsov Message-Id: <20220126131804.2839410-1-vkuznets@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8e2704b50e47..d0c9b10d448d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -133,6 +133,7 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 orig = &vcpu->arch.cpuid_entries[i]; if (e2[i].function != orig->function || e2[i].index != orig->index || + e2[i].flags != orig->flags || e2[i].eax != orig->eax || e2[i].ebx != orig->ebx || e2[i].ecx != orig->ecx || e2[i].edx != orig->edx) return -EINVAL; From 4cf3d3ebe8794c449af3e0e8c1d790c97e461d20 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 25 Jan 2022 22:17:25 +0000 Subject: [PATCH 197/367] KVM: selftests: Don't skip L2's VMCALL in SMM test for SVM guest Don't skip the vmcall() in l2_guest_code() prior to re-entering L2, doing so will result in L2 running to completion, popping '0' off the stack for RET, jumping to address '0', and ultimately dying with a triple fault shutdown. It's not at all obvious why the test re-enters L2 and re-executes VMCALL, but presumably it serves a purpose. The VMX path doesn't skip vmcall(), and the test can't possibly have passed on SVM, so just do what VMX does. Fixes: d951b2210c1a ("KVM: selftests: smm_test: Test SMM enter from L2") Cc: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20220125221725.2101126-1-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Tested-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/smm_test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 2da8eb8e2d96..a626d40fdb48 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -105,7 +105,6 @@ static void guest_code(void *arg) if (cpu_has_svm()) { run_guest(svm->vmcb, svm->vmcb_gpa); - svm->vmcb->save.rip += 3; run_guest(svm->vmcb, svm->vmcb_gpa); } else { vmlaunch(); From d6e656cd266cdcc95abd372c7faef05bee271d1a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 25 Jan 2022 22:05:27 +0000 Subject: [PATCH 198/367] KVM: nVMX: WARN on any attempt to allocate shadow VMCS for vmcs02 WARN if KVM attempts to allocate a shadow VMCS for vmcs02. KVM emulates VMCS shadowing but doesn't virtualize it, i.e. KVM should never allocate a "real" shadow VMCS for L2. The previous code WARNed but continued anyway with the allocation, presumably in an attempt to avoid NULL pointer dereference. However, alloc_vmcs (and hence alloc_shadow_vmcs) can fail, and indeed the sole caller does: if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu)) goto out_shadow_vmcs; which makes it not a useful attempt. Signed-off-by: Sean Christopherson Message-Id: <20220125220527.2093146-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7eebfdf7204f..2777cea05cc0 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4851,18 +4851,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; /* - * We should allocate a shadow vmcs for vmcs01 only when L1 - * executes VMXON and free it when L1 executes VMXOFF. - * As it is invalid to execute VMXON twice, we shouldn't reach - * here when vmcs01 already have an allocated shadow vmcs. + * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it + * when L1 executes VMXOFF or the vCPU is forced out of nested + * operation. VMXON faults if the CPU is already post-VMXON, so it + * should be impossible to already have an allocated shadow VMCS. KVM + * doesn't support virtualization of VMCS shadowing, so vmcs01 should + * always be the loaded VMCS. */ - WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs); + if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs)) + return loaded_vmcs->shadow_vmcs; + + loaded_vmcs->shadow_vmcs = alloc_vmcs(true); + if (loaded_vmcs->shadow_vmcs) + vmcs_clear(loaded_vmcs->shadow_vmcs); - if (!loaded_vmcs->shadow_vmcs) { - loaded_vmcs->shadow_vmcs = alloc_vmcs(true); - if (loaded_vmcs->shadow_vmcs) - vmcs_clear(loaded_vmcs->shadow_vmcs); - } return loaded_vmcs->shadow_vmcs; } From 811f95ff95270e6048197821434d9301e3d7f07c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 25 Jan 2022 21:04:45 +0000 Subject: [PATCH 199/367] KVM: x86: Free kvm_cpuid_entry2 array on post-KVM_RUN KVM_SET_CPUID{,2} Free the "struct kvm_cpuid_entry2" array on successful post-KVM_RUN KVM_SET_CPUID{,2} to fix a memory leak, the callers of kvm_set_cpuid() free the array only on failure. BUG: memory leak unreferenced object 0xffff88810963a800 (size 2048): comm "syz-executor025", pid 3610, jiffies 4294944928 (age 8.080s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 0d 00 00 00 ................ 47 65 6e 75 6e 74 65 6c 69 6e 65 49 00 00 00 00 GenuntelineI.... backtrace: [] kmalloc_node include/linux/slab.h:604 [inline] [] kvmalloc_node+0x3e/0x100 mm/util.c:580 [] kvmalloc include/linux/slab.h:732 [inline] [] vmemdup_user+0x22/0x100 mm/util.c:199 [] kvm_vcpu_ioctl_set_cpuid2+0x8f/0xf0 arch/x86/kvm/cpuid.c:423 [] kvm_arch_vcpu_ioctl+0xb99/0x1e60 arch/x86/kvm/x86.c:5251 [] kvm_vcpu_ioctl+0x4ad/0x950 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4066 [] vfs_ioctl fs/ioctl.c:51 [inline] [] __do_sys_ioctl fs/ioctl.c:874 [inline] [] __se_sys_ioctl fs/ioctl.c:860 [inline] [] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:860 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: c6617c61e8fe ("KVM: x86: Partially allow KVM_SET_CPUID{,2} after KVM_RUN") Cc: stable@vger.kernel.org Reported-by: syzbot+be576ad7655690586eec@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20220125210445.2053429-1-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d0c9b10d448d..28be02adc669 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -359,8 +359,14 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, * KVM_SET_CPUID{,2} again. To support this legacy behavior, check * whether the supplied CPUID data is equal to what's already set. */ - if (vcpu->arch.last_vmentry_cpu != -1) - return kvm_cpuid_check_equal(vcpu, e2, nent); + if (vcpu->arch.last_vmentry_cpu != -1) { + r = kvm_cpuid_check_equal(vcpu, e2, nent); + if (r) + return r; + + kvfree(e2); + return 0; + } r = kvm_check_cpuid(vcpu, e2, nent); if (r) From 663d34c8df98740f1e90241e78e456d00b3c6cad Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 20 Jan 2022 16:23:19 +0100 Subject: [PATCH 200/367] s390/hypfs: include z/VM guests with access control group set Currently if z/VM guest is allowed to retrieve hypervisor performance data globally for all guests (privilege class B) the query is formed in a way to include all guests but the group name is left empty. This leads to that z/VM guests which have access control group set not being included in the results (even local vm). Change the query group identifier from empty to "any" to retrieve information about all guests from any groups (or without a group set). Cc: stable@vger.kernel.org Fixes: 31cb4bd31a48 ("[S390] Hypervisor filesystem (s390_hypfs) for z/VM") Reviewed-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/hypfs/hypfs_vm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 33f973ff9744..e8f15dbb89d0 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -20,6 +20,7 @@ static char local_guest[] = " "; static char all_guests[] = "* "; +static char *all_groups = all_guests; static char *guest_query; struct diag2fc_data { @@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr) memcpy(parm_list.userid, query, NAME_LEN); ASCEBC(parm_list.userid, NAME_LEN); - parm_list.addr = (unsigned long) addr ; + memcpy(parm_list.aci_grp, all_groups, NAME_LEN); + ASCEBC(parm_list.aci_grp, NAME_LEN); + parm_list.addr = (unsigned long)addr; parm_list.size = size; parm_list.fmt = 0x02; - memset(parm_list.aci_grp, 0x40, NAME_LEN); rc = -1; diag_stat_inc(DIAG_STAT_X2FC); From be4f3b3f82271c3193ce200a996dc70682c8e622 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 26 Jan 2022 17:22:24 +0000 Subject: [PATCH 201/367] KVM: x86: Keep MSR_IA32_XSS unchanged for INIT It has been corrected from SDM version 075 that MSR_IA32_XSS is reset to zero on Power up and Reset but keeps unchanged on INIT. Fixes: a554d207dc46 ("KVM: X86: Processor States following Reset or INIT") Cc: stable@vger.kernel.org Signed-off-by: Xiaoyao Li Signed-off-by: Sean Christopherson Message-Id: <20220126172226.2298529-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a50baf4c5bff..b6d60c296eda 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11266,6 +11266,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.msr_misc_features_enables = 0; vcpu->arch.xcr0 = XFEATURE_MASK_FP; + vcpu->arch.ia32_xss = 0; } /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ @@ -11282,8 +11283,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0); kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600); - vcpu->arch.ia32_xss = 0; - static_call(kvm_x86_vcpu_reset)(vcpu, init_event); kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); From 4c282e51e4450b94680d6ca3b10f830483b1f243 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 26 Jan 2022 17:22:25 +0000 Subject: [PATCH 202/367] KVM: x86: Update vCPU's runtime CPUID on write to MSR_IA32_XSS Do a runtime CPUID update for a vCPU if MSR_IA32_XSS is written, as the size in bytes of the XSAVE area is affected by the states enabled in XSS. Fixes: 203000993de5 ("kvm: vmx: add MSR logic for XSAVES") Cc: stable@vger.kernel.org Signed-off-by: Like Xu [sean: split out as a separate patch, adjust Fixes tag] Signed-off-by: Sean Christopherson Message-Id: <20220126172226.2298529-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b6d60c296eda..9c984eeed30c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3535,6 +3535,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~supported_xss) return 1; vcpu->arch.ia32_xss = data; + kvm_update_cpuid_runtime(vcpu); break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) From 05a9e065059e566f218f8778c4d17ee75db56c55 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 26 Jan 2022 17:22:26 +0000 Subject: [PATCH 203/367] KVM: x86: Sync the states size with the XCR0/IA32_XSS at, any time XCR0 is reset to 1 by RESET but not INIT and IA32_XSS is zeroed by both RESET and INIT. The kvm_set_msr_common()'s handling of MSR_IA32_XSS also needs to update kvm_update_cpuid_runtime(). In the above cases, the size in bytes of the XSAVE area containing all states enabled by XCR0 or (XCRO | IA32_XSS) needs to be updated. For simplicity and consistency, existing helpers are used to write values and call kvm_update_cpuid_runtime(), and it's not exactly a fast path. Fixes: a554d207dc46 ("KVM: X86: Processor States following Reset or INIT") Cc: stable@vger.kernel.org Signed-off-by: Like Xu Signed-off-by: Sean Christopherson Message-Id: <20220126172226.2298529-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9c984eeed30c..5481d2259f02 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11266,8 +11266,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.msr_misc_features_enables = 0; - vcpu->arch.xcr0 = XFEATURE_MASK_FP; - vcpu->arch.ia32_xss = 0; + __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP); + __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true); } /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ From dd4516aee365fc9c944c9d6036b6b87363398680 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jan 2022 07:44:34 -0500 Subject: [PATCH 204/367] selftests: kvm: move vm_xsave_req_perm call to amx_test There is no need for tests other than amx_test to enable dynamic xsave states. Remove the call to vm_xsave_req_perm from generic code, and move it inside the test. While at it, allow customizing the bit that is requested, so that future tests can use it differently. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util_base.h | 1 - .../testing/selftests/kvm/include/x86_64/processor.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 7 ------- tools/testing/selftests/kvm/lib/x86_64/processor.c | 12 ++++++------ tools/testing/selftests/kvm/x86_64/amx_test.c | 2 ++ 5 files changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 66775de26952..4ed6aa049a91 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -345,7 +345,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, * guest_code - The vCPU's entry point */ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); -void vm_xsave_req_perm(void); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 423d8a61bd2e..8a470da7b71a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -458,6 +458,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); +void vm_xsave_req_perm(int bit); enum x86_page_size { X86_PAGE_SIZE_4K = 0, diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8c53f96ab7fe..d8cf851ab119 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -393,13 +393,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, struct kvm_vm *vm; int i; -#ifdef __x86_64__ - /* - * Permission needs to be requested before KVM_SET_CPUID2. - */ - vm_xsave_req_perm(); -#endif - /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */ if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES) slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 5f9d7e91dc69..c1d1c195a838 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -665,16 +665,16 @@ static bool is_xfd_supported(void) return !!(eax & CPUID_XFD_BIT); } -void vm_xsave_req_perm(void) +void vm_xsave_req_perm(int bit) { - unsigned long bitmask; + u64 bitmask; long rc; if (!is_xfd_supported()) - return; + exit(KSFT_SKIP); + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); - rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, - XSTATE_XTILE_DATA_BIT); /* * The older kernel version(<5.15) can't support * ARCH_REQ_XCOMP_GUEST_PERM and directly return. @@ -684,7 +684,7 @@ void vm_xsave_req_perm(void) rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK, + TEST_ASSERT(bitmask & (1ULL << bit), "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", bitmask); } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 523c1e99ed64..52a3ef6629e8 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -329,6 +329,8 @@ int main(int argc, char *argv[]) u32 amx_offset; int stage, ret; + vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT); + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); From fc55e63e148f1db2180867da875460a00aac8bd1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jan 2022 20:32:43 +0300 Subject: [PATCH 205/367] counter: fix an IS_ERR() vs NULL bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are 8 callers for devm_counter_alloc() and they all check for NULL instead of error pointers. I think NULL is the better thing to return for allocation functions so update counter_alloc() and devm_counter_alloc() to return NULL instead of error pointers. Fixes: c18e2760308e ("counter: Provide alternative counter registration functions") Acked-by: Uwe Kleine-König Acked-by: William Breathitt Gray Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20220111173243.GA2192@kili Signed-off-by: Greg Kroah-Hartman --- drivers/counter/counter-core.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c index 7e0957eea094..869894b74741 100644 --- a/drivers/counter/counter-core.c +++ b/drivers/counter/counter-core.c @@ -90,10 +90,8 @@ struct counter_device *counter_alloc(size_t sizeof_priv) int err; ch = kzalloc(sizeof(*ch) + sizeof_priv, GFP_KERNEL); - if (!ch) { - err = -ENOMEM; - goto err_alloc_ch; - } + if (!ch) + return NULL; counter = &ch->counter; dev = &counter->dev; @@ -123,9 +121,8 @@ err_chrdev_add: err_ida_alloc: kfree(ch); -err_alloc_ch: - return ERR_PTR(err); + return NULL; } EXPORT_SYMBOL_GPL(counter_alloc); @@ -208,12 +205,12 @@ struct counter_device *devm_counter_alloc(struct device *dev, size_t sizeof_priv int err; counter = counter_alloc(sizeof_priv); - if (IS_ERR(counter)) - return counter; + if (!counter) + return NULL; err = devm_add_action_or_reset(dev, devm_counter_put, counter); if (err < 0) - return ERR_PTR(err); + return NULL; return counter; } From 3758a6c74e08bdc15ccccd6872a6ad37d165239a Mon Sep 17 00:00:00 2001 From: Evgenii Stepanov Date: Tue, 25 Jan 2022 10:22:17 -0800 Subject: [PATCH 206/367] arm64: extable: fix load_unaligned_zeropad() reg indices In ex_handler_load_unaligned_zeropad() we erroneously extract the data and addr register indices from ex->type rather than ex->data. As ex->type will contain EX_TYPE_LOAD_UNALIGNED_ZEROPAD (i.e. 4): * We'll always treat X0 as the address register, since EX_DATA_REG_ADDR is extracted from bits [9:5]. Thus, we may attempt to dereference an arbitrary address as X0 may hold an arbitrary value. * We'll always treat X4 as the data register, since EX_DATA_REG_DATA is extracted from bits [4:0]. Thus we will corrupt X4 and cause arbitrary behaviour within load_unaligned_zeropad() and its caller. Fix this by extracting both values from ex->data as originally intended. On an MTE-enabled QEMU image we are hitting the following crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace: fixup_exception+0xc4/0x108 __do_kernel_fault+0x3c/0x268 do_tag_check_fault+0x3c/0x104 do_mem_abort+0x44/0xf4 el1_abort+0x40/0x64 el1h_64_sync_handler+0x60/0xa0 el1h_64_sync+0x7c/0x80 link_path_walk+0x150/0x344 path_openat+0xa0/0x7dc do_filp_open+0xb8/0x168 do_sys_openat2+0x88/0x17c __arm64_sys_openat+0x74/0xa0 invoke_syscall+0x48/0x148 el0_svc_common+0xb8/0xf8 do_el0_svc+0x28/0x88 el0_svc+0x24/0x84 el0t_64_sync_handler+0x88/0xec el0t_64_sync+0x1b4/0x1b8 Code: f8695a69 71007d1f 540000e0 927df12a (f940014a) Fixes: 753b32368705 ("arm64: extable: add load_unaligned_zeropad() handler") Cc: # 5.16.x Reviewed-by: Mark Rutland Signed-off-by: Evgenii Stepanov Link: https://lore.kernel.org/r/20220125182217.2605202-1-eugenis@google.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/extable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index c0181e60cc98..489455309695 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -40,8 +40,8 @@ static bool ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, struct pt_regs *regs) { - int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type); - int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type); + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); unsigned long data, addr, offset; addr = pt_regs_read_reg(regs, reg_addr); From 89d43d0551a848e70e63d9ba11534aaeabc82443 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 12 Jan 2022 12:29:04 +0800 Subject: [PATCH 207/367] ceph: put the requests/sessions when it fails to alloc memory When failing to allocate the sessions memory we should make sure the req1 and req2 and the sessions get put. And also in case the max_sessions decreased so when kreallocate the new memory some sessions maybe missed being put. And if the max_sessions is 0 krealloc will return ZERO_SIZE_PTR, which will lead to a distinct access fault. URL: https://tracker.ceph.com/issues/53819 Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs") Signed-off-by: Xiubo Li Reviewed-by: Venky Shankar Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7d305b974824..b472cd066d1c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2218,6 +2218,7 @@ static int unsafe_request_wait(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; + unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2235,37 +2236,45 @@ static int unsafe_request_wait(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock); + /* + * The mdsc->max_sessions is unlikely to be changed + * mostly, here we will retry it by reallocating the + * sessions array memory to get rid of the mdsc->mutex + * lock. + */ +retry: + max_sessions = mdsc->max_sessions; + /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ - if (req1 || req2) { + if ((req1 || req2) && likely(max_sessions)) { struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s; struct ceph_mds_request *req; - unsigned int max; int i; - /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions arrary memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max = mdsc->max_sessions; - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO); - if (!sessions) - return -ENOMEM; + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL); + if (!sessions) { + err = -ENOMEM; + goto out; + } spin_lock(&ci->i_unsafe_lock); if (req1) { list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2278,8 +2287,14 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2300,7 +2315,7 @@ retry: spin_unlock(&ci->i_ceph_lock); /* send flush mdlog request to MDSes */ - for (i = 0; i < max; i++) { + for (i = 0; i < max_sessions; i++) { s = sessions[i]; if (s) { send_flush_mdlog(s); @@ -2317,15 +2332,19 @@ retry: ceph_timeout_jiffies(req1->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req1); } if (req2) { ret = !wait_for_completion_timeout(&req2->r_safe_completion, ceph_timeout_jiffies(req2->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req2); } + +out: + if (req1) + ceph_mdsc_put_request(req1); + if (req2) + ceph_mdsc_put_request(req2); return err; } From 932a9b5870d38b87ba0a9923c804b1af7d3605b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 25 Jan 2022 15:39:16 -0500 Subject: [PATCH 208/367] ceph: properly put ceph_string reference after async create attempt The reference acquired by try_prep_async_create is currently leaked. Ensure we put it. Cc: stable@vger.kernel.org Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible") Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5b9104b8e453..6afae97be9da 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -750,8 +750,10 @@ retry: restore_deleg_ino(dir, req->r_deleg_ino); ceph_mdsc_put_request(req); try_async = false; + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto retry; } + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto out_req; } } From 4584a768f22b7669cdebabc911543621ac661341 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 26 Jan 2022 12:36:49 -0500 Subject: [PATCH 209/367] ceph: set pool_ns in new inode layout for async creates Dan reported that he was unable to write to files that had been asynchronously created when the client's OSD caps are restricted to a particular namespace. The issue is that the layout for the new inode is only partially being filled. Ensure that we populate the pool_ns_data and pool_ns_len in the iinfo before calling ceph_fill_inode. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/54013 Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible") Reported-by: Dan van der Ster Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 6afae97be9da..bbed3224ad68 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -583,6 +583,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, struct ceph_inode_info *ci = ceph_inode(dir); struct inode *inode; struct timespec64 now; + struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -632,6 +633,12 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, in.max_size = cpu_to_le64(lo->stripe_unit); ceph_file_layout_to_legacy(lo, &in.layout); + /* lo is private, so pool_ns can't change */ + pool_ns = rcu_dereference_raw(lo->pool_ns); + if (pool_ns) { + iinfo.pool_ns_len = pool_ns->len; + iinfo.pool_ns_data = pool_ns->str; + } down_read(&mdsc->snap_rwsem); ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, From da123016ca8cb5697366c0b2dd55059b976e67e4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 26 Jan 2022 10:42:58 -0800 Subject: [PATCH 210/367] rcu-tasks: Fix computation of CPU-to-list shift counts The ->percpu_enqueue_shift field is used to map from the running CPU number to the index of the corresponding callback list. This mapping can change at runtime in response to varying callback load, resulting in varying levels of contention on the callback-list locks. Unfortunately, the initial value of this field is correct only if the system happens to have a power-of-two number of CPUs, otherwise the callbacks from the high-numbered CPUs can be placed into the callback list indexed by 1 (rather than 0), and those index-1 callbacks will be ignored. This can result in soft lockups and hangs. This commit therefore corrects this mapping, adding one to this shift count as needed for systems having odd numbers of CPUs. Fixes: 7a30871b6a27 ("rcu-tasks: Introduce ->percpu_enqueue_shift for dynamic queue selection") Reported-by: Andrii Nakryiko Cc: Reported-by: Martin Lau Cc: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 84f1d91604cc..d64f0b1d8cd3 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \ .call_func = call, \ .rtpcpu = &rt_name ## __percpu, \ .name = n, \ - .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \ + .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \ .percpu_enqueue_lim = 1, \ .percpu_dequeue_lim = 1, \ .barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \ @@ -216,6 +216,7 @@ static void cblist_init_generic(struct rcu_tasks *rtp) int cpu; unsigned long flags; int lim; + int shift; raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rcu_task_enqueue_lim < 0) { @@ -229,7 +230,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp) if (lim > nr_cpu_ids) lim = nr_cpu_ids; - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim)); + shift = ilog2(nr_cpu_ids / lim); + if (((nr_cpu_ids - 1) >> shift) >= lim) + shift++; + WRITE_ONCE(rtp->percpu_enqueue_shift, shift); WRITE_ONCE(rtp->percpu_dequeue_lim, lim); smp_store_release(&rtp->percpu_enqueue_lim, lim); for_each_possible_cpu(cpu) { @@ -298,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, if (unlikely(needadjust)) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim != nr_cpu_ids) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids)); + WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids); smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids); pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name); @@ -413,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim > 1) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids)); + WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); smp_store_release(&rtp->percpu_enqueue_lim, 1); rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu(); pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name); From 7355bfe0e0cc27597d530f78e259a985cb85af40 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 23 Jan 2022 13:57:17 +0100 Subject: [PATCH 211/367] netfilter: Remove flowtable relics NF_FLOW_TABLE_IPV4 and NF_FLOW_TABLE_IPV6 are invisble, selected by nothing (so they can no longer be enabled), and their last real users have been removed (nf_flow_table_ipv6.c is empty). Clean up the leftovers. Fixes: c42ba4290b2147aa ("netfilter: flowtable: remove ipv4/ipv6 modules") Signed-off-by: Geert Uytterhoeven Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 4 ---- net/ipv6/netfilter/Kconfig | 4 ---- net/ipv6/netfilter/Makefile | 3 --- net/ipv6/netfilter/nf_flow_table_ipv6.c | 0 4 files changed, 11 deletions(-) delete mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 67087f95579f..aab384126f61 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -58,10 +58,6 @@ config NF_TABLES_ARP endif # NF_TABLES -config NF_FLOW_TABLE_IPV4 - tristate - select NF_FLOW_TABLE_INET - config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 97d3d1b36dbc..0ba62f4868f9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -47,10 +47,6 @@ config NFT_FIB_IPV6 endif # NF_TABLES_IPV6 endif # NF_TABLES -config NF_FLOW_TABLE_IPV6 - tristate - select NF_FLOW_TABLE_INET - config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b85383606df7..b8d6dc9aeeb6 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -28,9 +28,6 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o -# flow table support -obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o - # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c deleted file mode 100644 index e69de29bb2d1..000000000000 From 34243b9ec856309339172b1507379074156947e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 23 Jan 2022 15:24:00 +0100 Subject: [PATCH 212/367] netfilter: nft_ct: fix use after free when attaching zone template The conversion erroneously removed the refcount increment. In case we can use the percpu template, we need to increment the refcount, else it will be released when the skb gets freed. In case the slowpath is taken, the new template already has a refcount of 1. Fixes: 719774377622 ("netfilter: conntrack: convert to refcount_t api") Reported-by: kernel test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 518d96c8c247..5adf8bb628a8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -260,9 +260,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); if (likely(refcount_read(&ct->ct_general.use) == 1)) { + refcount_inc(&ct->ct_general.use); nf_ct_zone_add(ct, &zone); } else { - /* previous skb got queued to userspace */ + /* previous skb got queued to userspace, allocate temporary + * one until percpu template can be reused. + */ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); if (!ct) { regs->verdict.code = NF_DROP; From c858620d2ae3489409af593f005a48a8a324da3d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 23 Jan 2022 15:45:54 +0100 Subject: [PATCH 213/367] selftests: netfilter: reduce zone stress test running time This selftests needs almost 3 minutes to complete, reduce the insertes zones to 1000. Test now completes in about 20 seconds. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_zones_many.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh index 04633119b29a..5a8db0b48928 100755 --- a/tools/testing/selftests/netfilter/nft_zones_many.sh +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -9,7 +9,7 @@ ns="ns-$sfx" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -zones=20000 +zones=2000 have_ct_tool=0 ret=0 @@ -75,10 +75,10 @@ EOF while [ $i -lt $max_zones ]; do local start=$(date +%s%3N) - i=$((i + 10000)) + i=$((i + 1000)) j=$((j + 1)) # nft rule in output places each packet in a different zone. - dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 + dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 if [ $? -ne 0 ] ;then ret=1 break @@ -86,7 +86,7 @@ EOF stop=$(date +%s%3N) local duration=$((stop-start)) - echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)" + echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)" done if [ $have_ct_tool -eq 1 ]; then @@ -128,11 +128,11 @@ test_conntrack_tool() { break fi - if [ $((i%10000)) -eq 0 ];then + if [ $((i%1000)) -eq 0 ];then stop=$(date +%s%3N) local duration=$((stop-start)) - echo "PASS: added 10000 entries in $duration ms (now $i total)" + echo "PASS: added 1000 entries in $duration ms (now $i total)" start=$stop fi done From aad51ca71ad83273e8826d6cfdcf53c98748d1fa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Jan 2022 22:09:15 +0100 Subject: [PATCH 214/367] selftests: netfilter: check stateless nat udp checksum fixup Add a test that sends large udp packet (which is fragmented) via a stateless nft nat rule, i.e. 'ip saddr set 10.2.3.4' and check that the datagram is received by peer. On kernels without commit 4e1860a38637 ("netfilter: nft_payload: do not update layer 4 checksum when mangling fragments")', this will fail with: cmp: EOF on /tmp/tmp.V1q0iXJyQF which is empty -rw------- 1 root root 4096 Jan 24 22:03 /tmp/tmp.Aaqnq4rBKS -rw------- 1 root root 0 Jan 24 22:03 /tmp/tmp.V1q0iXJyQF ERROR: in and output file mismatch when checking udp with stateless nat FAIL: nftables v1.0.0 (Fearless Fosdick #2) On patched kernels, this will show: PASS: IP statless for ns2-PFp89amx Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_nat.sh | 152 +++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 349a319a9e51..79fe627b9e81 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -899,6 +899,144 @@ EOF ip netns exec "$ns0" nft delete table $family nat } +test_stateless_nat_ip() +{ + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules" + return 1 + fi + +ip netns exec "$ns0" nft -f /dev/stdin < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules" + lret=1 + fi + + # ns1 should have seen packets from .2.2, due to stateless rewrite. + expect="packets 1 bytes 84" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 1" + lret=1 + fi + + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3" + lret=1 + fi + + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4" + lret=1 + fi + done + + reset_counters + + socat -h > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run stateless nat frag test without socat tool" + if [ $lret -eq 0 ]; then + return $ksft_skip + fi + + ip netns exec "$ns0" nft delete table ip stateless + return $lret + fi + + local tmpfile=$(mktemp) + dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null + + local outfile=$(mktemp) + ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null & + sc_r=$! + + sleep 1 + # re-do with large ping -> ip fragmentation + ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null + if [ $? -ne 0 ] ; then + echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2 + lret=1 + fi + + wait + + cmp "$tmpfile" "$outfile" + if [ $? -ne 0 ]; then + ls -l "$tmpfile" "$outfile" + echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2 + lret=1 + fi + + rm -f "$tmpfile" "$outfile" + + # ns1 should have seen packets from 2.2, due to stateless rewrite. + expect="packets 3 bytes 4164" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 5" + lret=1 + fi + + ip netns exec "$ns0" nft delete table ip stateless + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete table ip stateless" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP statless for $ns2" + + return $lret +} + # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in 0 1 2; do ip netns exec ns$i-$sfx nft -f /dev/stdin < Date: Tue, 25 Jan 2022 20:06:03 +0100 Subject: [PATCH 215/367] netfilter: nft_reject_bridge: Fix for missing reply from prerouting Prior to commit fa538f7cf05aa ("netfilter: nf_reject: add reject skbuff creation helpers"), nft_reject_bridge did not assign to nskb->dev before passing nskb on to br_forward(). The shared skbuff creation helpers introduced in above commit do which seems to confuse br_forward() as reject statements in prerouting hook won't emit a packet anymore. Fix this by simply passing NULL instead of 'dev' to the helpers - they use the pointer for just that assignment, nothing else. Fixes: fa538f7cf05aa ("netfilter: nf_reject: add reject skbuff creation helpers") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index eba0efe64d05..fbf858ddec35 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook); + nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook); if (!nskb) return; @@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code); + nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code); if (!nskb) return; @@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook); + nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook); if (!nskb) return; @@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code); + nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code); if (!nskb) return; From f459bfd4b9793f25e0fcf19878edd87d8dc569d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Jan 2022 01:46:58 +0100 Subject: [PATCH 216/367] netfilter: nft_byteorder: track register operations Cancel tracking for byteorder operation, otherwise selector + byteorder operation is incorrectly reduced if source and destination registers are the same. Reported-by: kernel test robot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_byteorder.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 9d5947ab8d4e..e646e9ee4a98 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -167,12 +167,24 @@ nla_put_failure: return -1; } +static bool nft_byteorder_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + + track->regs[priv->dreg].selector = NULL; + track->regs[priv->dreg].bitwise = NULL; + + return false; +} + static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, + .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { From eda0cf1202acf1ef47f93d8f92d4839213431424 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jan 2022 12:54:54 +0100 Subject: [PATCH 217/367] selftests: nft_concat_range: add test for reload with no element add/del Add a specific test for the reload issue fixed with commit 23c54263efd7cb ("netfilter: nft_set_pipapo: allocate pcpu scratch maps on clone"). Add to set, then flush set content + restore without other add/remove in the transaction. On kernels before the fix, this test case fails: net,mac with reload [FAIL] Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_concat_range.sh | 72 ++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index ed61f6cab60f..df322e47a54f 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add" +BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -354,6 +354,23 @@ TYPE_flush_remove_add=" display Add two elements, flush, re-add " +TYPE_reload=" +display net,mac with reload +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 1 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1473,6 +1490,59 @@ test_bug_flush_remove_add() { nft flush ruleset } +# - add ranged element, check that packets match it +# - reload the set, check packets still match +test_bug_reload() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + rstart=${start} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + # check kernel does allocate pcpu sctrach map + # for reload with no elemet add/delete + ( echo flush set inet filter test ; + nft list set inet filter test ) | nft -f - + + start=${rstart} + range_size=1 + + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset +} + test_reported_issues() { eval test_bug_"${subtest}" } From f9d87929d451d3e649699d0f1d74f71f77ad38f5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 24 Jan 2022 12:46:50 -0600 Subject: [PATCH 218/367] ucount: Make get_ucount a safe get_user replacement When the ucount code was refactored to create get_ucount it was missed that some of the contexts in which a rlimit is kept elevated can be the only reference to the user/ucount in the system. Ordinary ucount references exist in places that also have a reference to the user namspace, but in POSIX message queues, the SysV shm code, and the SIGPENDING code there is no independent user namespace reference. Inspection of the the user_namespace show no instance of circular references between struct ucounts and the user_namespace. So hold a reference from struct ucount to i's user_namespace to resolve this problem. Link: https://lore.kernel.org/lkml/YZV7Z+yXbsx9p3JN@fixkernel.com/ Reported-by: Qian Cai Reported-by: Mathias Krause Tested-by: Mathias Krause Reviewed-by: Mathias Krause Reviewed-by: Alexey Gladkov Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts") Fixes: 6e52a9f0532f ("Reimplement RLIMIT_MSGQUEUE on top of ucounts") Fixes: d7c9e99aee48 ("Reimplement RLIMIT_MEMLOCK on top of ucounts") Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- kernel/ucount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/ucount.c b/kernel/ucount.c index 7b32c356ebc5..65b597431c86 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -190,6 +190,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) kfree(new); } else { hlist_add_head(&new->node, hashent); + get_user_ns(new->ns); spin_unlock_irq(&ucounts_lock); return new; } @@ -210,6 +211,7 @@ void put_ucounts(struct ucounts *ucounts) if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); + put_user_ns(ucounts->ns); kfree(ucounts); } } From 9b6d90e2085ca2ce72ef9ea78658bf270855e62e Mon Sep 17 00:00:00 2001 From: Zhou Qingyang Date: Tue, 25 Jan 2022 00:45:25 +0800 Subject: [PATCH 219/367] ata: pata_platform: Fix a NULL pointer dereference in __pata_platform_probe() In __pata_platform_probe(), devm_kzalloc() is assigned to ap->ops and there is a dereference of it right after that, which could introduce a NULL pointer dereference bug. Fix this by adding a NULL check of ap->ops. This bug was found by a static analyzer. Builds with 'make allyesconfig' show no new warnings, and our static analyzer no longer warns about this code. Fixes: f3d5e4f18dba ("ata: pata_of_platform: Allow to use 16-bit wide data transfer") Signed-off-by: Zhou Qingyang Signed-off-by: Damien Le Moal Reviewed-by: Sergey Shtylyov --- drivers/ata/pata_platform.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index 028329428b75..87c7c90676ca 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -128,6 +128,8 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res, ap = host->ports[0]; ap->ops = devm_kzalloc(dev, sizeof(*ap->ops), GFP_KERNEL); + if (!ap->ops) + return -ENOMEM; ap->ops->inherits = &ata_sff_port_ops; ap->ops->cable_detect = ata_cable_unknown; ap->ops->set_mode = pata_platform_set_mode; From 25e58af4be412d59e056da65cc1cefbd89185bd2 Mon Sep 17 00:00:00 2001 From: Wu Zheng Date: Mon, 21 Jun 2021 19:07:01 -0400 Subject: [PATCH 220/367] nvme-pci: add the IGNORE_DEV_SUBNQN quirk for Intel P4500/P4600 SSDs The Intel P4500/P4600 SSDs do not report a subsystem NQN despite claiming compliance to a standards version where reporting one is required. Add the IGNORE_DEV_SUBNQN quirk to not fail the initialization of a second such SSDs in a system. Signed-off-by: Zheng Wu Signed-off-by: Ye Jinhe Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d8585df2c2fd..6a99ed680915 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3391,7 +3391,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DEALLOCATE_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, From a5f3851b7f7951e8d4ba0a9ba3b5308a5f250a2d Mon Sep 17 00:00:00 2001 From: Changcheng Deng Date: Fri, 7 Jan 2022 02:23:06 +0000 Subject: [PATCH 221/367] nvme-fabrics: remove the unneeded ret variable in nvmf_dev_show Remove unneeded variable and directly return 0. Reported-by: Zeal Robot Signed-off-by: Changcheng Deng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 7ae041e2b3fb..f79a66d4e22c 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1092,7 +1092,6 @@ static void __nvmf_concat_opt_tokens(struct seq_file *seq_file) static int nvmf_dev_show(struct seq_file *seq_file, void *private) { struct nvme_ctrl *ctrl; - int ret = 0; mutex_lock(&nvmf_dev_mutex); ctrl = seq_file->private; @@ -1106,7 +1105,7 @@ static int nvmf_dev_show(struct seq_file *seq_file, void *private) out_unlock: mutex_unlock(&nvmf_dev_mutex); - return ret; + return 0; } static int nvmf_dev_open(struct inode *inode, struct file *file) From 1293fccc9e892712d910ec96079d3717307f1d2d Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 25 Jan 2022 13:14:21 +0100 Subject: [PATCH 222/367] net: ieee802154: hwsim: Ensure proper channel selection at probe time Drivers are expected to set the PHY current_channel and current_page according to their default state. The hwsim driver is advertising being configured on channel 13 by default but that is not reflected in its own internal pib structure. In order to ensure that this driver consider the current channel as being 13 internally, we at least need to set the pib->channel field to 13. Fixes: f25da51fdc38 ("ieee802154: hwsim: add replacement for fakelb") Signed-off-by: Miquel Raynal [stefan@datenfreihafen.org: fixed assigment from page to channel] Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20220125121426.848337-2-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 8caa61ec718f..36f1c5aa98fc 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -786,6 +786,7 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev, goto err_pib; } + pib->channel = 13; rcu_assign_pointer(phy->pib, pib); phy->idx = idx; INIT_LIST_HEAD(&phy->edges); From d753c4004820a888ec007dd88b271fa9c3172c5c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 25 Jan 2022 13:14:22 +0100 Subject: [PATCH 223/367] net: ieee802154: mcr20a: Fix lifs/sifs periods These periods are expressed in time units (microseconds) while 40 and 12 are the number of symbol durations these periods will last. We need to multiply them both with phy->symbol_duration in order to get these values in microseconds. Fixes: 8c6ad9cc5157 ("ieee802154: Add NXP MCR20A IEEE 802.15.4 transceiver driver") Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20220125121426.848337-3-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 8dc04e2590b1..383231b85464 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -976,8 +976,8 @@ static void mcr20a_hw_setup(struct mcr20a_local *lp) dev_dbg(printdev(lp), "%s\n", __func__); phy->symbol_duration = 16; - phy->lifs_period = 40; - phy->sifs_period = 12; + phy->lifs_period = 40 * phy->symbol_duration; + phy->sifs_period = 12 * phy->symbol_duration; hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | From e5ce576d45bf72fd0e3dc37eff897bfcc488f6a9 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 25 Jan 2022 13:14:23 +0100 Subject: [PATCH 224/367] net: ieee802154: at86rf230: Stop leaking skb's Upon error the ieee802154_xmit_complete() helper is not called. Only ieee802154_wake_queue() is called manually. In the Tx case we then leak the skb structure. Free the skb structure upon error before returning when appropriate. As the 'is_tx = 0' cannot be moved in the complete handler because of a possible race between the delay in switching to STATE_RX_AACK_ON and a new interrupt, we introduce an intermediate 'was_tx' boolean just for this purpose. There is no Fixes tag applying here, many changes have been made on this area and the issue kind of always existed. Suggested-by: Alexander Aring Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20220125121426.848337-4-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/at86rf230.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 7d67f41387f5..4f5ef8a9a9a8 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -100,6 +100,7 @@ struct at86rf230_local { unsigned long cal_timeout; bool is_tx; bool is_tx_from_off; + bool was_tx; u8 tx_retry; struct sk_buff *tx_skb; struct at86rf230_state_change tx; @@ -343,7 +344,11 @@ at86rf230_async_error_recover_complete(void *context) if (ctx->free) kfree(ctx); - ieee802154_wake_queue(lp->hw); + if (lp->was_tx) { + lp->was_tx = 0; + dev_kfree_skb_any(lp->tx_skb); + ieee802154_wake_queue(lp->hw); + } } static void @@ -352,7 +357,11 @@ at86rf230_async_error_recover(void *context) struct at86rf230_state_change *ctx = context; struct at86rf230_local *lp = ctx->lp; - lp->is_tx = 0; + if (lp->is_tx) { + lp->was_tx = 1; + lp->is_tx = 0; + } + at86rf230_async_state_change(lp, ctx, STATE_RX_AACK_ON, at86rf230_async_error_recover_complete); } From 621b24b09eb61c63f262da0c9c5f0e93348897e5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 25 Jan 2022 13:14:24 +0100 Subject: [PATCH 225/367] net: ieee802154: ca8210: Stop leaking skb's Upon error the ieee802154_xmit_complete() helper is not called. Only ieee802154_wake_queue() is called manually. We then leak the skb structure. Free the skb structure upon error before returning. Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20220125121426.848337-5-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index ece6ff6049f6..f3438d3e104a 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -1771,6 +1771,7 @@ static int ca8210_async_xmit_complete( status ); if (status != MAC_TRANSACTION_OVERFLOW) { + dev_kfree_skb_any(priv->tx_skb); ieee802154_wake_queue(priv->hw); return 0; } From 79c37ca73a6e9a33f7b2b7783ba6af07a448c8a9 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 25 Jan 2022 13:14:25 +0100 Subject: [PATCH 226/367] net: ieee802154: Return meaningful error codes from the netlink helpers Returning -1 does not indicate anything useful. Use a standard and meaningful error code instead. Fixes: a26c5fd7622d ("nl802154: add support for security layer") Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20220125121426.848337-6-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- net/ieee802154/nl802154.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 277124f206e0..e0b072aecf0f 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1441,7 +1441,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1634,7 +1634,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1812,7 +1812,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1988,7 +1988,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; From 5d8a8b324ff48c9d9fe4f1634e33dc647d2481b4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 25 Jan 2022 13:14:26 +0100 Subject: [PATCH 227/367] MAINTAINERS: Remove Harry Morris bouncing address Harry's e-mail address from Cascoda bounces, I have not found any contributions from him since 2018 so let's drop the Maintainer entry from the CA8210 driver and mark it Orphan. Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20220125121426.848337-7-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0d7883977e9b..ee938e96b101 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4157,9 +4157,8 @@ N: csky K: csky CA8210 IEEE-802.15.4 RADIO DRIVER -M: Harry Morris L: linux-wpan@vger.kernel.org -S: Maintained +S: Orphan W: https://github.com/Cascoda/ca8210-linux.git F: Documentation/devicetree/bindings/net/ieee802154/ca8210.txt F: drivers/net/ieee802154/ca8210.c From d1ad2721b1eb05d54e81393a7ebc332d4a35c68f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Jan 2022 08:33:04 +0100 Subject: [PATCH 228/367] kbuild: remove include/linux/cyclades.h from header file check The file now rightfully throws up a big warning that it should never be included, so remove it from the header_check test. Fixes: f23653fe6447 ("tty: Partially revert the removal of the Cyclades public API") Cc: stable Cc: Masahiro Yamada Cc: "Maciej W. Rozycki" Reported-by: Stephen Rothwell Reported-by: kernel test robot Link: https://lore.kernel.org/r/20220127073304.42399-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- usr/include/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/usr/include/Makefile b/usr/include/Makefile index 7be7468c177b..83822c33e9e7 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -28,6 +28,7 @@ no-header-test += linux/am437x-vpfe.h no-header-test += linux/android/binder.h no-header-test += linux/android/binderfs.h no-header-test += linux/coda.h +no-header-test += linux/cyclades.h no-header-test += linux/errqueue.h no-header-test += linux/fsmap.h no-header-test += linux/hdlc/ioctl.h From fa62f39dc7e25fc16371b958ac59b9a6fd260bea Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 25 Jan 2022 15:19:44 +0100 Subject: [PATCH 229/367] MIPS: Fix build error due to PTR used in more places Use PTR_WD instead of PTR to avoid clashes with other parts. Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/asm.h | 4 +- arch/mips/include/asm/ftrace.h | 4 +- arch/mips/include/asm/r4kcache.h | 4 +- arch/mips/include/asm/unaligned-emul.h | 176 ++++++++++++------------- arch/mips/kernel/mips-r2-to-r6-emul.c | 104 +++++++-------- arch/mips/kernel/r2300_fpu.S | 6 +- arch/mips/kernel/r4k_fpu.S | 2 +- arch/mips/kernel/relocate_kernel.S | 22 ++-- arch/mips/kernel/scall32-o32.S | 10 +- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-n64.S | 2 +- arch/mips/kernel/scall64-o32.S | 10 +- arch/mips/kernel/syscall.c | 8 +- arch/mips/lib/csum_partial.S | 4 +- arch/mips/lib/memcpy.S | 4 +- arch/mips/lib/memset.S | 2 +- arch/mips/lib/strncpy_user.S | 4 +- arch/mips/lib/strnlen_user.S | 2 +- 18 files changed, 185 insertions(+), 185 deletions(-) diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index 6ffdd4b5e1d0..336ac9b65235 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -285,7 +285,7 @@ symbol = value #define PTR_SCALESHIFT 2 -#define PTR .word +#define PTR_WD .word #define PTRSIZE 4 #define PTRLOG 2 #endif @@ -310,7 +310,7 @@ symbol = value #define PTR_SCALESHIFT 3 -#define PTR .dword +#define PTR_WD .dword #define PTRSIZE 8 #define PTRLOG 3 #endif diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h index b463f2aa5a61..db497a8167da 100644 --- a/arch/mips/include/asm/ftrace.h +++ b/arch/mips/include/asm/ftrace.h @@ -32,7 +32,7 @@ do { \ ".previous\n" \ \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR) "\t1b, 3b\n\t" \ + STR(PTR_WD) "\t1b, 3b\n\t" \ ".previous\n" \ \ : [tmp_dst] "=&r" (dst), [tmp_err] "=r" (error)\ @@ -54,7 +54,7 @@ do { \ ".previous\n" \ \ ".section\t__ex_table,\"a\"\n\t"\ - STR(PTR) "\t1b, 3b\n\t" \ + STR(PTR_WD) "\t1b, 3b\n\t" \ ".previous\n" \ \ : [tmp_err] "=r" (error) \ diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index af3788589ee6..431a1c9d53fc 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -119,7 +119,7 @@ static inline void flush_scache_line(unsigned long addr) " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ - " "STR(PTR)" 1b, 3b \n" \ + " "STR(PTR_WD)" 1b, 3b \n" \ " .previous" \ : "+r" (__err) \ : "i" (op), "r" (addr), "i" (-EFAULT)); \ @@ -142,7 +142,7 @@ static inline void flush_scache_line(unsigned long addr) " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ - " "STR(PTR)" 1b, 3b \n" \ + " "STR(PTR_WD)" 1b, 3b \n" \ " .previous" \ : "+r" (__err) \ : "i" (op), "r" (addr), "i" (-EFAULT)); \ diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h index 2022b18944b9..9af0f4d3d288 100644 --- a/arch/mips/include/asm/unaligned-emul.h +++ b/arch/mips/include/asm/unaligned-emul.h @@ -20,8 +20,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -41,8 +41,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -74,10 +74,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -102,8 +102,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -125,8 +125,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -145,8 +145,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -178,10 +178,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -223,14 +223,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -255,8 +255,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT));\ @@ -276,8 +276,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -296,8 +296,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -325,10 +325,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -365,14 +365,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -398,8 +398,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -419,8 +419,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -452,10 +452,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -481,8 +481,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -504,8 +504,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -524,8 +524,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -557,10 +557,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -602,14 +602,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -632,8 +632,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT));\ @@ -653,8 +653,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -673,8 +673,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -703,10 +703,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -743,14 +743,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index a39ec755e4c2..750fe569862b 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -1258,10 +1258,10 @@ fpu_emul: " j 10b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1333,10 +1333,10 @@ fpu_emul: " j 10b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1404,10 +1404,10 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1474,10 +1474,10 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1589,14 +1589,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1708,14 +1708,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1827,14 +1827,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1945,14 +1945,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -2007,7 +2007,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "=&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV) @@ -2065,7 +2065,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "+&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV)); @@ -2126,7 +2126,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "=&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV) @@ -2189,7 +2189,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "+&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV)); diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index cbf6db98cfb3..2748c55820c2 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -23,14 +23,14 @@ #define EX(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,fault; \ + PTR_WD 9b,fault; \ .previous #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,fault; \ - PTR 9b+4,fault; \ + PTR_WD 9b,fault; \ + PTR_WD 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index b91e91106475..2e687c60bc4f 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -31,7 +31,7 @@ .ex\@: \insn \reg, \src .set pop .section __ex_table,"a" - PTR .ex\@, fault + PTR_WD .ex\@, fault .previous .endm diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index f3c908abdbb8..cfde14b48fd8 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -147,10 +147,10 @@ LEAF(kexec_smp_wait) kexec_args: EXPORT(kexec_args) -arg0: PTR 0x0 -arg1: PTR 0x0 -arg2: PTR 0x0 -arg3: PTR 0x0 +arg0: PTR_WD 0x0 +arg1: PTR_WD 0x0 +arg2: PTR_WD 0x0 +arg3: PTR_WD 0x0 .size kexec_args,PTRSIZE*4 #ifdef CONFIG_SMP @@ -161,10 +161,10 @@ arg3: PTR 0x0 */ secondary_kexec_args: EXPORT(secondary_kexec_args) -s_arg0: PTR 0x0 -s_arg1: PTR 0x0 -s_arg2: PTR 0x0 -s_arg3: PTR 0x0 +s_arg0: PTR_WD 0x0 +s_arg1: PTR_WD 0x0 +s_arg2: PTR_WD 0x0 +s_arg3: PTR_WD 0x0 .size secondary_kexec_args,PTRSIZE*4 kexec_flag: LONG 0x1 @@ -173,17 +173,17 @@ kexec_flag: kexec_start_address: EXPORT(kexec_start_address) - PTR 0x0 + PTR_WD 0x0 .size kexec_start_address, PTRSIZE kexec_indirection_page: EXPORT(kexec_indirection_page) - PTR 0 + PTR_WD 0 .size kexec_indirection_page, PTRSIZE relocate_new_kernel_end: relocate_new_kernel_size: EXPORT(relocate_new_kernel_size) - PTR relocate_new_kernel_end - relocate_new_kernel + PTR_WD relocate_new_kernel_end - relocate_new_kernel .size relocate_new_kernel_size, PTRSIZE diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index b1b2e106f711..9bfce5f75f60 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -72,10 +72,10 @@ loads_done: .set pop .section __ex_table,"a" - PTR load_a4, bad_stack_a4 - PTR load_a5, bad_stack_a5 - PTR load_a6, bad_stack_a6 - PTR load_a7, bad_stack_a7 + PTR_WD load_a4, bad_stack_a4 + PTR_WD load_a5, bad_stack_a5 + PTR_WD load_a6, bad_stack_a6 + PTR_WD load_a7, bad_stack_a7 .previous lw t0, TI_FLAGS($28) # syscall tracing enabled? @@ -216,7 +216,7 @@ einval: li v0, -ENOSYS #endif /* CONFIG_MIPS_MT_FPAFF */ #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 2 .type sys_call_table, @object EXPORT(sys_call_table) diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index f650c55a17dc..97456b2ca7dc 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -101,7 +101,7 @@ not_n32_scall: END(handle_sysn32) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .type sysn32_call_table, @object EXPORT(sysn32_call_table) #include diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index 5d7bfc65e4d0..5f6ed4b4c399 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -109,7 +109,7 @@ illegal_syscall: j n64_syscall_exit END(handle_sys64) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 3 .type sys_call_table, @object EXPORT(sys_call_table) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index cedc8bd88804..d3c2616cba22 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -73,10 +73,10 @@ load_a7: lw a7, 28(t0) # argument #8 from usp loads_done: .section __ex_table,"a" - PTR load_a4, bad_stack_a4 - PTR load_a5, bad_stack_a5 - PTR load_a6, bad_stack_a6 - PTR load_a7, bad_stack_a7 + PTR_WD load_a4, bad_stack_a4 + PTR_WD load_a5, bad_stack_a5 + PTR_WD load_a6, bad_stack_a6 + PTR_WD load_a7, bad_stack_a7 .previous li t1, _TIF_WORK_SYSCALL_ENTRY @@ -214,7 +214,7 @@ einval: li v0, -ENOSYS END(sys32_syscall) #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 3 .type sys32_call_table,@object EXPORT(sys32_call_table) diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 5512cd586e6e..ae93a607ddf7 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -122,8 +122,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 4b \n" - " "STR(PTR)" 2b, 4b \n" + " "STR(PTR_WD)" 1b, 4b \n" + " "STR(PTR_WD)" 2b, 4b \n" " .previous \n" " .set pop \n" : [old] "=&r" (old), @@ -152,8 +152,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 5b \n" - " "STR(PTR)" 2b, 5b \n" + " "STR(PTR_WD)" 1b, 5b \n" + " "STR(PTR_WD)" 2b, 5b \n" " .previous \n" " .set pop \n" : [old] "=&r" (old), diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index a46db0807195..7767137c3e49 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -347,7 +347,7 @@ EXPORT_SYMBOL(csum_partial) .if \mode == LEGACY_MODE; \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, .L_exc; \ + PTR_WD 9b, .L_exc; \ .previous; \ /* This is enabled in EVA mode */ \ .else; \ @@ -356,7 +356,7 @@ EXPORT_SYMBOL(csum_partial) ((\to == USEROP) && (type == ST_INSN)); \ 9: __BUILD_EVA_INSN(insn##e, reg, addr); \ .section __ex_table,"a"; \ - PTR 9b, .L_exc; \ + PTR_WD 9b, .L_exc; \ .previous; \ .else; \ /* EVA without exception */ \ diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index 277c32296636..18a43f2e29c8 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -116,7 +116,7 @@ .if \mode == LEGACY_MODE; \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous; \ /* This is assembled in EVA mode */ \ .else; \ @@ -125,7 +125,7 @@ ((\to == USEROP) && (type == ST_INSN)); \ 9: __BUILD_EVA_INSN(insn##e, reg, addr); \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous; \ .else; \ /* \ diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index b0baa3c79fad..0b342bae9a98 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -52,7 +52,7 @@ 9: ___BUILD_EVA_INSN(insn, reg, addr); \ .endif; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous .macro f_fill64 dst, offset, val, fixup, mode diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index 556acf684d7b..13aaa9927ad1 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S @@ -15,7 +15,7 @@ #define EX(insn,reg,addr,handler) \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* @@ -59,7 +59,7 @@ LEAF(__strncpy_from_user_asm) jr ra .section __ex_table,"a" - PTR 1b, .Lfault + PTR_WD 1b, .Lfault .previous EXPORT_SYMBOL(__strncpy_from_user_asm) diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 92b63f20ec05..6de31b616f9c 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -14,7 +14,7 @@ #define EX(insn,reg,addr,handler) \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* From d19a7af73b5ecaac8168712d18be72b9db166768 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Jan 2022 17:00:51 -0500 Subject: [PATCH 230/367] lockd: fix failure to cleanup client locks In my testing, we're sometimes hitting the request->fl_flags & FL_EXISTS case in posix_lock_inode, presumably just by random luck since we're not actually initializing fl_flags here. This probably didn't matter before commit 7f024fcd5c97 ("Keep read and write fds with each nlm_file") since we wouldn't previously unlock unless we knew there were locks. But now it causes lockd to give up on removing more locks. We could just initialize fl_flags, but really it seems dubious to be calling vfs_lock_file with random values in some of the fields. Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file") Signed-off-by: J. Bruce Fields [ cel: fixed checkpatch.pl nit ] Signed-off-by: Chuck Lever --- fs/lockd/svcsubs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 54c2e42130ca..0a22a2faf552 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -180,6 +180,7 @@ static int nlm_unlock_files(struct nlm_file *file) { struct file_lock lock; + locks_init_lock(&lock); lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; From b07f413732549e5a96e891411fbb5980f2d8e5a1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Jan 2022 13:40:38 +0100 Subject: [PATCH 231/367] netfilter: nf_tables: remove assignment with no effect in chain blob builder cppcheck possible warnings: >> net/netfilter/nf_tables_api.c:2014:2: warning: Assignment of function parameter has no effect outside the function. Did you forget dereferencing it? [uselessAssignmentPtrArg] ptr += offsetof(struct nft_rule_dp, data); ^ Reported-by: kernel test robot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cf454f8ca2b0..5fa16990da95 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2011,7 +2011,6 @@ static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr) prule = (struct nft_rule_dp *)ptr; prule->is_last = 1; - ptr += offsetof(struct nft_rule_dp, data); /* blob size does not include the trailer rule */ } From 10825410b956dc1ed8c5fbc8bbedaffdadde7f20 Mon Sep 17 00:00:00 2001 From: Laibin Qiu Date: Thu, 27 Jan 2022 18:00:47 +0800 Subject: [PATCH 232/367] blk-mq: Fix wrong wakeup batch configuration which will cause hang Commit 180dccb0dba4f ("blk-mq: fix tag_get wait task can't be awakened") will recalculate wake_batch when incrementing or decrementing active_queues to avoid wake_batch > hctx_max_depth. At the same time, in order to not affect performance as much as possible, the minimum wakeup batch is set to 4. But when the QD is small (such as QD=1), if inc or dec active_queues increases wakeup batch, that can lead to a hang: Fix this problem with the following strategies: QD : >= 32 | < 32 --------------------------------- wakeup batch: 8~4 | 3~1 Fixes: 180dccb0dba4f ("blk-mq: fix tag_get wait task can't be awakened") Link: https://lore.kernel.org/linux-block/78cafe94-a787-e006-8851-69906f0c2128@huawei.com/T/#t Reported-by: Alex Xu (Hello71) Signed-off-by: Laibin Qiu Tested-by: Alex Xu (Hello71) Link: https://lore.kernel.org/r/20220127100047.1763746-1-qiulaibin@huawei.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 6220fa67fb7e..09d293c30fd2 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -488,9 +488,13 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; + unsigned int min_batch; + unsigned int depth = (sbq->sb.depth + users - 1) / users; - wake_batch = clamp_val((sbq->sb.depth + users - 1) / - users, 4, SBQ_WAKE_BATCH); + min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1; + + wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, + min_batch, SBQ_WAKE_BATCH); __sbitmap_queue_update_wake_batch(sbq, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); From f6133fbd373811066c8441737e65f384c8f31974 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Thu, 27 Jan 2022 14:04:44 +0000 Subject: [PATCH 233/367] io_uring: remove unused argument from io_rsrc_node_alloc io_ring_ctx is not used in the function. Signed-off-by: Usama Arif Link: https://lore.kernel.org/r/20220127140444.4016585-1-usama.arif@bytedance.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index dd4c801c7afd..2e04f718319d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7846,7 +7846,7 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); } -static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) +static struct io_rsrc_node *io_rsrc_node_alloc(void) { struct io_rsrc_node *ref_node; @@ -7897,7 +7897,7 @@ static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx) { if (ctx->rsrc_backup_node) return 0; - ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx); + ctx->rsrc_backup_node = io_rsrc_node_alloc(); return ctx->rsrc_backup_node ? 0 : -ENOMEM; } From dede34b2c1a88e26f8353b433e381ea355f7258d Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 26 Jan 2022 13:13:41 -0700 Subject: [PATCH 234/367] docs/kselftest: clarify running mainline tests on stables Update the document to clarifiy support for running mainline kselftest on stable releases and the reasons for not removing test code that can test older kernels. Signed-off-by: Shuah Khan --- Documentation/dev-tools/kselftest.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst index dcefee707ccd..a833ecf12fbc 100644 --- a/Documentation/dev-tools/kselftest.rst +++ b/Documentation/dev-tools/kselftest.rst @@ -7,6 +7,14 @@ directory. These are intended to be small tests to exercise individual code paths in the kernel. Tests are intended to be run after building, installing and booting a kernel. +Kselftest from mainline can be run on older stable kernels. Running tests +from mainline offers the best coverage. Several test rings run mainline +kselftest suite on stable releases. The reason is that when a new test +gets added to test existing code to regression test a bug, we should be +able to run that test on an older kernel. Hence, it is important to keep +code that can still test an older kernel and make sure it skips the test +gracefully on newer releases. + You can find additional information on Kselftest framework, how to write new tests using the framework on Kselftest wiki: From fc4eb486a59d70bd35cf1209f0e68c2d8b979193 Mon Sep 17 00:00:00 2001 From: Yang Xu Date: Thu, 27 Jan 2022 17:11:35 +0800 Subject: [PATCH 235/367] selftests/zram: Skip max_comp_streams interface on newer kernel Since commit 43209ea2d17a ("zram: remove max_comp_streams internals"), zram has switched to per-cpu streams. Even kernel still keep this interface for some reasons, but writing to max_comp_stream doesn't take any effect. So skip it on newer kernel ie 4.7. The code that comparing kernel version is from xfstests testsuite ext4/053. Signed-off-by: Yang Xu Signed-off-by: Shuah Khan --- tools/testing/selftests/zram/zram_lib.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index 6f872f266fd1..f47fc0f27e99 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -11,6 +11,9 @@ dev_mounted=-1 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +kernel_version=`uname -r | cut -d'.' -f1,2` +kernel_major=${kernel_version%.*} +kernel_minor=${kernel_version#*.} trap INT @@ -25,6 +28,20 @@ check_prereqs() fi } +kernel_gte() +{ + major=${1%.*} + minor=${1#*.} + + if [ $kernel_major -gt $major ]; then + return 0 + elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then + return 0 + fi + + return 1 +} + zram_cleanup() { echo "zram cleanup" @@ -86,6 +103,13 @@ zram_max_streams() { echo "set max_comp_streams to zram device(s)" + kernel_gte 4.7 + if [ $? -eq 0 ]; then + echo "The device attribute max_comp_streams was"\ + "deprecated in 4.7" + return 0 + fi + local i=0 for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams" From d18da7ec3719559d6e74937266d0416e6c7e0b31 Mon Sep 17 00:00:00 2001 From: Yang Xu Date: Thu, 27 Jan 2022 17:11:36 +0800 Subject: [PATCH 236/367] selftests/zram01.sh: Fix compression ratio calculation zram01 uses `free -m` to measure zram memory usage. The results are no sense because they are polluted by all running processes on the system. We Should only calculate the free memory delta for the current process. So use the third field of /sys/block/zram/mm_stat to measure memory usage instead. The file is available since kernel 4.1. orig_data_size(first): uncompressed size of data stored in this disk. compr_data_size(second): compressed size of data stored in this disk mem_used_total(third): the amount of memory allocated for this disk Also remove useless zram cleanup call in zram_fill_fs and so we don't need to cleanup zram twice if fails. Signed-off-by: Yang Xu Signed-off-by: Shuah Khan --- tools/testing/selftests/zram/zram01.sh | 34 ++++++++------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index 114863d9fb87..e9e9eb777e2c 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,8 +33,6 @@ zram_algs="lzo" zram_fill_fs() { - local mem_free0=$(free -m | awk 'NR==2 {print $4}') - for i in $(seq 0 $(($dev_num - 1))); do echo "fill zram$i..." local b=0 @@ -45,29 +43,17 @@ zram_fill_fs() b=$(($b + 1)) done echo "zram$i can be filled with '$b' KB" + + local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"` + local v=$((100 * 1024 * $b / $mem_used_total)) + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + return + fi + + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" done - - local mem_free1=$(free -m | awk 'NR==2 {print $4}') - local used_mem=$(($mem_free0 - $mem_free1)) - - local total_size=0 - for sm in $zram_sizes; do - local s=$(echo $sm | sed 's/M//') - total_size=$(($total_size + $s)) - done - - echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" - - local v=$((100 * $total_size / $used_mem)) - - if [ "$v" -lt 100 ]; then - echo "FAIL compression ratio: 0.$v:1" - ERR_CODE=-1 - zram_cleanup - return - fi - - echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" } check_prereqs From 01dabed20573804750af5c7bf8d1598a6bf7bf6e Mon Sep 17 00:00:00 2001 From: Yang Xu Date: Thu, 27 Jan 2022 17:11:37 +0800 Subject: [PATCH 237/367] selftests/zram: Adapt the situation that /dev/zram0 is being used If zram-generator package is installed and works, then we can not remove zram module because zram swap is being used. This case needs a clean zram environment, change this test by using hot_add/hot_remove interface. So even zram device is being used, we still can add zram device and remove them in cleanup. The two interface was introduced since kernel commit 6566d1a32bf7("zram: add dynamic device add/remove functionality") in v4.2-rc1. If kernel supports these two interface, we use hot_add/hot_remove to slove this problem, if not, just check whether zram is being used or built in, then skip it on old kernel. Signed-off-by: Yang Xu Signed-off-by: Shuah Khan --- tools/testing/selftests/zram/zram.sh | 15 +-- tools/testing/selftests/zram/zram01.sh | 3 +- tools/testing/selftests/zram/zram02.sh | 1 - tools/testing/selftests/zram/zram_lib.sh | 112 +++++++++++++---------- 4 files changed, 67 insertions(+), 64 deletions(-) diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 232e958ec454..b0b91d9b0dc2 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,9 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh" -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - . ./zram_lib.sh run_zram () { @@ -18,14 +15,4 @@ echo "" check_prereqs -# check zram module exists -MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko -if [ -f $MODULE_PATH ]; then - run_zram -elif [ -b /dev/zram0 ]; then - run_zram -else - echo "$TCID : No zram.ko module or /dev/zram0 device file not found" - echo "$TCID : CONFIG_ZRAM is not set" - exit $ksft_skip -fi +run_zram diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index e9e9eb777e2c..8f4affe34f3e 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,7 +33,7 @@ zram_algs="lzo" zram_fill_fs() { - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "fill zram$i..." local b=0 while [ true ]; do @@ -67,7 +67,6 @@ zram_mount zram_fill_fs zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh index e83b404807c0..2418b0c4ed13 100755 --- a/tools/testing/selftests/zram/zram02.sh +++ b/tools/testing/selftests/zram/zram02.sh @@ -36,7 +36,6 @@ zram_set_memlimit zram_makeswap zram_swapoff zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index f47fc0f27e99..21ec1966de76 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -5,10 +5,12 @@ # Author: Alexey Kodanev # Modified: Naresh Kamboju -MODULE=0 dev_makeswap=-1 dev_mounted=-1 - +dev_start=0 +dev_end=-1 +module_load=-1 +sys_control=-1 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 kernel_version=`uname -r | cut -d'.' -f1,2` @@ -46,57 +48,72 @@ zram_cleanup() { echo "zram cleanup" local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_makeswap); do swapoff /dev/zram$i done - for i in $(seq 0 $dev_mounted); do + for i in $(seq $dev_start $dev_mounted); do umount /dev/zram$i done - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo 1 > /sys/block/zram${i}/reset rm -rf zram$i done -} + if [ $sys_control -eq 1 ]; then + for i in $(seq $dev_start $dev_end); do + echo $i > /sys/class/zram-control/hot_remove + done + fi -zram_unload() -{ - if [ $MODULE -ne 0 ] ; then - echo "zram rmmod zram" + if [ $module_load -eq 1 ]; then rmmod zram > /dev/null 2>&1 fi } zram_load() { - # check zram module exists - MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko - if [ -f $MODULE_PATH ]; then - MODULE=1 - echo "create '$dev_num' zram device(s)" - modprobe zram num_devices=$dev_num - if [ $? -ne 0 ]; then - echo "failed to insert zram module" - exit 1 - fi + echo "create '$dev_num' zram device(s)" - dev_num_created=$(ls /dev/zram* | wc -w) + # zram module loaded, new kernel + if [ -d "/sys/class/zram-control" ]; then + echo "zram modules already loaded, kernel supports" \ + "zram-control interface" + dev_start=$(ls /dev/zram* | wc -w) + dev_end=$(($dev_start + $dev_num - 1)) + sys_control=1 - if [ "$dev_num_created" -ne "$dev_num" ]; then - echo "unexpected num of devices: $dev_num_created" - ERR_CODE=-1 - else - echo "zram load module successful" - fi - elif [ -b /dev/zram0 ]; then - echo "/dev/zram0 device file found: OK" - else - echo "ERROR: No zram.ko module or no /dev/zram0 device found" - echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + for i in $(seq $dev_start $dev_end); do + cat /sys/class/zram-control/hot_add > /dev/null + done + + echo "all zram devices (/dev/zram$dev_start~$dev_end" \ + "successfully created" + return 0 fi + + # detect old kernel or built-in + modprobe zram num_devices=$dev_num + if [ ! -d "/sys/class/zram-control" ]; then + if grep -q '^zram' /proc/modules; then + rmmod zram > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "zram module is being used on old kernel" \ + "without zram-control interface" + exit $ksft_skip + fi + else + echo "test needs CONFIG_ZRAM=m on old kernel without" \ + "zram-control interface" + exit $ksft_skip + fi + modprobe zram num_devices=$dev_num + fi + + module_load=1 + dev_end=$(($dev_num - 1)) + echo "all zram devices (/dev/zram0~$dev_end) successfully created" } zram_max_streams() @@ -110,7 +127,7 @@ zram_max_streams() return 0 fi - local i=0 + local i=$dev_start for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams" echo $max_s > $sys_path || \ @@ -122,7 +139,7 @@ zram_max_streams() echo "FAIL can't set max_streams '$max_s', get $max_stream" i=$(($i + 1)) - echo "$sys_path = '$max_streams' ($i/$dev_num)" + echo "$sys_path = '$max_streams'" done echo "zram max streams: OK" @@ -132,15 +149,16 @@ zram_compress_alg() { echo "test that we can set compression algorithm" - local algs=$(cat /sys/block/zram0/comp_algorithm) + local i=$dev_start + local algs=$(cat /sys/block/zram${i}/comp_algorithm) echo "supported algs: $algs" - local i=0 + for alg in $zram_algs; do local sys_path="/sys/block/zram${i}/comp_algorithm" echo "$alg" > $sys_path || \ echo "FAIL can't set '$alg' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$alg' ($i/$dev_num)" + echo "$sys_path = '$alg'" done echo "zram set compression algorithm: OK" @@ -149,14 +167,14 @@ zram_compress_alg() zram_set_disksizes() { echo "set disk size to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_sizes; do local sys_path="/sys/block/zram${i}/disksize" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set disksizes: OK" @@ -166,14 +184,14 @@ zram_set_memlimit() { echo "set memory limit to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_mem_limits; do local sys_path="/sys/block/zram${i}/mem_limit" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set memory limit: OK" @@ -182,8 +200,8 @@ zram_set_memlimit() zram_makeswap() { echo "make swap with zram device(s)" - local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + local i=$dev_start + for i in $(seq $dev_start $dev_end); do mkswap /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -206,7 +224,7 @@ zram_makeswap() zram_swapoff() { local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_end); do swapoff /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -220,7 +238,7 @@ zram_swapoff() zram_makefs() { - local i=0 + local i=$dev_start for fs in $zram_filesystems; do # if requested fs not supported default it to ext2 which mkfs.$fs > /dev/null 2>&1 || fs=ext2 @@ -239,7 +257,7 @@ zram_makefs() zram_mount() { local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "mount /dev/zram$i" mkdir zram$i mount /dev/zram$i zram$i > /dev/null || \ From 908a26e139e8cf21093acc56d8e90ddad2ad1eff Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Thu, 27 Jan 2022 21:33:45 +0500 Subject: [PATCH 238/367] selftests/exec: Remove pipe from TEST_GEN_FILES pipe named FIFO special file is being created in execveat.c to perform some tests. Makefile doesn't need to do anything with the pipe. When it isn't found, Makefile generates the following build error: make: *** No rule to make target '../tools/testing/selftests/exec/pipe', needed by 'all'. Stop. pipe is created and removed during test run-time. Amended change log to add pipe remove info: Shuah Khan Fixes: 61016db15b8e ("selftests/exec: Verify execve of non-regular files fail") Signed-off-by: Muhammad Usama Anjum Reviewed-by: Shuah Khan Signed-off-by: Shuah Khan --- tools/testing/selftests/exec/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index dd61118df66e..12c5e27d32c1 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -5,7 +5,7 @@ CFLAGS += -D_GNU_SOURCE TEST_PROGS := binfmt_script non-regular TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 -TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe +TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile From 941518d6538afa5ea0edc26e6c009d0b3163d422 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 20 Jan 2022 17:00:33 -0700 Subject: [PATCH 239/367] docs: Hook the RTLA documents into the kernel docs build The RTLA documents were added to Documentation/ but never hooked into the rest of the docs build, leading to a bunch of warnings like: Documentation/tools/rtla/rtla-osnoise.rst: WARNING: document isn't included in any toctree Add some basic glue to wire these documents into the build so that they are available with the rest of the rendered docs. No attempt has been made to turn the RTLA docs into proper RST files rather than warmed-over man pages; that is an exercise for the future. Fixes: d40d48e1f1f2 ("rtla: Add Documentation") Acked-by: Daniel Bristot de Oliveira Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/877dau555q.fsf@meer.lwn.net Signed-off-by: Jonathan Corbet --- Documentation/index.rst | 1 + Documentation/tools/index.rst | 20 ++++++++++++++++++++ Documentation/tools/rtla/index.rst | 26 ++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 Documentation/tools/index.rst create mode 100644 Documentation/tools/rtla/index.rst diff --git a/Documentation/index.rst b/Documentation/index.rst index 2b4de3926858..b58692d687f6 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -166,6 +166,7 @@ to ReStructured Text format, or are simply too old. .. toctree:: :maxdepth: 2 + tools/index staging/index watch_queue diff --git a/Documentation/tools/index.rst b/Documentation/tools/index.rst new file mode 100644 index 000000000000..0bb1e61bdcc0 --- /dev/null +++ b/Documentation/tools/index.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============ +Kernel tools +============ + +This book covers user-space tools that are shipped with the kernel source; +more additions are needed here: + +.. toctree:: + :maxdepth: 1 + + rtla/index + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/tools/rtla/index.rst b/Documentation/tools/rtla/index.rst new file mode 100644 index 000000000000..840f0bf3e803 --- /dev/null +++ b/Documentation/tools/rtla/index.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +The realtime Linux analysis tool +================================ + +RTLA provides a set of tools for the analysis of the kernel's realtime +behavior on specific hardware. + +.. toctree:: + :maxdepth: 1 + + rtla + rtla-osnoise + rtla-osnoise-hist + rtla-osnoise-top + rtla-timerlat + rtla-timerlat-hist + rtla-timerlat-top + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` From 10855b45a428d8888b1a111d7f607c32a6a49a06 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Mon, 24 Jan 2022 17:14:47 +0900 Subject: [PATCH 240/367] docs: fix typo in Documentation/kernel-hacking/locking.rst Change copy_from_user*( to copy_from_user() . Signed-off-by: Takahiro Itazuri Link: https://lore.kernel.org/r/20220124081447.34066-1-itazur@amazon.com Signed-off-by: Jonathan Corbet --- Documentation/kernel-hacking/locking.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index e6cd40663ea5..4cbd50edf277 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -295,7 +295,7 @@ Pete Zaitcev gives the following summary: - If you are in a process context (any syscall) and want to lock other process out, use a mutex. You can take a mutex and sleep - (``copy_from_user*(`` or ``kmalloc(x,GFP_KERNEL)``). + (``copy_from_user()`` or ``kmalloc(x,GFP_KERNEL)``). - Otherwise (== data can be touched in an interrupt), use spin_lock_irqsave() and From 573fe46e398f4b451d075e854d221f6197941540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 21 Jan 2022 12:58:04 +0100 Subject: [PATCH 241/367] Documentation: arm: marvell: Extend Avanta list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include another two SoCs from Avanta family. Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20220121115804.28824-1-pali@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/arm/marvell.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst index 9485a5a2e2e9..2f41caa0096c 100644 --- a/Documentation/arm/marvell.rst +++ b/Documentation/arm/marvell.rst @@ -266,10 +266,12 @@ Avanta family ------------- Flavors: + - 88F6500 - 88F6510 - 88F6530P - 88F6550 - 88F6560 + - 88F6601 Homepage: https://web.archive.org/web/20181005145041/http://www.marvell.com/broadband/ From 854d0982eef0e424e8108d09d9275aaf445b1597 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Mon, 17 Jan 2022 12:13:37 +0100 Subject: [PATCH 242/367] docs/vm: Fix typo in *harden* Fixes: df4e817b7108 ("mm: page table check") Signed-off-by: Paul Menzel Link: https://lore.kernel.org/r/20220117111338.115455-1-pmenzel@molgen.mpg.de Signed-off-by: Jonathan Corbet --- Documentation/vm/page_table_check.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/vm/page_table_check.rst b/Documentation/vm/page_table_check.rst index 81f521ff7ea7..1a09472f10a3 100644 --- a/Documentation/vm/page_table_check.rst +++ b/Documentation/vm/page_table_check.rst @@ -9,7 +9,7 @@ Page Table Check Introduction ============ -Page table check allows to hardern the kernel by ensuring that some types of +Page table check allows to harden the kernel by ensuring that some types of the memory corruptions are prevented. Page table check performs extra verifications at the time when new pages become From 53960faf2b731dd2f9ed6e1334634b8ba6286850 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 25 Jan 2022 19:50:31 +0530 Subject: [PATCH 243/367] arm64: Add Cortex-A510 CPU part definition Add the CPU Partnumbers for the new Arm designs. Cc: Catalin Marinas Cc: Will Deacon Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Suzuki K Poulose Acked-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1643120437-14352-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Mathieu Poirier --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 19b8441aa8f2..e8fdc10395b6 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,6 +73,7 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 @@ -115,6 +116,7 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) From 607a9afaae09cde21ece458a8f10cb99d3f94f14 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 25 Jan 2022 19:50:32 +0530 Subject: [PATCH 244/367] arm64: errata: Add detection for TRBE ignored system register writes TRBE implementations affected by Arm erratum #2064142 might fail to write into certain system registers after the TRBE has been disabled. Under some conditions after TRBE has been disabled, writes into certain TRBE registers TRBLIMITR_EL1, TRBPTR_EL1, TRBBASER_EL1, TRBSR_EL1 and TRBTRG_EL1 will be ignored and not be effected. This adds a new errata ARM64_ERRATUM_2064142 in arm64 errata framework. Cc: Catalin Marinas Cc: Will Deacon Cc: Mathieu Poirier Cc: Suzuki Poulose Cc: coresight@lists.linaro.org Cc: linux-doc@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Suzuki K Poulose Acked-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1643120437-14352-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Mathieu Poirier --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 18 ++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 9 +++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 30 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 5342e895fb60..c9b30e6c2b6c 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -52,6 +52,8 @@ stable kernels. | Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6978140edfa4..0033436e3416 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -778,6 +778,24 @@ config ARM64_ERRATUM_2224489 If unsure, say Y. +config ARM64_ERRATUM_2064142 + bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2064142. + + Affected Cortex-A510 core might fail to write into system registers after the + TRBE has been disabled. Under some conditions after the TRBE has been disabled + writes into TRBE registers TRBLIMITR_EL1, TRBPTR_EL1, TRBBASER_EL1, TRBSR_EL1, + and TRBTRG_EL1 will be ignored and will not be effected. + + Work around this in the driver by executing TSB CSYNC and DSB after collection + is stopped and before performing a system register write to one of the affected + registers. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9e1c1aef9ebd..cbb7d5a9aee7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -597,6 +597,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_2064142 + { + .desc = "ARM erratum 2064142", + .capability = ARM64_WORKAROUND_2064142, + + /* Cortex-A510 r0p0 - r0p2 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 870c39537dd0..fca3cb329e1d 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -55,6 +55,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_2064142 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE From 3bd94a8759de9b724b83a80942b0354acd7701eb Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 25 Jan 2022 19:50:33 +0530 Subject: [PATCH 245/367] arm64: errata: Add detection for TRBE invalid prohibited states TRBE implementations affected by Arm erratum #2038923 might get TRBE into an inconsistent view on whether trace is prohibited within the CPU. As a result, the trace buffer or trace buffer state might be corrupted. This happens after TRBE buffer has been enabled by setting TRBLIMITR_EL1.E, followed by just a single context synchronization event before execution changes from a context, in which trace is prohibited to one where it isn't, or vice versa. In these mentioned conditions, the view of whether trace is prohibited is inconsistent between parts of the CPU, and the trace buffer or the trace buffer state might be corrupted. This adds a new errata ARM64_ERRATUM_2038923 in arm64 errata framework. Cc: Catalin Marinas Cc: Will Deacon Cc: Mathieu Poirier Cc: Suzuki Poulose Cc: coresight@lists.linaro.org Cc: linux-doc@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Suzuki K Poulose Acked-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1643120437-14352-4-git-send-email-anshuman.khandual@arm.com Signed-off-by: Mathieu Poirier --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 23 +++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 9 +++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 35 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index c9b30e6c2b6c..e0ef3e9a4b8b 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -54,6 +54,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0033436e3416..fecf2b09e870 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -796,6 +796,29 @@ config ARM64_ERRATUM_2064142 If unsure, say Y. +config ARM64_ERRATUM_2038923 + bool "Cortex-A510: 2038923: workaround TRBE corruption with enable" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2038923. + + Affected Cortex-A510 core might cause an inconsistent view on whether trace is + prohibited within the CPU. As a result, the trace buffer or trace buffer state + might be corrupted. This happens after TRBE buffer has been enabled by setting + TRBLIMITR_EL1.E, followed by just a single context synchronization event before + execution changes from a context, in which trace is prohibited to one where it + isn't, or vice versa. In these mentioned conditions, the view of whether trace + is prohibited is inconsistent between parts of the CPU, and the trace buffer or + the trace buffer state might be corrupted. + + Work around this in the driver by preventing an inconsistent view of whether the + trace is prohibited or not based on TRBLIMITR_EL1.E by immediately following a + change to TRBLIMITR_EL1.E with at least one ISB instruction before an ERET, or + two ISB instructions if no ERET is to take place. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cbb7d5a9aee7..60b0c1f1d912 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -607,6 +607,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2038923 + { + .desc = "ARM erratum 2038923", + .capability = ARM64_WORKAROUND_2038923, + + /* Cortex-A510 r0p0 - r0p2 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) + }, +#endif { } }; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index fca3cb329e1d..45a06d36d080 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -56,6 +56,7 @@ WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 WORKAROUND_2064142 +WORKAROUND_2038923 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE From 708e8af4924ec2fdd5b81fe09192c6bac2f86935 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 25 Jan 2022 19:50:34 +0530 Subject: [PATCH 246/367] arm64: errata: Add detection for TRBE trace data corruption TRBE implementations affected by Arm erratum #1902691 might corrupt trace data or deadlock, when it's being written into the memory. So effectively TRBE is broken and hence cannot be used to capture trace data. This adds a new errata ARM64_ERRATUM_1902691 in arm64 errata framework. Cc: Catalin Marinas Cc: Will Deacon Cc: Mathieu Poirier Cc: Suzuki Poulose Cc: coresight@lists.linaro.org Cc: linux-doc@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Suzuki K Poulose Acked-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1643120437-14352-5-git-send-email-anshuman.khandual@arm.com Signed-off-by: Mathieu Poirier --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 18 ++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 9 +++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 30 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index e0ef3e9a4b8b..50018f60c4d4 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -56,6 +56,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fecf2b09e870..9c3cf3875785 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -819,6 +819,24 @@ config ARM64_ERRATUM_2038923 If unsure, say Y. +config ARM64_ERRATUM_1902691 + bool "Cortex-A510: 1902691: workaround TRBE trace corruption" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 1902691. + + Affected Cortex-A510 core might cause trace data corruption, when being written + into the memory. Effectively TRBE is broken and hence cannot be used to capture + trace data. + + Work around this problem in the driver by just preventing TRBE initialization on + affected cpus. The firmware must have disabled the access to TRBE for the kernel + on such implementations. This will cover the kernel for any firmware that doesn't + do this already. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 60b0c1f1d912..a3336dfb5a8a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -615,6 +615,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A510 r0p0 - r0p2 */ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1902691 + { + .desc = "ARM erratum 1902691", + .capability = ARM64_WORKAROUND_1902691, + + /* Cortex-A510 r0p0 - r0p1 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1) + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 45a06d36d080..e7719e8f18de 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -57,6 +57,7 @@ WORKAROUND_1508412 WORKAROUND_1542419 WORKAROUND_2064142 WORKAROUND_2038923 +WORKAROUND_1902691 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE From b9199181a9ef8252e47e207be8c23e1f50662620 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Thu, 27 Jan 2022 22:44:46 +0500 Subject: [PATCH 247/367] selftests: futex: Use variable MAKE instead of make MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recursive make commands should always use the variable MAKE, not the explicit command name ‘make’. This has benefits and removes the following warning when multiple jobs are used for the build: make[2]: warning: jobserver unavailable: using -j1. Add '+' to parent make rule. Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") Signed-off-by: Muhammad Usama Anjum Reviewed-by: André Almeida Signed-off-by: Shuah Khan --- tools/testing/selftests/futex/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 12631f0076a1..11e157d7533b 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -11,7 +11,7 @@ all: @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ if [ -e $$DIR/$(TEST_PROGS) ]; then \ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \ fi \ @@ -32,6 +32,6 @@ override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done endef From e051cdf655fa016692008a446a060eff06222bb5 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 26 Jan 2022 10:27:21 +0000 Subject: [PATCH 248/367] selftests: openat2: Print also errno in failure messages In E_func() macro, on error, print also errno in order to aid debugging. Cc: Aleksa Sarai Signed-off-by: Cristian Marussi Signed-off-by: Shuah Khan --- tools/testing/selftests/openat2/helpers.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h index a6ea27344db2..ad5d0ba5b6ce 100644 --- a/tools/testing/selftests/openat2/helpers.h +++ b/tools/testing/selftests/openat2/helpers.h @@ -62,11 +62,12 @@ bool needs_openat2(const struct open_how *how); (similar to chroot(2)). */ #endif /* RESOLVE_IN_ROOT */ -#define E_func(func, ...) \ - do { \ - if (func(__VA_ARGS__) < 0) \ - ksft_exit_fail_msg("%s:%d %s failed\n", \ - __FILE__, __LINE__, #func);\ +#define E_func(func, ...) \ + do { \ + errno = 0; \ + if (func(__VA_ARGS__) < 0) \ + ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \ + __FILE__, __LINE__, #func, errno); \ } while (0) #define E_asprintf(...) E_func(asprintf, __VA_ARGS__) From ea3396725aa143dd42fe388cb67e44c90d2fb719 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 26 Jan 2022 10:27:22 +0000 Subject: [PATCH 249/367] selftests: openat2: Add missing dependency in Makefile Add a dependency on header helpers.h to the main target; while at that add to helpers.h also a missing include for bool types. Cc: Aleksa Sarai Signed-off-by: Cristian Marussi Signed-off-by: Shuah Khan --- tools/testing/selftests/openat2/Makefile | 2 +- tools/testing/selftests/openat2/helpers.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 4b93b1417b86..843ba56d8e49 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -5,4 +5,4 @@ TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test include ../lib.mk -$(TEST_GEN_PROGS): helpers.c +$(TEST_GEN_PROGS): helpers.c helpers.h diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h index ad5d0ba5b6ce..7056340b9339 100644 --- a/tools/testing/selftests/openat2/helpers.h +++ b/tools/testing/selftests/openat2/helpers.h @@ -9,6 +9,7 @@ #define _GNU_SOURCE #include +#include #include #include #include "../kselftest.h" From ac9e0a250bb155078601a5b999aab05f2a04d1ab Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 26 Jan 2022 10:27:23 +0000 Subject: [PATCH 250/367] selftests: openat2: Skip testcases that fail with EOPNOTSUPP Skip testcases that fail since the requested valid flags combination is not supported by the underlying filesystem. Cc: Aleksa Sarai Signed-off-by: Cristian Marussi Signed-off-by: Shuah Khan --- tools/testing/selftests/openat2/openat2_test.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index 1bddbe934204..7fb902099de4 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -259,6 +259,16 @@ void test_openat2_flags(void) unlink(path); fd = sys_openat2(AT_FDCWD, path, &test->how); + if (fd < 0 && fd == -EOPNOTSUPP) { + /* + * Skip the testcase if it failed because not supported + * by FS. (e.g. a valid O_TMPFILE combination on NFS) + */ + ksft_test_result_skip("openat2 with %s fails with %d (%s)\n", + test->name, fd, strerror(-fd)); + goto next; + } + if (test->err >= 0) failed = (fd < 0); else @@ -303,7 +313,7 @@ skip: else resultfn("openat2 with %s fails with %d (%s)\n", test->name, test->err, strerror(-test->err)); - +next: free(fdpath); fflush(stdout); } From dae1d8ac31896988e7313384c0370176a75e9b45 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 26 Jan 2022 10:27:19 +0000 Subject: [PATCH 251/367] selftests: skip mincore.check_file_mmap when fs lacks needed support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Report mincore.check_file_mmap as SKIP instead of FAIL if the underlying filesystem lacks support of O_TMPFILE or fallocate since such failures are not really related to mincore functionality. Cc: Ricardo Cañuelo Signed-off-by: Cristian Marussi Signed-off-by: Shuah Khan --- .../selftests/mincore/mincore_selftest.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e54106643337..4c88238fc8f0 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -207,15 +207,21 @@ TEST(check_file_mmap) errno = 0; fd = open(".", O_TMPFILE | O_RDWR, 0600); - ASSERT_NE(-1, fd) { - TH_LOG("Can't create temporary file: %s", - strerror(errno)); + if (fd < 0) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + SKIP(goto out_free, "O_TMPFILE not supported by filesystem."); } errno = 0; retval = fallocate(fd, 0, 0, FILE_SIZE); - ASSERT_EQ(0, retval) { - TH_LOG("Error allocating space for the temporary file: %s", - strerror(errno)); + if (retval) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + SKIP(goto out_close, "fallocate not supported by filesystem."); } /* @@ -271,7 +277,9 @@ TEST(check_file_mmap) } munmap(addr, FILE_SIZE); +out_close: close(fd); +out_free: free(vec); } From 4ed308c445a1e3abac8f6c17928c1cb533867e38 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 25 Jan 2022 09:19:10 -0500 Subject: [PATCH 252/367] ftrace: Have architectures opt-in for mcount build time sorting First S390 complained that the sorting of the mcount sections at build time caused the kernel to crash on their architecture. Now PowerPC is complaining about it too. And also ARM64 appears to be having issues. It may be necessary to also update the relocation table for the values in the mcount table. Not only do we have to sort the table, but also update the relocations that may be applied to the items in the table. If the system is not relocatable, then it is fine to sort, but if it is, some architectures may have issues (although x86 does not as it shifts all addresses the same). Add a HAVE_BUILDTIME_MCOUNT_SORT that an architecture can set to say it is safe to do the sorting at build time. Also update the config to compile in build time sorting in the sorttable code in scripts/ to depend on CONFIG_BUILDTIME_MCOUNT_SORT. Link: https://lore.kernel.org/all/944D10DA-8200-4BA9-8D0A-3BED9AA99F82@linux.ibm.com/ Link: https://lkml.kernel.org/r/20220127153821.3bc1ac6e@gandalf.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Russell King Cc: Yinan Liu Cc: Ard Biesheuvel Cc: Kees Cook Reported-by: Sachin Sant Reviewed-by: Mark Rutland Tested-by: Mark Rutland [arm64] Tested-by: Sachin Sant Fixes: 72b3942a173c ("scripts: ftrace - move the sort-processing in ftrace_init") Signed-off-by: Steven Rostedt (Google) --- arch/arm/Kconfig | 1 + arch/x86/Kconfig | 1 + kernel/trace/Kconfig | 8 +++++++- scripts/Makefile | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fabe39169b12..4c97cb40eebb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -83,6 +83,7 @@ config ARM select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ebe8fc76949a..9f5bd41bf660 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -186,6 +186,7 @@ config X86 select HAVE_CONTEXT_TRACKING_OFFSTACK if HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT select HAVE_OBJTOOL_MCOUNT if STACK_VALIDATION + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 752ed89a293b..a5eb5e7fd624 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -70,10 +70,16 @@ config HAVE_C_RECORDMCOUNT help C version of recordmcount available? +config HAVE_BUILDTIME_MCOUNT_SORT + bool + help + An architecture selects this if it sorts the mcount_loc section + at build time. + config BUILDTIME_MCOUNT_SORT bool default y - depends on BUILDTIME_TABLE_SORT && !S390 + depends on HAVE_BUILDTIME_MCOUNT_SORT && DYNAMIC_FTRACE help Sort the mcount_loc section at build time. diff --git a/scripts/Makefile b/scripts/Makefile index ecd3acacd0ec..ce5aa9030b74 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -25,7 +25,7 @@ HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED endif -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_BUILDTIME_MCOUNT_SORT HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif From e629e7b525a179e29d53463d992bdee759c950fb Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Tue, 25 Jan 2022 12:07:15 +0800 Subject: [PATCH 253/367] tracing/histogram: Fix a potential memory leak for kstrdup() kfree() is missing on an error path to free the memory allocated by kstrdup(): p = param = kstrdup(data->params[i], GFP_KERNEL); So it is better to free it via kfree(p). Link: https://lkml.kernel.org/r/tencent_C52895FD37802832A3E5B272D05008866F0A@qq.com Cc: stable@vger.kernel.org Fixes: d380dcde9a07c ("tracing: Fix now invalid var_ref_vals assumption in trace action") Signed-off-by: Xiaoke Wang Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5e6a988a8a51..cd9610688ddc 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3935,6 +3935,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, var_ref_idx = find_var_ref_idx(hist_data, var_ref); if (WARN_ON(var_ref_idx < 0)) { + kfree(p); ret = var_ref_idx; goto err; } From 58c5724ec2cdd72b22107ec5de00d90cc4797796 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 25 Jan 2022 23:19:30 +0900 Subject: [PATCH 254/367] tracing: Avoid -Warray-bounds warning for __rel_loc macro Since -Warray-bounds checks the destination size from the type of given pointer, __assign_rel_str() macro gets warned because it passes the pointer to the 'u32' field instead of 'trace_event_raw_*' data structure. Pass the data address calculated from the 'trace_event_raw_*' instead of 'u32' __rel_loc field. Link: https://lkml.kernel.org/r/20220125233154.dac280ed36944c0c2fe6f3ac@kernel.org Cc: Stephen Rothwell Cc: Kees Cook Signed-off-by: Masami Hiramatsu [ This did not fix the warning, but is still a nice clean up ] Signed-off-by: Steven Rostedt (Google) --- include/trace/trace_events.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 8c6f7c433518..65d927e059d3 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -318,9 +318,10 @@ TRACE_MAKE_SYSTEM_STR(); #define __get_str(field) ((char *)__get_dynamic_array(field)) #undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)(&__entry->__rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ +#define __get_rel_dynamic_array(field) \ + ((void *)__entry + \ + offsetof(typeof(*__entry), __rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ (__entry->__rel_loc_##field & 0xffff)) #undef __get_rel_dynamic_array_len From c6d777acdf8f62d4ebaef0e5c6cd8fedbd6e8546 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 25 Jan 2022 14:00:37 -0800 Subject: [PATCH 255/367] tracing/perf: Avoid -Warray-bounds warning for __rel_loc macro As done for trace_events.h, also fix the __rel_loc macro in perf.h, which silences the -Warray-bounds warning: In file included from ./include/linux/string.h:253, from ./include/linux/bitmap.h:11, from ./include/linux/cpumask.h:12, from ./include/linux/mm_types_task.h:14, from ./include/linux/mm_types.h:5, from ./include/linux/buildid.h:5, from ./include/linux/module.h:14, from samples/trace_events/trace-events-sample.c:2: In function '__fortify_strcpy', inlined from 'perf_trace_foo_rel_loc' at samples/trace_events/./trace-events-sample.h:519:1: ./include/linux/fortify-string.h:47:33: warning: '__builtin_strcpy' offset 12 is out of the bounds [ 0, 4] [-Warray-bounds] 47 | #define __underlying_strcpy __builtin_strcpy | ^ ./include/linux/fortify-string.h:445:24: note: in expansion of macro '__underlying_strcpy' 445 | return __underlying_strcpy(p, q); | ^~~~~~~~~~~~~~~~~~~ Also make __data struct member a proper flexible array to avoid future problems. Link: https://lkml.kernel.org/r/20220125220037.2738923-1-keescook@chromium.org Cc: Steven Rostedt Cc: Masami Hiramatsu Fixes: 55de2c0b5610c ("tracing: Add '__rel_loc' using trace event macros") Reported-by: Stephen Rothwell Signed-off-by: Kees Cook Signed-off-by: Steven Rostedt (Google) --- include/trace/perf.h | 5 +++-- include/trace/trace_events.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/trace/perf.h b/include/trace/perf.h index ea4405de175a..5d48c46a3008 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -23,8 +23,9 @@ #undef __get_rel_dynamic_array #define __get_rel_dynamic_array(field) \ - ((void *)(&__entry->__rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ + ((void *)__entry + \ + offsetof(typeof(*__entry), __rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ (__entry->__rel_loc_##field & 0xffff)) #undef __get_rel_dynamic_array_len diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 65d927e059d3..3d29919045af 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -128,7 +128,7 @@ TRACE_MAKE_SYSTEM_STR(); struct trace_event_raw_##name { \ struct trace_entry ent; \ tstruct \ - char __data[0]; \ + char __data[]; \ }; \ \ static struct trace_event_class event_class_##name; From 2201aea114d4c7eedd05865df545dbb987cee5c1 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 25 Jan 2022 17:13:01 -0700 Subject: [PATCH 256/367] rtla: Make doc build optional rtla build fails due to doc build dependency on rst2man. Make doc build optional so rtla could be built without docs. Leave the install dependency on doc_install alone. Link: https://lkml.kernel.org/r/20220126001301.79096-1-skhan@linuxfoundation.org Acked-by: Daniel Bristot de Oliveira Signed-off-by: Shuah Khan Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 2d52ff0bff7d..7c39728d08de 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -59,7 +59,7 @@ endif .PHONY: all all: rtla -rtla: $(OBJ) doc +rtla: $(OBJ) $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) static: $(OBJ) From aa814c51ab7cb3ddeeeeedaa37d599aee7ba6649 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 25 Jan 2022 17:22:34 -0700 Subject: [PATCH 257/367] tools/tracing: Update Makefile to build rtla Update tracing Makefile to build/install/clean rtla tragets. Link: https://lkml.kernel.org/r/20220126002234.79337-1-skhan@linuxfoundation.org Reviewed-by: Daniel Bristot de Oliveira Signed-off-by: Shuah Khan Signed-off-by: Steven Rostedt (Google) --- tools/tracing/Makefile | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile index 87e0ec48e2e7..95e485f12d97 100644 --- a/tools/tracing/Makefile +++ b/tools/tracing/Makefile @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 include ../scripts/Makefile.include -all: latency +all: latency rtla -clean: latency_clean +clean: latency_clean rtla_clean -install: latency_install +install: latency_install rtla_install latency: $(call descend,latency) @@ -16,4 +16,14 @@ latency_install: latency_clean: $(call descend,latency,clean) -.PHONY: all install clean latency latency_install latency_clean +rtla: + $(call descend,rtla) + +rtla_install: + $(call descend,rtla,install) + +rtla_clean: + $(call descend,rtla,clean) + +.PHONY: all install clean latency latency_install latency_clean \ + rtla rtla_install rtla_clean From 798a5b6c195d1c64fd5e9dd252381feb17e5ff22 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 27 Jan 2022 15:44:15 -0600 Subject: [PATCH 258/367] tracing: Fix smatch warning for null glob in event_hist_trigger_parse() The recent rename of event_hist_trigger_parse() caused smatch re-evaluation of trace_events_hist.c and as a result an old warning was found: kernel/trace/trace_events_hist.c:6174 event_hist_trigger_parse() error: we previously assumed 'glob' could be null (see line 6166) glob should never be null (and apparently smatch can also figure that out and skip the warning when using the cross-function DB (but which can't be used with a 0day build as it takes too much time to generate)). Nonetheless for clarity, remove the test but add a WARN_ON() in case the code ever changes. Link: https://lkml.kernel.org/r/96925e5c1f116654ada7ea0613d930b1266b5e1c.1643319703.git.zanussi@kernel.org Fixes: f404da6e1d46c ("tracing: Add 'last error' error facility for hist triggers") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index cd9610688ddc..e0860146dd39 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6164,7 +6164,9 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, lockdep_assert_held(&event_mutex); - if (glob && strlen(glob)) { + WARN_ON(!glob); + + if (strlen(glob)) { hist_err_clear(); last_cmd_set(file, param); } From b59f2f2b865cedd6d1641394b9cd84399bd738ff Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 27 Jan 2022 15:44:16 -0600 Subject: [PATCH 259/367] tracing: Fix smatch warning for do while check in event_hist_trigger_parse() The patch ec5ce0987541: "tracing: Allow whitespace to surround hist trigger filter" from Jan 15, 2018, leads to the following Smatch static checker warning: kernel/trace/trace_events_hist.c:6199 event_hist_trigger_parse() warn: 'p' can't be NULL. Since p is always checked for a NULL value at the top of loop and nothing in the rest of the loop will set it to NULL, the warning is correct and might as well be 1 to silence the warning. Link: https://lkml.kernel.org/r/a1d4c79766c0cf61e20438dc35244d216633fef6.1643319703.git.zanussi@kernel.org Fixes: ec5ce09875410 ("tracing: Allow whitespace to surround hist trigger filter") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index e0860146dd39..b894d68082ea 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6199,7 +6199,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, continue; } break; - } while (p); + } while (1); if (!p) param = NULL; From 097f1eefedeab528cecbd35586dfe293853ffb17 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 27 Jan 2022 15:44:17 -0600 Subject: [PATCH 260/367] tracing: Propagate is_signed to expression During expression parsing, a new expression field is created which should inherit the properties of the operands, such as size and is_signed. is_signed propagation was missing, causing spurious errors with signed operands. Add it in parse_expr() and parse_unary() to fix the problem. Link: https://lkml.kernel.org/r/f4dac08742fd7a0920bf80a73c6c44042f5eaa40.1643319703.git.zanussi@kernel.org Cc: stable@vger.kernel.org Fixes: 100719dcef447 ("tracing: Add simple expression support to hist triggers") Reported-by: Yordan Karadzhov BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215513 Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index b894d68082ea..ada87bfb5bb8 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2503,6 +2503,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); expr->fn = hist_field_unary_minus; expr->operands[0] = operand1; + expr->size = operand1->size; + expr->is_signed = operand1->is_signed; expr->operator = FIELD_OP_UNARY_MINUS; expr->name = expr_str(expr, 0); expr->type = kstrdup_const(operand1->type, GFP_KERNEL); @@ -2719,6 +2721,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, /* The operand sizes should be the same, so just pick one */ expr->size = operand1->size; + expr->is_signed = operand1->is_signed; expr->operator = field_op; expr->type = kstrdup_const(operand1->type, GFP_KERNEL); From 67ab5eb71b37b55f7c5522d080a1b42823351776 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 27 Jan 2022 15:44:18 -0600 Subject: [PATCH 261/367] tracing: Don't inc err_log entry count if entry allocation fails tr->n_err_log_entries should only be increased if entry allocation succeeds. Doing it when it fails won't cause any problems other than wasting an entry, but should be fixed anyway. Link: https://lkml.kernel.org/r/cad1ab28f75968db0f466925e7cba5970cec6c29.1643319703.git.zanussi@kernel.org Cc: stable@vger.kernel.org Fixes: 2f754e771b1a6 ("tracing: Don't inc err_log entry count if entry allocation fails") Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a569a0cb81ee..c860f582b078 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7740,7 +7740,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) err = ERR_PTR(-ENOMEM); - tr->n_err_log_entries++; + else + tr->n_err_log_entries++; return err; } From 5aac9108a180fc06e28d4e7fb00247ce603b72ee Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 27 Jan 2022 14:50:03 +0530 Subject: [PATCH 262/367] net: amd-xgbe: Fix skb data length underflow There will be BUG_ON() triggered in include/linux/skbuff.h leading to intermittent kernel panic, when the skb length underflow is detected. Fix this by dropping the packet if such length underflows are seen because of inconsistencies in the hardware descriptors. Fixes: 622c36f143fc ("amd-xgbe: Fix jumbo MTU processing on newer hardware") Suggested-by: Tom Lendacky Signed-off-by: Shyam Sundar S K Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/20220127092003.2812745-1-Shyam-sundar.S-k@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 492ac383f16d..ec3b287e3a71 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2550,6 +2550,14 @@ read_again: buf2_len = xgbe_rx_buf2_len(rdata, packet, len); len += buf2_len; + if (buf2_len > rdata->rx.buf.dma_len) { + /* Hardware inconsistency within the descriptors + * that has resulted in a length underflow. + */ + error = 1; + goto skip_data; + } + if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, buf1_len); @@ -2579,8 +2587,10 @@ skip_data: if (!last || context_next) goto read_again; - if (!skb) + if (!skb || error) { + dev_kfree_skb(skb); goto next_packet; + } /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; From 7674b7b559b683478c3832527c59bceb169e701d Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 27 Jan 2022 11:32:22 +0530 Subject: [PATCH 263/367] net: amd-xgbe: ensure to reset the tx_timer_active flag Ensure to reset the tx_timer_active flag in xgbe_stop(), otherwise a port restart may result in tx timeout due to uncleared flag. Fixes: c635eaacbf77 ("amd-xgbe: Remove Tx coalescing") Co-developed-by: Sudheesh Mavila Signed-off-by: Sudheesh Mavila Signed-off-by: Raju Rangoju Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/20220127060222.453371-1-Raju.Rangoju@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index ec3b287e3a71..a3593290886f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) if (!channel->tx_ring) break; + /* Deactivate the Tx timer */ del_timer_sync(&channel->tx_timer); + channel->tx_timer_active = 0; } } From 50806fd91428a28d5daa649310dd0ca05b39c118 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Jan 2022 17:55:26 +0000 Subject: [PATCH 264/367] kselftest/arm64: Skip VL_INHERIT tests for unsupported vector types Currently we unconditionally test the ability to set the vector length inheritance flag via ptrace meaning that we generate false failures on systems that don't support SVE when we attempt to set the vector length there. Check the hwcap and mark the tests as skipped when it's not present. Fixes: 0ba1ce1e8605 ("selftests: arm64: Add coverage of ptrace flags for SVE VL inheritance") Signed-off-by: Mark Brown Reviewed-by: Shuah Khan Link: https://lore.kernel.org/r/20220124175527.3260234-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index af798b9d232c..0cf78360c5bc 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -557,7 +557,14 @@ static int do_parent(pid_t child) } /* prctl() flags */ - ptrace_set_get_inherit(child, &vec_types[i]); + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_set_get_inherit(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n", + vec_types[i].name); + ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n", + vec_types[i].name); + } /* Step through every possible VQ */ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { From 9ae279ecabe3e5bb85567e6c7371f4d35cfa00d6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Jan 2022 17:55:27 +0000 Subject: [PATCH 265/367] kselftest/arm64: Correct logging of FPSIMD register read via ptrace There's a cut'n'paste error in the logging for our test for reading register state back via ptrace, correctly say that we did a read instead of a write. Signed-off-by: Mark Brown Reviewed-by: Shuah Khan Link: https://lore.kernel.org/r/20220124175527.3260234-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 0cf78360c5bc..a3c1e67441f9 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -261,7 +261,7 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) } ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "Set FPSIMD registers via %s\n", type->name); + "Got FPSIMD registers via %s\n", type->name); if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) goto out; From 56f289a8d23addfa4408a08f07f42fcfe2a7bd69 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Jan 2022 07:31:53 -0800 Subject: [PATCH 266/367] KVM: x86: Add a helper to retrieve userspace address from kvm_device_attr Add a helper to handle converting the u64 userspace address embedded in struct kvm_device_attr into a userspace pointer, it's all too easy to forget the intermediate "unsigned long" cast as well as the truncation check. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5481d2259f02..76a5dcfcabdb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4332,7 +4332,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; } return r; +} +static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user*)(unsigned long)attr->addr; + + if ((u64)(unsigned long)uaddr != attr->addr) + return ERR_PTR(-EFAULT); + return uaddr; } long kvm_arch_dev_ioctl(struct file *filp, @@ -5025,11 +5033,11 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr; + u64 __user *uaddr = kvm_get_attr_addr(attr); int r; - if ((u64)(unsigned long)uaddr != attr->addr) - return -EFAULT; + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: @@ -5048,12 +5056,12 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr; + u64 __user *uaddr = kvm_get_attr_addr(attr); struct kvm *kvm = vcpu->kvm; int r; - if ((u64)(unsigned long)uaddr != attr->addr) - return -EFAULT; + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: { From dd6e631220181162478984d2d46dd979e04d8e75 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jan 2022 07:49:45 -0500 Subject: [PATCH 267/367] KVM: x86: add system attribute to retrieve full set of supported xsave states Because KVM_GET_SUPPORTED_CPUID is meant to be passed (by simple-minded VMMs) to KVM_SET_CPUID2, it cannot include any dynamic xsave states that have not been enabled. Probing those, for example so that they can be passed to ARCH_REQ_XCOMP_GUEST_PERM, requires a new ioctl or arch_prctl. The latter is in fact worse, even though that is what the rest of the API uses, because it would require supported_xcr0 to be moved from the KVM module to the kernel just for this use. In addition, the value would be nonsensical (or an error would have to be returned) until the KVM module is loaded in. Therefore, to limit the growth of system ioctls, add a /dev/kvm variant of KVM_{GET,HAS}_DEVICE_ATTR, and implement it in x86 with just one group (0) and attribute (KVM_X86_XCOMP_GUEST_SUPP). Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 4 ++- arch/x86/include/uapi/asm/kvm.h | 3 ++ arch/x86/kvm/x86.c | 51 +++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index bb8cfddbb22d..a4267104db50 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3268,6 +3268,7 @@ number. :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set) :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error @@ -3302,7 +3303,8 @@ transferred is defined by the particular attribute. ------------------------ :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, - KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 2da3316bb559..bf6e96011dfe 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -452,6 +452,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 76a5dcfcabdb..c25a6ef0ff06 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4230,6 +4230,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SREGS2: case KVM_CAP_EXIT_ON_EMULATION_FAILURE: case KVM_CAP_VCPU_ATTRIBUTES: + case KVM_CAP_SYS_ATTRIBUTES: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: @@ -4343,6 +4344,40 @@ static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) return uaddr; } +static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) +{ + u64 __user *uaddr = kvm_get_attr_addr(attr); + + if (attr->group) + return -ENXIO; + + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); + + switch (attr->attr) { + case KVM_X86_XCOMP_GUEST_SUPP: + if (put_user(supported_xcr0, uaddr)) + return -EFAULT; + return 0; + default: + return -ENXIO; + break; + } +} + +static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr) +{ + if (attr->group) + return -ENXIO; + + switch (attr->attr) { + case KVM_X86_XCOMP_GUEST_SUPP: + return 0; + default: + return -ENXIO; + } +} + long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4431,6 +4466,22 @@ long kvm_arch_dev_ioctl(struct file *filp, case KVM_GET_SUPPORTED_HV_CPUID: r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp); break; + case KVM_GET_DEVICE_ATTR: { + struct kvm_device_attr attr; + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_x86_dev_get_attr(&attr); + break; + } + case KVM_HAS_DEVICE_ATTR: { + struct kvm_device_attr attr; + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_x86_dev_has_attr(&attr); + break; + } default: r = -EINVAL; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9563d294f181..b46bcdb0cab1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING From b19c99b9f4486f23e3b7248dd4ce3d83e19b9032 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jan 2022 07:51:00 -0500 Subject: [PATCH 268/367] selftests: kvm: check dynamic bits against KVM_X86_XCOMP_GUEST_SUPP Provide coverage for the new API. Signed-off-by: Paolo Bonzini --- tools/arch/x86/include/uapi/asm/kvm.h | 3 +++ tools/include/uapi/linux/kvm.h | 1 + .../testing/selftests/kvm/lib/x86_64/processor.c | 15 +++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 2da3316bb559..bf6e96011dfe 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -452,6 +452,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 9563d294f181..b46bcdb0cab1 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c1d1c195a838..9f000dfb5594 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -667,8 +667,23 @@ static bool is_xfd_supported(void) void vm_xsave_req_perm(int bit) { + int kvm_fd; u64 bitmask; long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + exit(KSFT_SKIP); + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + if (!(bitmask & (1ULL << bit))) + exit(KSFT_SKIP); if (!is_xfd_supported()) exit(KSFT_SKIP); From f80ae0ef089a09e8c18da43a382c3caac9a424a7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 12 Jan 2022 18:01:30 +0100 Subject: [PATCH 269/367] KVM: nVMX: Also filter MSR_IA32_VMX_TRUE_PINBASED_CTLS when eVMCS Similar to MSR_IA32_VMX_EXIT_CTLS/MSR_IA32_VMX_TRUE_EXIT_CTLS, MSR_IA32_VMX_ENTRY_CTLS/MSR_IA32_VMX_TRUE_ENTRY_CTLS pair, MSR_IA32_VMX_TRUE_PINBASED_CTLS needs to be filtered the same way MSR_IA32_VMX_PINBASED_CTLS is currently filtered as guests may solely rely on 'true' MSR data. Note, none of the currently existing Windows/Hyper-V versions are known to stumble upon the unfiltered MSR_IA32_VMX_TRUE_PINBASED_CTLS, the change is aimed at making the filtering future proof. Signed-off-by: Vitaly Kuznetsov Message-Id: <20220112170134.1904308-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index ba6f99f584ac..a7ed30d5647a 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -362,6 +362,7 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) case MSR_IA32_VMX_PROCBASED_CTLS2: ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC; break; + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_PINBASED_CTLS: ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; break; From 7a601e2cf61558dfd534a9ecaad09f5853ad8204 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 12 Jan 2022 18:01:31 +0100 Subject: [PATCH 270/367] KVM: nVMX: eVMCS: Filter out VM_EXIT_SAVE_VMX_PREEMPTION_TIMER Enlightened VMCS v1 doesn't have VMX_PREEMPTION_TIMER_VALUE field, PIN_BASED_VMX_PREEMPTION_TIMER is also filtered out already so it makes sense to filter out VM_EXIT_SAVE_VMX_PREEMPTION_TIMER too. Note, none of the currently existing Windows/Hyper-V versions are known to enable 'save VMX-preemption timer value' when eVMCS is in use, the change is aimed at making the filtering future proof. Signed-off-by: Vitaly Kuznetsov Message-Id: <20220112170134.1904308-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 16731d2cf231..3a461a32128b 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -59,7 +59,9 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); SECONDARY_EXEC_SHADOW_VMCS | \ SECONDARY_EXEC_TSC_SCALING | \ SECONDARY_EXEC_PAUSE_LOOP_EXITING) -#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) +#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \ + (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) From 2423a4c0d17418eca1ba1e3f48684cb2ab7523d5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 12 Jan 2022 18:01:32 +0100 Subject: [PATCH 271/367] KVM: nVMX: Rename vmcs_to_field_offset{,_table} vmcs_to_field_offset{,_table} may sound misleading as VMCS is an opaque blob which is not supposed to be accessed directly. In fact, vmcs_to_field_offset{,_table} are related to KVM defined VMCS12 structure. Rename vmcs_field_to_offset() to get_vmcs12_field_offset() for clarity. No functional change intended. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20220112170134.1904308-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 +++--- arch/x86/kvm/vmx/vmcs12.c | 4 ++-- arch/x86/kvm/vmx/vmcs12.h | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2777cea05cc0..e614a5555aab 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5113,7 +5113,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) /* Decode instruction info and find the field to read */ field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = vmcs_field_to_offset(field); + offset = get_vmcs12_field_offset(field); if (offset < 0) return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); @@ -5216,7 +5216,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = vmcs_field_to_offset(field); + offset = get_vmcs12_field_offset(field); if (offset < 0) return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); @@ -6464,7 +6464,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void) max_idx = 0; for (i = 0; i < nr_vmcs12_fields; i++) { /* The vmcs12 table is very, very sparsely populated. */ - if (!vmcs_field_to_offset_table[i]) + if (!vmcs12_field_offsets[i]) continue; idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i)); diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index cab6ba7a5005..2251b60920f8 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -8,7 +8,7 @@ FIELD(number, name), \ [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32) -const unsigned short vmcs_field_to_offset_table[] = { +const unsigned short vmcs12_field_offsets[] = { FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), FIELD(POSTED_INTR_NV, posted_intr_nv), FIELD(GUEST_ES_SELECTOR, guest_es_selector), @@ -151,4 +151,4 @@ const unsigned short vmcs_field_to_offset_table[] = { FIELD(HOST_RSP, host_rsp), FIELD(HOST_RIP, host_rip), }; -const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs_field_to_offset_table); +const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets); diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 2a45f026ee11..746129ddd5ae 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -361,10 +361,10 @@ static inline void vmx_check_vmcs12_offsets(void) CHECK_OFFSET(guest_pml_index, 996); } -extern const unsigned short vmcs_field_to_offset_table[]; +extern const unsigned short vmcs12_field_offsets[]; extern const unsigned int nr_vmcs12_fields; -static inline short vmcs_field_to_offset(unsigned long field) +static inline short get_vmcs12_field_offset(unsigned long field) { unsigned short offset; unsigned int index; @@ -377,7 +377,7 @@ static inline short vmcs_field_to_offset(unsigned long field) return -ENOENT; index = array_index_nospec(index, nr_vmcs12_fields); - offset = vmcs_field_to_offset_table[index]; + offset = vmcs12_field_offsets[index]; if (offset == 0) return -ENOENT; return offset; From 892a42c10ddb945d3a4dcf07dccdf9cb98b21548 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 12 Jan 2022 18:01:33 +0100 Subject: [PATCH 272/367] KVM: nVMX: Implement evmcs_field_offset() suitable for handle_vmread() In preparation to allowing reads from Enlightened VMCS from handle_vmread(), implement evmcs_field_offset() to get the correct read offset. get_evmcs_offset(), which is being used by KVM-on-Hyper-V, is almost what's needed but a few things need to be adjusted. First, WARN_ON() is unacceptable for handle_vmread() as any field can (in theory) be supplied by the guest and not all fields are defined in eVMCS v1. Second, we need to handle 'holes' in eVMCS (missing fields). It also sounds like a good idea to WARN_ON() if such fields are ever accessed by KVM-on-Hyper-V. Implement dedicated evmcs_field_offset() helper. No functional change intended. Signed-off-by: Vitaly Kuznetsov Message-Id: <20220112170134.1904308-5-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.c | 3 +-- arch/x86/kvm/vmx/evmcs.h | 32 ++++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index a7ed30d5647a..87e3dc10edf4 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -12,8 +12,6 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs); -#if IS_ENABLED(CONFIG_HYPERV) - #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) #define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \ {EVMCS1_OFFSET(name), clean_field} @@ -296,6 +294,7 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); +#if IS_ENABLED(CONFIG_HYPERV) __init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) { vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 3a461a32128b..9bc2521b159e 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -65,8 +65,6 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) -#if IS_ENABLED(CONFIG_HYPERV) - struct evmcs_field { u16 offset; u16 clean_field; @@ -75,26 +73,44 @@ struct evmcs_field { extern const struct evmcs_field vmcs_field_to_evmcs_1[]; extern const unsigned int nr_evmcs_1_fields; -static __always_inline int get_evmcs_offset(unsigned long field, - u16 *clean_field) +static __always_inline int evmcs_field_offset(unsigned long field, + u16 *clean_field) { unsigned int index = ROL16(field, 6); const struct evmcs_field *evmcs_field; - if (unlikely(index >= nr_evmcs_1_fields)) { - WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n", - field); + if (unlikely(index >= nr_evmcs_1_fields)) return -ENOENT; - } evmcs_field = &vmcs_field_to_evmcs_1[index]; + /* + * Use offset=0 to detect holes in eVMCS. This offset belongs to + * 'revision_id' but this field has no encoding and is supposed to + * be accessed directly. + */ + if (unlikely(!evmcs_field->offset)) + return -ENOENT; + if (clean_field) *clean_field = evmcs_field->clean_field; return evmcs_field->offset; } +#if IS_ENABLED(CONFIG_HYPERV) + +static __always_inline int get_evmcs_offset(unsigned long field, + u16 *clean_field) +{ + int offset = evmcs_field_offset(field, clean_field); + + WARN_ONCE(offset < 0, "KVM: accessing unsupported EVMCS field %lx\n", + field); + + return offset; +} + static __always_inline void evmcs_write64(unsigned long field, u64 value) { u16 clean_field; From 6cbbaab60ff33f59355492c241318046befd9ffc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 12 Jan 2022 18:01:34 +0100 Subject: [PATCH 273/367] KVM: nVMX: Allow VMREAD when Enlightened VMCS is in use Hyper-V TLFS explicitly forbids VMREAD and VMWRITE instructions when Enlightened VMCS interface is in use: "Any VMREAD or VMWRITE instructions while an enlightened VMCS is active is unsupported and can result in unexpected behavior."" Windows 11 + WSL2 seems to ignore this, attempts to VMREAD VMCS field 0x4404 ("VM-exit interruption information") are observed. Failing these attempts with nested_vmx_failInvalid() makes such guests unbootable. Microsoft confirms this is a Hyper-V bug and claims that it'll get fixed eventually but for the time being we need a workaround. (Temporary) allow VMREAD to get data from the currently loaded Enlightened VMCS. Note: VMWRITE instructions remain forbidden, it is not clear how to handle them properly and hopefully won't ever be needed. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20220112170134.1904308-6-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.h | 12 +++++++++ arch/x86/kvm/vmx/nested.c | 55 +++++++++++++++++++++++++++------------ 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 9bc2521b159e..8d70f9aea94b 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -98,6 +98,18 @@ static __always_inline int evmcs_field_offset(unsigned long field, return evmcs_field->offset; } +static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs, + unsigned long field, u16 offset) +{ + /* + * vmcs12_read_any() doesn't care whether the supplied structure + * is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes + * the exact offset of the required field, use it for convenience + * here. + */ + return vmcs12_read_any((void *)evmcs, field, offset); +} + #if IS_ENABLED(CONFIG_HYPERV) static __always_inline int get_evmcs_offset(unsigned long field, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e614a5555aab..ba34e94049c7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,6 +7,7 @@ #include #include "cpuid.h" +#include "evmcs.h" #include "hyperv.h" #include "mmu.h" #include "nested.h" @@ -5101,27 +5102,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - /* - * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, - * any VMREAD sets the ALU flags for VMfailInvalid. - */ - if (vmx->nested.current_vmptr == INVALID_GPA || - (is_guest_mode(vcpu) && - get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) - return nested_vmx_failInvalid(vcpu); - /* Decode instruction info and find the field to read */ field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = get_vmcs12_field_offset(field); - if (offset < 0) - return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { + /* + * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, + * any VMREAD sets the ALU flags for VMfailInvalid. + */ + if (vmx->nested.current_vmptr == INVALID_GPA || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) + return nested_vmx_failInvalid(vcpu); - if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) - copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + offset = get_vmcs12_field_offset(field); + if (offset < 0) + return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); - /* Read the field, zero-extended to a u64 value */ - value = vmcs12_read_any(vmcs12, field, offset); + if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + + /* Read the field, zero-extended to a u64 value */ + value = vmcs12_read_any(vmcs12, field, offset); + } else { + /* + * Hyper-V TLFS (as of 6.0b) explicitly states, that while an + * enlightened VMCS is active VMREAD/VMWRITE instructions are + * unsupported. Unfortunately, certain versions of Windows 11 + * don't comply with this requirement which is not enforced in + * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a + * workaround, as misbehaving guests will panic on VM-Fail. + * Note, enlightened VMCS is incompatible with shadow VMCS so + * all VMREADs from L2 should go to L1. + */ + if (WARN_ON_ONCE(is_guest_mode(vcpu))) + return nested_vmx_failInvalid(vcpu); + + offset = evmcs_field_offset(field, NULL); + if (offset < 0) + return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + + /* Read the field, zero-extended to a u64 value */ + value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset); + } /* * Now copy part of this value to register or memory, as requested. From 6a0c61703e3a5d67845a4b275e1d9d7bc1b5aad7 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Thu, 27 Jan 2022 14:54:49 +0800 Subject: [PATCH 274/367] KVM: eventfd: Fix false positive RCU usage warning Fix the following false positive warning: ============================= WARNING: suspicious RCU usage 5.16.0-rc4+ #57 Not tainted ----------------------------- arch/x86/kvm/../../../virt/kvm/eventfd.c:484 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by fc_vcpu 0/330: #0: ffff8884835fc0b0 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x88/0x6f0 [kvm] #1: ffffc90004c0bb68 (&kvm->srcu){....}-{0:0}, at: vcpu_enter_guest+0x600/0x1860 [kvm] #2: ffffc90004c0c1d0 (&kvm->irq_srcu){....}-{0:0}, at: kvm_notify_acked_irq+0x36/0x180 [kvm] stack backtrace: CPU: 26 PID: 330 Comm: fc_vcpu 0 Not tainted 5.16.0-rc4+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x44/0x57 kvm_notify_acked_gsi+0x6b/0x70 [kvm] kvm_notify_acked_irq+0x8d/0x180 [kvm] kvm_ioapic_update_eoi+0x92/0x240 [kvm] kvm_apic_set_eoi_accelerated+0x2a/0xe0 [kvm] handle_apic_eoi_induced+0x3d/0x60 [kvm_intel] vmx_handle_exit+0x19c/0x6a0 [kvm_intel] vcpu_enter_guest+0x66e/0x1860 [kvm] kvm_arch_vcpu_ioctl_run+0x438/0x7f0 [kvm] kvm_vcpu_ioctl+0x38a/0x6f0 [kvm] __x64_sys_ioctl+0x89/0xc0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Since kvm_unregister_irq_ack_notifier() does synchronize_srcu(&kvm->irq_srcu), kvm->irq_ack_notifier_list is protected by kvm->irq_srcu. In fact, kvm->irq_srcu SRCU read lock is held in kvm_notify_acked_irq(), making it a false positive warning. So use hlist_for_each_entry_srcu() instead of hlist_for_each_entry_rcu(). Reviewed-by: Sean Christopherson Signed-off-by: Hou Wenlong Message-Id: Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 2ad013b8bde9..59b1dd4a549e 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -463,8 +463,8 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) + hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list, + link, srcu_read_lock_held(&kvm->irq_srcu)) if (kian->gsi == gsi) { srcu_read_unlock(&kvm->irq_srcu, idx); return true; @@ -480,8 +480,8 @@ void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) + hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list, + link, srcu_read_lock_held(&kvm->irq_srcu)) if (kian->gsi == gsi) kian->irq_acked(kian); } From 2cbd27267ffe020af1442b95ec57f59a157ba85c Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Thu, 27 Jan 2022 13:53:59 -0500 Subject: [PATCH 275/367] spi: bcm-qspi: check for valid cs before applying chip select Apply only valid chip select value. This change fixes case where chip select is set to initial value of '-1' during probe and PM supend and subsequent resume can try to use the value with undefined behaviour. Also in case where gpio based chip select, the check in bcm_qspi_chip_select() shall prevent undefined behaviour on resume. Fixes: fa236a7ef240 ("spi: bcm-qspi: Add Broadcom MSPI driver") Signed-off-by: Kamal Dasu Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20220127185359.27322-1-kdasu.kdev@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm-qspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index c9a769b8594b..86c76211b3d3 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -585,7 +585,7 @@ static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs) u32 rd = 0; u32 wr = 0; - if (qspi->base[CHIP_SELECT]) { + if (cs >= 0 && qspi->base[CHIP_SELECT]) { rd = bcm_qspi_read(qspi, CHIP_SELECT, 0); wr = (rd & ~0xff) | (1 << cs); if (rd == wr) From 60b1e97140a487608b7cbde774b3cff1b5a99c00 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 26 Jan 2022 17:13:26 -0600 Subject: [PATCH 276/367] spi: dt-bindings: Fix 'reg' child node schema The schema for SPI child nodes' 'reg' property is not complete. 'reg' is a matrix of cells. The schema needs to define both the number of 'reg' entries and constraints on each entry. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220126231326.1636199-1-robh@kernel.org Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/spi-peripheral-props.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml index 5dd209206e88..3ec2d7b83775 100644 --- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml @@ -23,8 +23,9 @@ properties: minItems: 1 maxItems: 256 items: - minimum: 0 - maximum: 256 + items: + - minimum: 0 + maximum: 256 description: Chip select used by the device. From ab451ea952fe9d7afefae55ddb28943a148247fe Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 26 Jan 2022 13:13:38 -0800 Subject: [PATCH 277/367] nfsd: nfsd4_setclientid_confirm mistakenly expires confirmed client. From RFC 7530 Section 16.34.5: o The server has not recorded an unconfirmed { v, x, c, *, * } and has recorded a confirmed { v, x, c, *, s }. If the principals of the record and of SETCLIENTID_CONFIRM do not match, the server returns NFS4ERR_CLID_INUSE without removing any relevant leased client state, and without changing recorded callback and callback_ident values for client { x }. The current code intends to do what the spec describes above but it forgot to set 'old' to NULL resulting to the confirmed client to be expired. Fixes: 2b63482185e6 ("nfsd: fix clid_inuse on mount with security change") Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever Reviewed-by: Bruce Fields --- fs/nfsd/nfs4state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 72900b89cf84..32063733443d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4130,8 +4130,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, status = nfserr_clid_inuse; if (client_has_state(old) && !same_creds(&unconf->cl_cred, - &old->cl_cred)) + &old->cl_cred)) { + old = NULL; goto out; + } status = mark_client_expired_locked(old); if (status) { old = NULL; From 928d6fe996f69330ded6b887baf4534c5fac7988 Mon Sep 17 00:00:00 2001 From: Yuji Ishikawa Date: Thu, 27 Jan 2022 21:17:14 +0900 Subject: [PATCH 278/367] net: stmmac: dwmac-visconti: No change to ETHER_CLOCK_SEL for unexpected speed request. Variable clk_sel_val is not initialized in the default case of the first switch statement. In that case, the function should return immediately without any changes to the hardware. Reported-by: kernel test robot Reported-by: Dan Carpenter Fixes: b38dd98ff8d0 ("net: stmmac: Add Toshiba Visconti SoCs glue driver") Signed-off-by: Yuji Ishikawa Reviewed-by: Nobuhiro Iwamatsu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index dde5b772a5af..c3f10a92b62b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -49,13 +49,15 @@ struct visconti_eth { void __iomem *reg; u32 phy_intf_sel; struct clk *phy_ref_clk; + struct device *dev; spinlock_t lock; /* lock to protect register update */ }; static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed) { struct visconti_eth *dwmac = priv; - unsigned int val, clk_sel_val; + struct net_device *netdev = dev_get_drvdata(dwmac->dev); + unsigned int val, clk_sel_val = 0; unsigned long flags; spin_lock_irqsave(&dwmac->lock, flags); @@ -85,7 +87,9 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed) break; default: /* No bit control */ - break; + netdev_err(netdev, "Unsupported speed request (%d)", speed); + spin_unlock_irqrestore(&dwmac->lock, flags); + return; } writel(val, dwmac->reg + MAC_CTRL_REG); @@ -229,6 +233,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) spin_lock_init(&dwmac->lock); dwmac->reg = stmmac_res.addr; + dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed; From 500c77eed0feabddd5b3afb48e32c204614a8eab Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Thu, 27 Jan 2022 20:46:02 +0100 Subject: [PATCH 279/367] pinctrl: zynqmp: Revert "Unify pin naming" This reverts commit 54784ff24971ed5bd3f1056edce998148709d0a7. This patch changes the pin names from "MIO%d" to "MIO-%d", but all dts in arch/arm64/boot/dts/xilinx still use the old name. As a result my ZCU104 has no output on serial terminal and is not reachable over network. Signed-off-by: Gerhard Engleder Signed-off-by: Andy Shevchenko --- drivers/pinctrl/pinctrl-zynqmp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-zynqmp.c b/drivers/pinctrl/pinctrl-zynqmp.c index 42da6bd399ee..e14012209992 100644 --- a/drivers/pinctrl/pinctrl-zynqmp.c +++ b/drivers/pinctrl/pinctrl-zynqmp.c @@ -809,7 +809,6 @@ static int zynqmp_pinctrl_prepare_pin_desc(struct device *dev, unsigned int *npins) { struct pinctrl_pin_desc *pins, *pin; - char **pin_names; int ret; int i; @@ -821,14 +820,13 @@ static int zynqmp_pinctrl_prepare_pin_desc(struct device *dev, if (!pins) return -ENOMEM; - pin_names = devm_kasprintf_strarray(dev, ZYNQMP_PIN_PREFIX, *npins); - if (IS_ERR(pin_names)) - return PTR_ERR(pin_names); - for (i = 0; i < *npins; i++) { pin = &pins[i]; pin->number = i; - pin->name = pin_names[i]; + pin->name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", + ZYNQMP_PIN_PREFIX, i); + if (!pin->name) + return -ENOMEM; } *zynqmp_pins = pins; From 4e0f718daf97d47cf7dec122da1be970f145c809 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 28 Jan 2022 12:47:15 +0800 Subject: [PATCH 280/367] ax25: improve the incomplete fix to avoid UAF and NPD bugs The previous commit 1ade48d0c27d ("ax25: NPD bug when detaching AX25 device") introduce lock_sock() into ax25_kill_by_device to prevent NPD bug. But the concurrency NPD or UAF bug will occur, when lock_sock() or release_sock() dereferences the ax25_cb->sock. The NULL pointer dereference bug can be shown as below: ax25_kill_by_device() | ax25_release() | ax25_destroy_socket() | ax25_cb_del() ... | ... | ax25->sk=NULL; lock_sock(s->sk); //(1) | s->ax25_dev = NULL; | ... release_sock(s->sk); //(2) | ... | The root cause is that the sock is set to null before dereference site (1) or (2). Therefore, this patch extracts the ax25_cb->sock in advance, and uses ax25_list_lock to protect it, which can synchronize with ax25_cb_del() and ensure the value of sock is not null before dereference sites. The concurrency UAF bug can be shown as below: ax25_kill_by_device() | ax25_release() | ax25_destroy_socket() ... | ... | sock_put(sk); //FREE lock_sock(s->sk); //(1) | s->ax25_dev = NULL; | ... release_sock(s->sk); //(2) | ... | The root cause is that the sock is released before dereference site (1) or (2). Therefore, this patch uses sock_hold() to increase the refcount of sock and uses ax25_list_lock to protect it, which can synchronize with ax25_cb_del() in ax25_destroy_socket() and ensure the sock wil not be released before dereference sites. Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 02f43f3e2c56..44a8730c26ac 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -85,13 +86,15 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { + sk = s->sk; + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); - lock_sock(s->sk); + lock_sock(sk); s->ax25_dev = NULL; - release_sock(s->sk); + release_sock(sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart From d01ffb9eee4af165d83b08dd73ebdf9fe94a519b Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 28 Jan 2022 12:47:16 +0800 Subject: [PATCH 281/367] ax25: add refcount in ax25_dev to avoid UAF bugs If we dereference ax25_dev after we call kfree(ax25_dev) in ax25_dev_device_down(), it will lead to concurrency UAF bugs. There are eight syscall functions suffer from UAF bugs, include ax25_bind(), ax25_release(), ax25_connect(), ax25_ioctl(), ax25_getname(), ax25_sendmsg(), ax25_getsockopt() and ax25_info_show(). One of the concurrency UAF can be shown as below: (USE) | (FREE) | ax25_device_event | ax25_dev_device_down ax25_bind | ... ... | kfree(ax25_dev) ax25_fillin_cb() | ... ax25_fillin_cb_from_dev() | ... | The root cause of UAF bugs is that kfree(ax25_dev) in ax25_dev_device_down() is not protected by any locks. When ax25_dev, which there are still pointers point to, is released, the concurrency UAF bug will happen. This patch introduces refcount into ax25_dev in order to guarantee that there are no pointers point to it when ax25_dev is released. Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- include/net/ax25.h | 10 ++++++++++ net/ax25/af_ax25.c | 2 ++ net/ax25/ax25_dev.c | 12 ++++++++++-- net/ax25/ax25_route.c | 3 +++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 526e49589197..50b417df6221 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -239,6 +239,7 @@ typedef struct ax25_dev { #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_dama_info dama; #endif + refcount_t refcount; } ax25_dev; typedef struct ax25_cb { @@ -293,6 +294,15 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } +#define ax25_dev_hold(__ax25_dev) \ + refcount_inc(&((__ax25_dev)->refcount)) + +static __inline__ void ax25_dev_put(ax25_dev *ax25_dev) +{ + if (refcount_dec_and_test(&ax25_dev->refcount)) { + kfree(ax25_dev); + } +} static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 44a8730c26ac..32f61978ff29 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -91,6 +91,7 @@ again: spin_unlock_bh(&ax25_list_lock); lock_sock(sk); s->ax25_dev = NULL; + ax25_dev_put(ax25_dev); release_sock(sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); @@ -439,6 +440,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) } out_put: + ax25_dev_put(ax25_dev); ax25_cb_put(ax25); return ret; diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 256fadb94df3..770b787fb7bb 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; + ax25_dev_hold(ax25_dev); } spin_unlock_bh(&ax25_dev_lock); @@ -56,6 +57,7 @@ void ax25_dev_device_up(struct net_device *dev) return; } + refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); @@ -83,6 +85,7 @@ void ax25_dev_device_up(struct net_device *dev) spin_lock_bh(&ax25_dev_lock); ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; + ax25_dev_hold(ax25_dev); spin_unlock_bh(&ax25_dev_lock); ax25_register_dev_sysctl(ax25_dev); @@ -112,20 +115,22 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; + ax25_dev_put(ax25_dev); spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; dev_put_track(dev, &ax25_dev->dev_tracker); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } while (s != NULL && s->next != NULL) { if (s->next == ax25_dev) { s->next = ax25_dev->next; + ax25_dev_put(ax25_dev); spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; dev_put_track(dev, &ax25_dev->dev_tracker); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -133,6 +138,7 @@ void ax25_dev_device_down(struct net_device *dev) } spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; + ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) @@ -149,6 +155,7 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) if (ax25_dev->forward != NULL) return -EINVAL; ax25_dev->forward = fwd_dev->dev; + ax25_dev_put(fwd_dev); break; case SIOCAX25DELFWD: @@ -161,6 +168,7 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) return -EINVAL; } + ax25_dev_put(ax25_dev); return 0; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index d0b2e094bd55..1e32693833e5 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -116,6 +116,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->dev = ax25_dev->dev; ax25_rt->digipeat = NULL; ax25_rt->ip_mode = ' '; + ax25_dev_put(ax25_dev); if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); @@ -172,6 +173,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } } + ax25_dev_put(ax25_dev); write_unlock_bh(&ax25_route_lock); return 0; @@ -214,6 +216,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) } out: + ax25_dev_put(ax25_dev); write_unlock_bh(&ax25_route_lock); return err; } From 1f84a9450d75e08af70d9e2f2d5e1c0ac0c881d2 Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Fri, 28 Jan 2022 18:47:14 +0800 Subject: [PATCH 282/367] gve: fix the wrong AdminQ buffer queue index check The 'tail' and 'head' are 'unsigned int' type free-running count, when 'head' is overflow, the 'int i (= tail) < u32 head' will be false: Only '- loop 0: idx = 63' result is shown, so it needs to use 'int' type to compare, it can handle the overflow correctly. typedef uint32_t u32; int main() { u32 tail, head; int stail, shead; int i, loop; tail = 0xffffffff; head = 0x00000000; for (i = tail, loop = 0; i < head; i++) { unsigned int idx = i & 63; printf("+ loop %d: idx = %u\n", loop++, idx); } stail = tail; shead = head; for (i = stail, loop = 0; i < shead; i++) { unsigned int idx = i & 63; printf("- loop %d: idx = %u\n", loop++, idx); } return 0; } Fixes: 5cdad90de62c ("gve: Batch AQ commands for creating and destroying queues.") Signed-off-by: Haiyue Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_adminq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 2ad7f57f7e5b..f7621ab672b9 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -301,7 +301,7 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) */ static int gve_adminq_kick_and_wait(struct gve_priv *priv) { - u32 tail, head; + int tail, head; int i; tail = ioread32be(&priv->reg_bar0->adminq_event_counter); From 6449520391dfc3d2cef134f11a91251a054ff7d0 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 28 Jan 2022 22:15:50 +0800 Subject: [PATCH 283/367] net: stmmac: properly handle with runtime pm in stmmac_dvr_remove() There are two issues with runtime pm handling in stmmac_dvr_remove(): 1. the mac is runtime suspended before stopping dma and rx/tx. We need to ensure the device is properly resumed back. 2. the stmmaceth clk enable/disable isn't balanced in both exit and error handling code path. Take the exit code path for example, when we unbind the driver or rmmod the driver module, the mac is runtime suspended as said above, so the stmmaceth clk is disabled, but stmmac_dvr_remove() stmmac_remove_config_dt() clk_disable_unprepare() CCF will complain this time. The error handling code path suffers from the similar situtaion. Here are kernel warnings in error handling code path on Allwinner D1 platform: [ 1.604695] ------------[ cut here ]------------ [ 1.609328] bus-emac already disabled [ 1.613015] WARNING: CPU: 0 PID: 38 at drivers/clk/clk.c:952 clk_core_disable+0xcc/0xec [ 1.621039] CPU: 0 PID: 38 Comm: kworker/u2:1 Not tainted 5.14.0-rc4#1 [ 1.627653] Hardware name: Allwinner D1 NeZha (DT) [ 1.632443] Workqueue: events_unbound deferred_probe_work_func [ 1.638286] epc : clk_core_disable+0xcc/0xec [ 1.642561] ra : clk_core_disable+0xcc/0xec [ 1.646835] epc : ffffffff8023c2ec ra : ffffffff8023c2ec sp : ffffffd00411bb10 [ 1.654054] gp : ffffffff80ec9988 tp : ffffffe00143a800 t0 : ffffffff80ed6a6f [ 1.661272] t1 : ffffffff80ed6a60 t2 : 0000000000000000 s0 : ffffffe001509e00 [ 1.668489] s1 : 0000000000000001 a0 : 0000000000000019 a1 : ffffffff80e80bd8 [ 1.675707] a2 : 00000000ffffefff a3 : 00000000000000f4 a4 : 0000000000000002 [ 1.682924] a5 : 0000000000000001 a6 : 0000000000000030 a7 : 00000000028f5c29 [ 1.690141] s2 : 0000000000000800 s3 : ffffffe001375000 s4 : ffffffe01fdf7a80 [ 1.697358] s5 : ffffffe001375010 s6 : ffffffff8001fc10 s7 : ffffffffffffffff [ 1.704577] s8 : 0000000000000001 s9 : ffffffff80ecb248 s10: ffffffe001b80000 [ 1.711794] s11: ffffffe001b80760 t3 : 0000000000000062 t4 : ffffffffffffffff [ 1.719012] t5 : ffffffff80e0f6d8 t6 : ffffffd00411b8f0 [ 1.724321] status: 8000000201800100 badaddr: 0000000000000000 cause: 0000000000000003 [ 1.732233] [] clk_core_disable+0xcc/0xec [ 1.737810] [] clk_disable+0x38/0x78 [ 1.742956] [] worker_thread+0x1a8/0x4d8 [ 1.748451] [] stmmac_remove_config_dt+0x1c/0x4c [ 1.754646] [] sun8i_dwmac_probe+0x378/0x82c [ 1.760484] [] worker_thread+0x1a8/0x4d8 [ 1.765975] [] platform_probe+0x64/0xf0 [ 1.771382] [] really_probe.part.0+0x8c/0x30c [ 1.777305] [] __driver_probe_device+0xa0/0x148 [ 1.783402] [] driver_probe_device+0x38/0x138 [ 1.789324] [] __device_attach_driver+0xd0/0x170 [ 1.795508] [] __driver_attach_async_helper+0xbc/0xc0 [ 1.802125] [] bus_for_each_drv+0x68/0xb4 [ 1.807701] [] __device_attach+0xd8/0x184 [ 1.813277] [] bus_probe_device+0x98/0xbc [ 1.818852] [] deferred_probe_work_func+0x90/0xd4 [ 1.825122] [] process_one_work+0x1e4/0x390 [ 1.830872] [] worker_thread+0x31c/0x4d8 [ 1.836362] [] kthreadd+0x94/0x188 [ 1.841335] [] kthreadd+0x94/0x188 [ 1.846304] [] process_one_work+0x38c/0x390 [ 1.852054] [] kthread+0x124/0x160 [ 1.857021] [] set_kthread_struct+0x5c/0x60 [ 1.862770] [] ret_from_syscall_rejected+0x8/0xc [ 1.868956] ---[ end trace 8d5c6046255f84a0 ]--- [ 1.873675] ------------[ cut here ]------------ [ 1.878366] bus-emac already unprepared [ 1.882378] WARNING: CPU: 0 PID: 38 at drivers/clk/clk.c:810 clk_core_unprepare+0xe4/0x168 [ 1.890673] CPU: 0 PID: 38 Comm: kworker/u2:1 Tainted: G W 5.14.0-rc4 #1 [ 1.898674] Hardware name: Allwinner D1 NeZha (DT) [ 1.903464] Workqueue: events_unbound deferred_probe_work_func [ 1.909305] epc : clk_core_unprepare+0xe4/0x168 [ 1.913840] ra : clk_core_unprepare+0xe4/0x168 [ 1.918375] epc : ffffffff8023d6cc ra : ffffffff8023d6cc sp : ffffffd00411bb10 [ 1.925593] gp : ffffffff80ec9988 tp : ffffffe00143a800 t0 : 0000000000000002 [ 1.932811] t1 : ffffffe01f743be0 t2 : 0000000000000040 s0 : ffffffe001509e00 [ 1.940029] s1 : 0000000000000001 a0 : 000000000000001b a1 : ffffffe00143a800 [ 1.947246] a2 : 0000000000000000 a3 : 00000000000000f4 a4 : 0000000000000001 [ 1.954463] a5 : 0000000000000000 a6 : 0000000005fce2a5 a7 : 0000000000000001 [ 1.961680] s2 : 0000000000000800 s3 : ffffffff80afeb90 s4 : ffffffe01fdf7a80 [ 1.968898] s5 : ffffffe001375010 s6 : ffffffff8001fc10 s7 : ffffffffffffffff [ 1.976115] s8 : 0000000000000001 s9 : ffffffff80ecb248 s10: ffffffe001b80000 [ 1.983333] s11: ffffffe001b80760 t3 : ffffffff80b39120 t4 : 0000000000000001 [ 1.990550] t5 : 0000000000000000 t6 : ffffffe001600002 [ 1.995859] status: 8000000201800120 badaddr: 0000000000000000 cause: 0000000000000003 [ 2.003771] [] clk_core_unprepare+0xe4/0x168 [ 2.009609] [] clk_unprepare+0x24/0x3c [ 2.014929] [] stmmac_remove_config_dt+0x24/0x4c [ 2.021125] [] sun8i_dwmac_probe+0x378/0x82c [ 2.026965] [] worker_thread+0x1a8/0x4d8 [ 2.032463] [] platform_probe+0x64/0xf0 [ 2.037871] [] really_probe.part.0+0x8c/0x30c [ 2.043795] [] __driver_probe_device+0xa0/0x148 [ 2.049892] [] driver_probe_device+0x38/0x138 [ 2.055815] [] __device_attach_driver+0xd0/0x170 [ 2.061999] [] __driver_attach_async_helper+0xbc/0xc0 [ 2.068616] [] bus_for_each_drv+0x68/0xb4 [ 2.074193] [] __device_attach+0xd8/0x184 [ 2.079769] [] bus_probe_device+0x98/0xbc [ 2.085345] [] deferred_probe_work_func+0x90/0xd4 [ 2.091616] [] process_one_work+0x1e4/0x390 [ 2.097367] [] worker_thread+0x31c/0x4d8 [ 2.102858] [] kthreadd+0x94/0x188 [ 2.107830] [] kthreadd+0x94/0x188 [ 2.112800] [] process_one_work+0x38c/0x390 [ 2.118551] [] kthread+0x124/0x160 [ 2.123520] [] set_kthread_struct+0x5c/0x60 [ 2.129268] [] ret_from_syscall_rejected+0x8/0xc [ 2.135455] ---[ end trace 8d5c6046255f84a1 ]--- Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver") Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 639a753266e6..bde76ea2deec 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7252,6 +7252,10 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); + pm_runtime_get_sync(dev); + pm_runtime_disable(dev); + pm_runtime_put_noidle(dev); + stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); @@ -7270,8 +7274,6 @@ int stmmac_dvr_remove(struct device *dev) if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); reset_control_assert(priv->plat->stmmac_ahb_rst); - pm_runtime_put(dev); - pm_runtime_disable(dev); if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); From f6a26318e3148289717b0f39ee976f9d95a93f4d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 28 Jan 2022 10:00:29 +0200 Subject: [PATCH 284/367] ocfs2: fix subdirectory registration with register_sysctl() The kernel test robot reports that commit c42ff46f97c1 ("ocfs2: simplify subdirectory registration with register_sysctl()") is broken, and results in kernel warning messages like sysctl table check failed: fs/ocfs2/nm Not a file sysctl table check failed: fs/ocfs2/nm No proc_handler sysctl table check failed: fs/ocfs2/nm bogus .mode 0555 and in fact this was already reported back in linux-next, but nobody seems to have reacted to that report. Possibly that original report only ever made it to the lkp list. The problem seems to be that the simplification didn't actually go far enough, and should have converted the whole directory path to the final sysctl file, rather than just the two first components. So take that last step. Fixes: c42ff46f97c1 ("ocfs2: simplify subdirectory registration with register_sysctl()") Reported-by: kernel test robot Link: https://lore.kernel.org/all/20220128065310.GF8421@xsang-OptiPlex-9020/ Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/KQ2F6TPJWMDVEXJM4WTUC4DU3EH3YJVT/ Tested-by: Jan Kara Reviewed-by: Jan Kara Cc: Luis Chamberlain Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/stackglue.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 731558a6f27d..dd77b7aaabf5 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -661,17 +661,6 @@ static struct ctl_table ocfs2_nm_table[] = { { } }; -static struct ctl_table ocfs2_mod_table[] = { - { - .procname = "nm", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_nm_table - }, - { } -}; - static struct ctl_table_header *ocfs2_table_header; /* @@ -682,7 +671,7 @@ static int __init ocfs2_stack_glue_init(void) { strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); - ocfs2_table_header = register_sysctl("fs/ocfs2", ocfs2_mod_table); + ocfs2_table_header = register_sysctl("fs/ocfs2/nm", ocfs2_nm_table); if (!ocfs2_table_header) { printk(KERN_ERR "ocfs2 stack glue: unable to register sysctl\n"); From 297ae1eb23b04c5a46111ab53c8d0f69af43f402 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 25 Jan 2022 15:40:40 +0000 Subject: [PATCH 285/367] arm64: cpufeature: List early Cortex-A510 parts as having broken dbm Versions of Cortex-A510 before r0p3 are affected by a hardware erratum where the hardware update of the dirty bit is not correctly ordered. Add these cpus to the cpu_has_broken_dbm list. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20220125154040.549272-3-james.morse@arm.com Signed-off-by: Catalin Marinas --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 10 ++++++++++ arch/arm64/kernel/cpufeature.c | 3 +++ 3 files changed, 15 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 1b0e53ececda..0ec7b7f1524b 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -98,6 +98,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1d7036e10215..f2b5a4abef21 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -670,6 +670,16 @@ config ARM64_ERRATUM_1508412 config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE bool +config ARM64_ERRATUM_2051678 + bool "Cortex-A510: 2051678: disable Hardware Update of the page table dirty bit" + help + This options adds the workaround for ARM Cortex-A510 erratum ARM64_ERRATUM_2051678. + Affected Coretex-A510 might not respect the ordering rules for + hardware update of the page table's dirty bit. The workaround + is to not enable the feature on affected CPUs. + + If unsure, say Y. + config ARM64_ERRATUM_2119858 bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode" default y diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a46ab3b1c4d5..e5f23dab1c8d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1645,6 +1645,9 @@ static bool cpu_has_broken_dbm(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), /* Kryo4xx Silver (rdpe => r1p0) */ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2051678 + MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2), #endif {}, }; From e8cc7a5d1ad2d44e7f43664ef6a61e31c0545a5b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 26 Jan 2022 13:32:05 +0100 Subject: [PATCH 286/367] dt-bindings: irqchip: renesas-irqc: Add R-Car V3U support Document support for the Interrupt Controller for External Devices (INT-EC) in the Renesas R-Car V3U (r8a779a0) SoC. Signed-off-by: Geert Uytterhoeven Tested-by: Kieran Bingham Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/85b246cc0792663c72c1bb12a8576bd23d2299d3.1643200256.git.geert+renesas@glider.be --- .../devicetree/bindings/interrupt-controller/renesas,irqc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml index 79d0358e2f61..620f01775e42 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml @@ -36,6 +36,7 @@ properties: - renesas,intc-ex-r8a77980 # R-Car V3H - renesas,intc-ex-r8a77990 # R-Car E3 - renesas,intc-ex-r8a77995 # R-Car D3 + - renesas,intc-ex-r8a779a0 # R-Car V3U - const: renesas,irqc '#interrupt-cells': From 8fbc16d26d3a1ed3d80553b773be29408750987b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 28 Jan 2022 10:03:57 +0100 Subject: [PATCH 287/367] dt-bindings: interrupt-controller: sifive,plic: Fix number of interrupts The number of interrupts lacks an upper bound, thus assuming one, causing properly grouped "interrupts-extended" properties to be flagged as an error by "make dtbs_check". Fix this by adding the missing "maxItems", using the architectural maximum of 15872 interrupts. Signed-off-by: Geert Uytterhoeven Acked-by: Rob Herring Reviewed-by: Anup Patel Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/f73a0aead89e1426b146c4c64f797aa035868bf0.1643360419.git.geert@linux-m68k.org --- .../bindings/interrupt-controller/sifive,plic-1.0.0.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml index 08d5a57ce00f..5edaa08f576e 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml @@ -61,6 +61,7 @@ properties: interrupts-extended: minItems: 1 + maxItems: 15872 description: Specifies which contexts are connected to the PLIC, with "-1" specifying that a context is not present. Each node pointed to should be a From c89e5eb7dcf1519e5e084ee82e0d29d4e751ddb7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 28 Jan 2022 10:03:58 +0100 Subject: [PATCH 288/367] dt-bindings: interrupt-controller: sifive,plic: Group interrupt tuples To improve human readability and enable automatic validation, the tuples in "interrupts-extended" properties should be grouped using angle brackets. Signed-off-by: Geert Uytterhoeven Reviewed-by: Rob Herring Reviewed-by: Anup Patel Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/211705e74a2ce77de43d036c5dea032484119bf7.1643360419.git.geert@linux-m68k.org --- .../interrupt-controller/sifive,plic-1.0.0.yaml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml index 5edaa08f576e..058997c4e3cd 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml @@ -90,12 +90,11 @@ examples: #interrupt-cells = <1>; compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; interrupt-controller; - interrupts-extended = < - &cpu0_intc 11 - &cpu1_intc 11 &cpu1_intc 9 - &cpu2_intc 11 &cpu2_intc 9 - &cpu3_intc 11 &cpu3_intc 9 - &cpu4_intc 11 &cpu4_intc 9>; + interrupts-extended = <&cpu0_intc 11>, + <&cpu1_intc 11>, <&cpu1_intc 9>, + <&cpu2_intc 11>, <&cpu2_intc 9>, + <&cpu3_intc 11>, <&cpu3_intc 9>, + <&cpu4_intc 11>, <&cpu4_intc 9>; reg = <0xc000000 0x4000000>; riscv,ndev = <10>; }; From 7f5056b9e7b71149bf11073f00a57fa1ac2921a9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 26 Jan 2022 15:35:14 -0500 Subject: [PATCH 289/367] security, lsm: dentry_init_security() Handle multi LSM registration A ceph user has reported that ceph is crashing with kernel NULL pointer dereference. Following is the backtrace. /proc/version: Linux version 5.16.2-arch1-1 (linux@archlinux) (gcc (GCC) 11.1.0, GNU ld (GNU Binutils) 2.36.1) #1 SMP PREEMPT Thu, 20 Jan 2022 16:18:29 +0000 distro / arch: Arch Linux / x86_64 SELinux is not enabled ceph cluster version: 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503) relevant dmesg output: [ 30.947129] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 30.947206] #PF: supervisor read access in kernel mode [ 30.947258] #PF: error_code(0x0000) - not-present page [ 30.947310] PGD 0 P4D 0 [ 30.947342] Oops: 0000 [#1] PREEMPT SMP PTI [ 30.947388] CPU: 5 PID: 778 Comm: touch Not tainted 5.16.2-arch1-1 #1 86fbf2c313cc37a553d65deb81d98e9dcc2a3659 [ 30.947486] Hardware name: Gigabyte Technology Co., Ltd. B365M DS3H/B365M DS3H, BIOS F5 08/13/2019 [ 30.947569] RIP: 0010:strlen+0x0/0x20 [ 30.947616] Code: b6 07 38 d0 74 16 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 31 d2 89 d6 89 d7 c3 48 89 f8 31 d2 89 d6 89 d7 c3 0 f 1f 40 00 <80> 3f 00 74 12 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 31 ff [ 30.947782] RSP: 0018:ffffa4ed80ffbbb8 EFLAGS: 00010246 [ 30.947836] RAX: 0000000000000000 RBX: ffffa4ed80ffbc60 RCX: 0000000000000000 [ 30.947904] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 30.947971] RBP: ffff94b0d15c0ae0 R08: 0000000000000000 R09: 0000000000000000 [ 30.948040] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 30.948106] R13: 0000000000000001 R14: ffffa4ed80ffbc60 R15: 0000000000000000 [ 30.948174] FS: 00007fc7520f0740(0000) GS:ffff94b7ced40000(0000) knlGS:0000000000000000 [ 30.948252] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 30.948308] CR2: 0000000000000000 CR3: 0000000104a40001 CR4: 00000000003706e0 [ 30.948376] Call Trace: [ 30.948404] [ 30.948431] ceph_security_init_secctx+0x7b/0x240 [ceph 49f9c4b9bf5be8760f19f1747e26da33920bce4b] [ 30.948582] ceph_atomic_open+0x51e/0x8a0 [ceph 49f9c4b9bf5be8760f19f1747e26da33920bce4b] [ 30.948708] ? get_cached_acl+0x4d/0xa0 [ 30.948759] path_openat+0x60d/0x1030 [ 30.948809] do_filp_open+0xa5/0x150 [ 30.948859] do_sys_openat2+0xc4/0x190 [ 30.948904] __x64_sys_openat+0x53/0xa0 [ 30.948948] do_syscall_64+0x5c/0x90 [ 30.948989] ? exc_page_fault+0x72/0x180 [ 30.949034] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 30.949091] RIP: 0033:0x7fc7521e25bb [ 30.950849] Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 00 00 85 c0 75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 0 0 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 91 00 00 00 48 8b 54 24 28 64 48 2b 14 25 Core of the problem is that ceph checks for return code from security_dentry_init_security() and if return code is 0, it assumes everything is fine and continues to call strlen(name), which crashes. Typically SELinux LSM returns 0 and sets name to "security.selinux" and it is not a problem. Or if selinux is not compiled in or disabled, it returns -EOPNOTSUP and ceph deals with it. But somehow in this configuration, 0 is being returned and "name" is not being initialized and that's creating the problem. Our suspicion is that BPF LSM is registering a hook for dentry_init_security() and returns hook default of 0. LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry,...) I have not been able to reproduce it just by doing CONFIG_BPF_LSM=y. Stephen has tested the patch though and confirms it solves the problem for him. dentry_init_security() is written in such a way that it expects only one LSM to register the hook. Atleast that's the expectation with current code. If another LSM returns a hook and returns default, it will simply return 0 as of now and that will break ceph. Hence, suggestion is that change semantics of this hook a bit. If there are no LSMs or no LSM is taking ownership and initializing security context, then return -EOPNOTSUP. Also allow at max one LSM to initialize security context. This hook can't deal with multiple LSMs trying to init security context. This patch implements this new behavior. Reported-by: Stephen Muth Tested-by: Stephen Muth Suggested-by: Casey Schaufler Acked-by: Casey Schaufler Reviewed-by: Serge Hallyn Cc: Jeff Layton Cc: Christian Brauner Cc: Paul Moore Cc: # 5.16.0 Signed-off-by: Vivek Goyal Reviewed-by: Jeff Layton Acked-by: Paul Moore Acked-by: Christian Brauner Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- security/security.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index df8de62f4710..f0c7b352340a 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -82,7 +82,7 @@ LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val, int len, void **mnt_opts) LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) -LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, +LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, void **ctx, u32 *ctxlen) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, diff --git a/security/security.c b/security/security.c index c88167a414b4..64abdfb20bc2 100644 --- a/security/security.c +++ b/security/security.c @@ -1056,8 +1056,19 @@ int security_dentry_init_security(struct dentry *dentry, int mode, const char **xattr_name, void **ctx, u32 *ctxlen) { - return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode, - name, xattr_name, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security, list) { + rc = hp->hook.dentry_init_security(dentry, mode, name, + xattr_name, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(dentry_init_security)) + return rc; + } + return LSM_RET_DEFAULT(dentry_init_security); } EXPORT_SYMBOL(security_dentry_init_security); From e45c47d1f94e0cc7b6b079fdb4bcce2995e2adc4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 28 Jan 2022 10:58:39 -0500 Subject: [PATCH 290/367] block: add bio_start_io_acct_time() to control start_time bio_start_io_acct_time() interface is like bio_start_io_acct() that allows start_time to be passed in. This gives drivers the ability to defer starting accounting until after IO is issued (but possibily not entirely due to bio splitting). Reviewed-by: Christoph Hellwig Signed-off-by: Mike Snitzer Link: https://lore.kernel.org/r/20220128155841.39644-2-snitzer@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 25 +++++++++++++++++++------ include/linux/blkdev.h | 1 + 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 97f8bc8d3a79..d93e3bb9a769 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1061,21 +1061,33 @@ again: } static unsigned long __part_start_io_acct(struct block_device *part, - unsigned int sectors, unsigned int op) + unsigned int sectors, unsigned int op, + unsigned long start_time) { const int sgrp = op_stat_group(op); - unsigned long now = READ_ONCE(jiffies); part_stat_lock(); - update_io_ticks(part, now, false); + update_io_ticks(part, start_time, false); part_stat_inc(part, ios[sgrp]); part_stat_add(part, sectors[sgrp], sectors); part_stat_local_inc(part, in_flight[op_is_write(op)]); part_stat_unlock(); - return now; + return start_time; } +/** + * bio_start_io_acct_time - start I/O accounting for bio based drivers + * @bio: bio to start account for + * @start_time: start time that should be passed back to bio_end_io_acct(). + */ +void bio_start_io_acct_time(struct bio *bio, unsigned long start_time) +{ + __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), start_time); +} +EXPORT_SYMBOL_GPL(bio_start_io_acct_time); + /** * bio_start_io_acct - start I/O accounting for bio based drivers * @bio: bio to start account for @@ -1084,14 +1096,15 @@ static unsigned long __part_start_io_acct(struct block_device *part, */ unsigned long bio_start_io_acct(struct bio *bio) { - return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio)); + return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op) { - return __part_start_io_acct(disk->part0, sectors, op); + return __part_start_io_acct(disk->part0, sectors, op, jiffies); } EXPORT_SYMBOL(disk_start_io_acct); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c95df26fc26..f35aea98bc35 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1258,6 +1258,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); +void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev); From f524d9c95fab54783d0038f7a3e8c014d5b56857 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 28 Jan 2022 10:58:40 -0500 Subject: [PATCH 291/367] dm: revert partial fix for redundant bio-based IO accounting Reverts a1e1cb72d9649 ("dm: fix redundant IO accounting for bios that need splitting") because it was too narrow in scope (only addressed redundant 'sectors[]' accounting and not ios, nsecs[], etc). Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Link: https://lore.kernel.org/r/20220128155841.39644-3-snitzer@redhat.com Signed-off-by: Jens Axboe --- drivers/md/dm.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c0ae8087c602..9849114b3c08 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1442,9 +1442,6 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, ci->sector = bio->bi_iter.bi_sector; } -#define __dm_part_stat_sub(part, field, subnd) \ - (part_stat_get(part, field) -= (subnd)) - /* * Entry point to split a bio into clones and submit them to the targets. */ @@ -1480,18 +1477,6 @@ static void __split_and_process_bio(struct mapped_device *md, GFP_NOIO, &md->queue->bio_split); ci.io->orig_bio = b; - /* - * Adjust IO stats for each split, otherwise upon queue - * reentry there will be redundant IO accounting. - * NOTE: this is a stop-gap fix, a proper fix involves - * significant refactoring of DM core's bio splitting - * (by eliminating DM's splitting and just using bio_split) - */ - part_stat_lock(); - __dm_part_stat_sub(dm_disk(md)->part0, - sectors[op_stat_group(bio_op(bio))], ci.sector_count); - part_stat_unlock(); - bio_chain(b, bio); trace_block_split(b, bio->bi_iter.bi_sector); submit_bio_noacct(bio); From b879f915bc48a18d4f4462729192435bb0f17052 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 28 Jan 2022 10:58:41 -0500 Subject: [PATCH 292/367] dm: properly fix redundant bio-based IO accounting Record the start_time for a bio but defer the starting block core's IO accounting until after IO is submitted using bio_start_io_acct_time(). This approach avoids the need to mess around with any of the individual IO stats in response to a bio_split() that follows bio submission. Reported-by: Bud Brown Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Depends-on: e45c47d1f94e ("block: add bio_start_io_acct_time() to control start_time") Signed-off-by: Mike Snitzer Link: https://lore.kernel.org/r/20220128155841.39644-4-snitzer@redhat.com Signed-off-by: Jens Axboe --- drivers/md/dm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9849114b3c08..dcbd6d201619 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -489,7 +489,7 @@ static void start_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->orig_bio; - io->start_time = bio_start_io_acct(bio); + bio_start_io_acct_time(bio, io->start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), @@ -535,7 +535,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->md = md; spin_lock_init(&io->endio_lock); - start_io_acct(io); + io->start_time = jiffies; return io; } @@ -1482,6 +1482,7 @@ static void __split_and_process_bio(struct mapped_device *md, submit_bio_noacct(bio); } } + start_io_acct(ci.io); /* drop the extra reference count */ dm_io_dec_pending(ci.io, errno_to_blk_status(error)); From 5297c693d8c8e08fa742e3112cf70723f7a04da2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 27 Jan 2022 13:50:31 -0800 Subject: [PATCH 293/367] pinctrl: bcm2835: Fix a few error paths After commit 266423e60ea1 ("pinctrl: bcm2835: Change init order for gpio hogs") a few error paths would not unwind properly the registration of gpio ranges. Correct that by assigning a single error label and goto it whenever we encounter a fatal error. Fixes: 266423e60ea1 ("pinctrl: bcm2835: Change init order for gpio hogs") Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220127215033.267227-1-f.fainelli@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index c4ebfa852b42..47e433e09c5c 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -1269,16 +1269,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) sizeof(*girq->parents), GFP_KERNEL); if (!girq->parents) { - pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); - return -ENOMEM; + err = -ENOMEM; + goto out_remove; } if (is_7211) { pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS, sizeof(*pc->wake_irq), GFP_KERNEL); - if (!pc->wake_irq) - return -ENOMEM; + if (!pc->wake_irq) { + err = -ENOMEM; + goto out_remove; + } } /* @@ -1306,8 +1308,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) len = strlen(dev_name(pc->dev)) + 16; name = devm_kzalloc(pc->dev, len, GFP_KERNEL); - if (!name) - return -ENOMEM; + if (!name) { + err = -ENOMEM; + goto out_remove; + } snprintf(name, len, "%s:bank%d", dev_name(pc->dev), i); @@ -1326,11 +1330,14 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) err = gpiochip_add_data(&pc->gpio_chip, pc); if (err) { dev_err(dev, "could not add GPIO chip\n"); - pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); - return err; + goto out_remove; } return 0; + +out_remove: + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); + return err; } static struct platform_driver bcm2835_pinctrl_driver = { From 3a5286955bf5febc3d151bcb2c5e272e383b64aa Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Mon, 17 Jan 2022 01:25:57 -0500 Subject: [PATCH 294/367] pinctrl: bcm63xx: fix unmet dependency on REGMAP for GPIO_REGMAP When PINCTRL_BCM63XX is selected, and REGMAP is not selected, Kbuild gives the following warning: WARNING: unmet direct dependencies detected for GPIO_REGMAP Depends on [n]: GPIOLIB [=y] && REGMAP [=n] Selected by [y]: - PINCTRL_BCM63XX [=y] && PINCTRL [=y] This is because PINCTRL_BCM63XX selects GPIO_REGMAP without selecting or depending on REGMAP, despite GPIO_REGMAP depending on REGMAP. This unmet dependency bug was detected by Kismet, a static analysis tool for Kconfig. Please advise if this is not the appropriate solution. Signed-off-by: Julian Braha Link: https://lore.kernel.org/r/20220117062557.89568-1-julianbraha@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/bcm/Kconfig b/drivers/pinctrl/bcm/Kconfig index 5123f4c33854..ac1e400bbbac 100644 --- a/drivers/pinctrl/bcm/Kconfig +++ b/drivers/pinctrl/bcm/Kconfig @@ -35,6 +35,7 @@ config PINCTRL_BCM63XX select PINCONF select GENERIC_PINCONF select GPIOLIB + select REGMAP select GPIO_REGMAP config PINCTRL_BCM6318 From 6cb917411e028dcb66ce8f5db1b47361b78d7d3f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 29 Jan 2022 13:40:52 -0800 Subject: [PATCH 295/367] include/linux/sysctl.h: fix register_sysctl_mount_point() return type The CONFIG_SYSCTL=n stub returns the wrong type. Fixes: ee9efac48a082 ("sysctl: add helper to register a sysctl mount point") Reported-by: kernel test robot Acked-by: Luis Chamberlain Cc: Tong Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 180adf7da785..6353d6db69b2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -265,7 +265,7 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * return NULL; } -static inline struct sysctl_header *register_sysctl_mount_point(const char *path) +static inline struct ctl_table_header *register_sysctl_mount_point(const char *path) { return NULL; } From e7f1e8834b2b2144dfbe0b2235d05e4f6f95882e Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sat, 29 Jan 2022 13:40:55 -0800 Subject: [PATCH 296/367] binfmt_misc: fix crash when load/unload module We should unregister the table upon module unload otherwise something horrible will happen when we load binfmt_misc module again. Also note that we should keep value returned by register_sysctl_mount_point() and release it later, otherwise it will leak. Also, per Christian's comment, to fully restore the old behavior that won't break userspace the check(binfmt_misc_header) should be eliminated. To reproduce: modprobe binfmt_misc modprobe -r binfmt_misc modprobe binfmt_misc modprobe -r binfmt_misc modprobe binfmt_misc resulting in modprobe: can't load module binfmt_misc (kernel/fs/binfmt_misc.ko): Cannot allocate memory and an unhappy kernel: binfmt_misc: Failed to create fs/binfmt_misc sysctl mount point binfmt_misc: Failed to create fs/binfmt_misc sysctl mount point BUG: unable to handle page fault for address: fffffbfff8004802 Call Trace: init_misc_binfmt+0x2d/0x1000 [binfmt_misc] Link: https://lkml.kernel.org/r/20220124181812.1869535-2-ztong0001@gmail.com Fixes: 3ba442d5331f ("fs: move binfmt_misc sysctl to its own file") Signed-off-by: Tong Zhang Co-developed-by: Christian Brauner Acked-by: Luis Chamberlain Cc: Eric Biederman Cc: Kees Cook Cc: Iurii Zaikin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index ddea6acbddde..c07f35719ee3 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -817,20 +817,20 @@ static struct file_system_type bm_fs_type = { }; MODULE_ALIAS_FS("binfmt_misc"); +static struct ctl_table_header *binfmt_misc_header; + static int __init init_misc_binfmt(void) { int err = register_filesystem(&bm_fs_type); if (!err) insert_binfmt(&misc_format); - if (!register_sysctl_mount_point("fs/binfmt_misc")) { - pr_warn("Failed to create fs/binfmt_misc sysctl mount point"); - return -ENOMEM; - } + binfmt_misc_header = register_sysctl_mount_point("fs/binfmt_misc"); return 0; } static void __exit exit_misc_binfmt(void) { + unregister_sysctl_table(binfmt_misc_header); unregister_binfmt(&misc_format); unregister_filesystem(&bm_fs_type); } From dbecf9b8b8ce580f4e11afed9d61e8aa294cddd2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 29 Jan 2022 13:40:58 -0800 Subject: [PATCH 297/367] ia64: make IA64_MCA_RECOVERY bool instead of tristate In linux-next, IA64_MCA_RECOVERY uses the (new) function make_task_dead(), which is not exported for use by modules. Instead of exporting it for one user, convert IA64_MCA_RECOVERY to be a bool Kconfig symbol. In a config file from "kernel test robot " for a different problem, this linker error was exposed when CONFIG_IA64_MCA_RECOVERY=m. Fixes this build error: ERROR: modpost: "make_task_dead" [arch/ia64/kernel/mca_recovery.ko] undefined! Link: https://lkml.kernel.org/r/20220124213129.29306-1-rdunlap@infradead.org Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Randy Dunlap Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: "Eric W. Biederman" Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 703952819e10..a7e01573abd8 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -318,7 +318,7 @@ config ARCH_PROC_KCORE_TEXT depends on PROC_KCORE config IA64_MCA_RECOVERY - tristate "MCA recovery from errors other than TLB." + bool "MCA recovery from errors other than TLB." config IA64_PALINFO tristate "/proc/pal support" From 61e28cf0543c7d8e6ef88c3c305f727c5a21ba5b Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Sat, 29 Jan 2022 13:41:01 -0800 Subject: [PATCH 298/367] memory-failure: fetch compound_head after pgmap_pfn_valid() memory_failure_dev_pagemap() at the moment assumes base pages (e.g. dax_lock_page()). For devmap with compound pages fetch the compound_head in case a tail page memory failure is being handled. Currently this is a nop, but in the advent of compound pages in dev_pagemap it allows memory_failure_dev_pagemap() to keep working. Without this fix memory-failure handling (i.e. MCEs on pmem) with device-dax configured namespaces will regress (and crash). Link: https://lkml.kernel.org/r/20211202204422.26777-2-joao.m.martins@oracle.com Reported-by: Jane Chu Signed-off-by: Joao Martins Reviewed-by: Naoya Horiguchi Reviewed-by: Dan Williams Reviewed-by: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 14ae5c18e776..97a9ed8f87a9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1595,6 +1595,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, goto out; } + /* + * Pages instantiated by device-dax (not filesystem-dax) + * may be compound pages. + */ + page = compound_head(page); + /* * Prevent the inode from being freed while we are interrogating * the address_space, typically this would be handled by From 536f4217ced62b671bd759f6b549621a5654a70f Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 29 Jan 2022 13:41:04 -0800 Subject: [PATCH 299/367] mm: page->mapping folio->mapping should have the same offset As with the other members of folio, the offset of page->mapping and folio->mapping must be the same. The compile-time check was inadvertently removed during development. Add it back. [willy@infradead.org: changelog redo] Link: https://lkml.kernel.org/r/20220104011734.21714-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9db36dc5d4cf..5140e5feb486 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -261,6 +261,7 @@ static_assert(sizeof(struct page) == sizeof(struct folio)); static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); +FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); FOLIO_MATCH(index, index); FOLIO_MATCH(private, private); From 0226bd64da52aa23120d1450c37a424387827a21 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sat, 29 Jan 2022 13:41:07 -0800 Subject: [PATCH 300/367] tools/testing/scatterlist: add missing defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cited commits replaced preemptible with pagefault_disabled and flush_kernel_dcache_page with flush_dcache_page respectively, hence need to update the corresponding defines in the test. scatterlist.c: In function ‘sg_miter_stop’: scatterlist.c:919:4: warning: implicit declaration of function ‘flush_dcache_page’ [-Wimplicit-function-declaration] flush_dcache_page(miter->page); ^~~~~~~~~~~~~~~~~ In file included from linux/scatterlist.h:8:0, from scatterlist.c:9: scatterlist.c:922:18: warning: implicit declaration of function ‘pagefault_disabled’ [-Wimplicit-function-declaration] WARN_ON_ONCE(!pagefault_disabled()); ^ linux/mm.h:23:25: note: in definition of macro ‘WARN_ON_ONCE’ int __ret_warn_on = !!(condition); \ ^~~~~~~~~ Link: https://lkml.kernel.org/r/20220118082105.1737320-1-maorg@nvidia.com Fixes: 723aca208516 ("mm/scatterlist: replace the !preemptible warning in sg_miter_stop()") Fixes: 0e84f5dbf8d6 ("scatterlist: replace flush_kernel_dcache_page with flush_dcache_page") Signed-off-by: Maor Gottlieb Tested-by: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/scatterlist/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 16ec895bbe5f..5bd9e6e80625 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -74,7 +74,7 @@ static inline unsigned long page_to_phys(struct page *page) __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ x, y) -#define preemptible() (1) +#define pagefault_disabled() (0) static inline void *kmap(struct page *page) { @@ -127,6 +127,7 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) #define kmemleak_free(a) #define PageSlab(p) (0) +#define flush_dcache_page(p) #define MAX_ERRNO 4095 From 09c6304e38e440b93a9ebf3f3cf75cd6cb529f91 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Sat, 29 Jan 2022 13:41:11 -0800 Subject: [PATCH 301/367] kasan: test: fix compatibility with FORTIFY_SOURCE With CONFIG_FORTIFY_SOURCE enabled, string functions will also perform dynamic checks using __builtin_object_size(ptr), which when failed will panic the kernel. Because the KASAN test deliberately performs out-of-bounds operations, the kernel panics with FORTIFY_SOURCE, for example: | kernel BUG at lib/string_helpers.c:910! | invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI | CPU: 1 PID: 137 Comm: kunit_try_catch Tainted: G B 5.16.0-rc3+ #3 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 | RIP: 0010:fortify_panic+0x19/0x1b | ... | Call Trace: | kmalloc_oob_in_memset.cold+0x16/0x16 | ... Fix it by also hiding `ptr` from the optimizer, which will ensure that __builtin_object_size() does not return a valid size, preventing fortified string functions from panicking. Link: https://lkml.kernel.org/r/20220124160744.1244685-1-elver@google.com Signed-off-by: Marco Elver Reported-by: Nico Pache Reviewed-by: Nico Pache Reviewed-by: Andrey Konovalov Reviewed-by: Kees Cook Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Brendan Higgins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 847cdbefab46..26a5c9007653 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -492,6 +492,7 @@ static void kmalloc_oob_in_memset(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + KASAN_GRANULE_SIZE)); @@ -515,6 +516,7 @@ static void kmalloc_memmove_negative_size(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(invalid_size); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); @@ -531,6 +533,7 @@ static void kmalloc_memmove_invalid_size(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); kfree(ptr); @@ -893,6 +896,7 @@ static void kasan_memchr(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = memchr(ptr, '1', size + 1)); @@ -919,6 +923,7 @@ static void kasan_memcmp(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = memcmp(ptr, arr, size+1)); From 27fe73394a1c6d0b07fa4d95f1bca116d1cc66e9 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sat, 29 Jan 2022 13:41:14 -0800 Subject: [PATCH 302/367] mm, kasan: use compare-exchange operation to set KASAN page tag It has been reported that the tag setting operation on newly-allocated pages can cause the page flags to be corrupted when performed concurrently with other flag updates as a result of the use of non-atomic operations. Fix the problem by using a compare-exchange loop to update the tag. Link: https://lkml.kernel.org/r/20220120020148.1632253-1-pcc@google.com Link: https://linux-review.googlesource.com/id/I456b24a2b9067d93968d43b4bb3351c0cec63101 Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e1a84b1e6787..213cc569b192 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1506,11 +1506,18 @@ static inline u8 page_kasan_tag(const struct page *page) static inline void page_kasan_tag_set(struct page *page, u8 tag) { - if (kasan_enabled()) { - tag ^= 0xff; - page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); - page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; - } + unsigned long old_flags, flags; + + if (!kasan_enabled()) + return; + + tag ^= 0xff; + old_flags = READ_ONCE(page->flags); + do { + flags = old_flags; + flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); + flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); } static inline void page_kasan_tag_reset(struct page *page) From 51e50fbd3efc6064c30ed73a5e009018b36e290a Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 29 Jan 2022 13:41:17 -0800 Subject: [PATCH 303/367] psi: fix "no previous prototype" warnings when CONFIG_CGROUPS=n When CONFIG_CGROUPS is disabled psi code generates the following warnings: kernel/sched/psi.c:1112:21: warning: no previous prototype for 'psi_trigger_create' [-Wmissing-prototypes] 1112 | struct psi_trigger *psi_trigger_create(struct psi_group *group, | ^~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1182:6: warning: no previous prototype for 'psi_trigger_destroy' [-Wmissing-prototypes] 1182 | void psi_trigger_destroy(struct psi_trigger *t) | ^~~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1249:10: warning: no previous prototype for 'psi_trigger_poll' [-Wmissing-prototypes] 1249 | __poll_t psi_trigger_poll(void **trigger_ptr, | ^~~~~~~~~~~~~~~~ Change the declarations of these functions in the header to provide the prototypes even when they are unused. Link: https://lkml.kernel.org/r/20220119223940.787748-2-surenb@google.com Fixes: 0e94682b73bf ("psi: introduce psi monitor") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/psi.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/psi.h b/include/linux/psi.h index f8ce53bfdb2a..7f7d1d88c3bb 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -25,18 +25,17 @@ void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); - -#ifdef CONFIG_CGROUPS -int psi_cgroup_alloc(struct cgroup *cgrp); -void psi_cgroup_free(struct cgroup *cgrp); -void cgroup_move_task(struct task_struct *p, struct css_set *to); - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); + +#ifdef CONFIG_CGROUPS +int psi_cgroup_alloc(struct cgroup *cgrp); +void psi_cgroup_free(struct cgroup *cgrp); +void cgroup_move_task(struct task_struct *p, struct css_set *to); #endif #else /* CONFIG_PSI */ From 44585f7bc0cb01095bc2ad4258049c02bbad21ef Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 29 Jan 2022 13:41:20 -0800 Subject: [PATCH 304/367] psi: fix "defined but not used" warnings when CONFIG_PROC_FS=n When CONFIG_PROC_FS is disabled psi code generates the following warnings: kernel/sched/psi.c:1364:30: warning: 'psi_cpu_proc_ops' defined but not used [-Wunused-const-variable=] 1364 | static const struct proc_ops psi_cpu_proc_ops = { | ^~~~~~~~~~~~~~~~ kernel/sched/psi.c:1355:30: warning: 'psi_memory_proc_ops' defined but not used [-Wunused-const-variable=] 1355 | static const struct proc_ops psi_memory_proc_ops = { | ^~~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1346:30: warning: 'psi_io_proc_ops' defined but not used [-Wunused-const-variable=] 1346 | static const struct proc_ops psi_io_proc_ops = { | ^~~~~~~~~~~~~~~ Make definitions of these structures and related functions conditional on CONFIG_PROC_FS config. Link: https://lkml.kernel.org/r/20220119223940.787748-3-surenb@google.com Fixes: 0e94682b73bf ("psi: introduce psi monitor") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 79 ++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index c137c4d6983e..e14358178849 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1082,44 +1082,6 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) return 0; } -static int psi_io_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_IO); -} - -static int psi_memory_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_MEM); -} - -static int psi_cpu_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_CPU); -} - -static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *)) -{ - if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) - return -EPERM; - - return single_open(file, psi_show, NULL); -} - -static int psi_io_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_io_show); -} - -static int psi_memory_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_memory_show); -} - -static int psi_cpu_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_cpu_show); -} - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res) { @@ -1278,6 +1240,45 @@ __poll_t psi_trigger_poll(void **trigger_ptr, return ret; } +#ifdef CONFIG_PROC_FS +static int psi_io_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_IO); +} + +static int psi_memory_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_MEM); +} + +static int psi_cpu_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_CPU); +} + +static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *)) +{ + if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + return single_open(file, psi_show, NULL); +} + +static int psi_io_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_io_show); +} + +static int psi_memory_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_memory_show); +} + +static int psi_cpu_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_cpu_show); +} + static ssize_t psi_write(struct file *file, const char __user *user_buf, size_t nbytes, enum psi_res res) { @@ -1392,3 +1393,5 @@ static int __init psi_proc_init(void) return 0; } module_init(psi_proc_init); + +#endif /* CONFIG_PROC_FS */ From 4cd1103d8c66b2cdb7e64385c274edb0ac5e8887 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Sat, 29 Jan 2022 13:41:23 -0800 Subject: [PATCH 305/367] jbd2: export jbd2_journal_[grab|put]_journal_head Patch series "ocfs2: fix a deadlock case". This fixes a deadlock case in ocfs2. We firstly export jbd2 symbols jbd2_journal_[grab|put]_journal_head as preparation and later use them in ocfs2 insread of jbd_[lock|unlock]_bh_journal_head to fix the deadlock. This patch (of 2): This exports symbols jbd2_journal_[grab|put]_journal_head, which will be used outside modules, e.g. ocfs2. Link: https://lkml.kernel.org/r/20220121071205.100648-2-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Andreas Dilger Cc: Gautham Ananthakrishna Cc: Saeed Mirzamohammadi Cc: "Theodore Ts'o" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd2/journal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f13d548e4a7f..bf108d4493d2 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2972,6 +2972,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) jbd_unlock_bh_journal_head(bh); return jh; } +EXPORT_SYMBOL(jbd2_journal_grab_journal_head); static void __journal_remove_journal_head(struct buffer_head *bh) { @@ -3024,6 +3025,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) jbd_unlock_bh_journal_head(bh); } } +EXPORT_SYMBOL(jbd2_journal_put_journal_head); /* * Initialize jbd inode head From ddf4b773aa40790dfa936bd845c18e735a49c61c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Sat, 29 Jan 2022 13:41:27 -0800 Subject: [PATCH 306/367] ocfs2: fix a deadlock when commit trans commit 6f1b228529ae introduces a regression which can deadlock as follows: Task1: Task2: jbd2_journal_commit_transaction ocfs2_test_bg_bit_allocatable spin_lock(&jh->b_state_lock) jbd_lock_bh_journal_head __jbd2_journal_remove_checkpoint spin_lock(&jh->b_state_lock) jbd2_journal_put_journal_head jbd_lock_bh_journal_head Task1 and Task2 lock bh->b_state and jh->b_state_lock in different order, which finally result in a deadlock. So use jbd2_journal_[grab|put]_journal_head instead in ocfs2_test_bg_bit_allocatable() to fix it. Link: https://lkml.kernel.org/r/20220121071205.100648-3-joseph.qi@linux.alibaba.com Fixes: 6f1b228529ae ("ocfs2: fix race between searching chunks and release journal_head from buffer_head") Signed-off-by: Joseph Qi Reported-by: Gautham Ananthakrishna Tested-by: Gautham Ananthakrishna Reported-by: Saeed Mirzamohammadi Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/suballoc.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 481017e1dac5..166c8918c825 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct journal_head *jh; - int ret = 1; + int ret; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; - if (!buffer_jbd(bg_bh)) + jh = jbd2_journal_grab_journal_head(bg_bh); + if (!jh) return 1; - jbd_lock_bh_journal_head(bg_bh); - if (buffer_jbd(bg_bh)) { - jh = bh2jh(bg_bh); - spin_lock(&jh->b_state_lock); - bg = (struct ocfs2_group_desc *) jh->b_committed_data; - if (bg) - ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); - else - ret = 1; - spin_unlock(&jh->b_state_lock); - } - jbd_unlock_bh_journal_head(bg_bh); + spin_lock(&jh->b_state_lock); + bg = (struct ocfs2_group_desc *) jh->b_committed_data; + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); return ret; } From 26291c54e111ff6ba87a164d85d4a4e134b7315c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 30 Jan 2022 15:37:07 +0200 Subject: [PATCH 307/367] Linux 5.17-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0fb4f94a6885..1fc3491096cb 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Gobble Gobble # *DOCUMENTATION* From 341adeec9adad0874f29a0a1af35638207352a39 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Wed, 26 Jan 2022 23:33:04 +0800 Subject: [PATCH 308/367] net/smc: Forward wakeup to smc socket waitqueue after fallback When we replace TCP with SMC and a fallback occurs, there may be some socket waitqueue entries remaining in smc socket->wq, such as eppoll_entries inserted by userspace applications. After the fallback, data flows over TCP/IP and only clcsocket->wq will be woken up. Applications can't be notified by the entries which were inserted in smc socket->wq before fallback. So we need a mechanism to wake up smc socket->wq at the same time if some entries remaining in it. The current workaround is to transfer the entries from smc socket->wq to clcsock->wq during the fallback. But this may cause a crash like this: general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] PREEMPT SMP PTI CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: G E 5.16.0+ #107 RIP: 0010:__wake_up_common+0x65/0x170 Call Trace: __wake_up_common_lock+0x7a/0xc0 sock_def_readable+0x3c/0x70 tcp_data_queue+0x4a7/0xc40 tcp_rcv_established+0x32f/0x660 ? sk_filter_trim_cap+0xcb/0x2e0 tcp_v4_do_rcv+0x10b/0x260 tcp_v4_rcv+0xd2a/0xde0 ip_protocol_deliver_rcu+0x3b/0x1d0 ip_local_deliver_finish+0x54/0x60 ip_local_deliver+0x6a/0x110 ? tcp_v4_early_demux+0xa2/0x140 ? tcp_v4_early_demux+0x10d/0x140 ip_sublist_rcv_finish+0x49/0x60 ip_sublist_rcv+0x19d/0x230 ip_list_rcv+0x13e/0x170 __netif_receive_skb_list_core+0x1c2/0x240 netif_receive_skb_list_internal+0x1e6/0x320 napi_complete_done+0x11d/0x190 mlx5e_napi_poll+0x163/0x6b0 [mlx5_core] __napi_poll+0x3c/0x1b0 net_rx_action+0x27c/0x300 __do_softirq+0x114/0x2d2 irq_exit_rcu+0xb4/0xe0 common_interrupt+0xba/0xe0 The crash is caused by privately transferring waitqueue entries from smc socket->wq to clcsock->wq. The owners of these entries, such as epoll, have no idea that the entries have been transferred to a different socket wait queue and still use original waitqueue spinlock (smc socket->wq.wait.lock) to make the entries operation exclusive, but it doesn't work. The operations to the entries, such as removing from the waitqueue (now is clcsock->wq after fallback), may cause a crash when clcsock waitqueue is being iterated over at the moment. This patch tries to fix this by no longer transferring wait queue entries privately, but introducing own implementations of clcsock's callback functions in fallback situation. The callback functions will forward the wakeup to smc socket->wq if clcsock->wq is actually woken up and smc socket->wq has remaining entries. Fixes: 2153bd1e3d3d ("net/smc: Transfer remaining wait queue entries during fallback") Suggested-by: Karsten Graul Signed-off-by: Wen Gu Acked-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 133 +++++++++++++++++++++++++++++++++++++++++------ net/smc/smc.h | 20 ++++++- 2 files changed, 137 insertions(+), 16 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d5ea62b82bb8..8c89d0b0ca18 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -566,17 +566,115 @@ static void smc_stat_fallback(struct smc_sock *smc) mutex_unlock(&net->smc.mutex_fback_rsn); } +/* must be called under rcu read lock */ +static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key) +{ + struct socket_wq *wq; + __poll_t flags; + + wq = rcu_dereference(smc->sk.sk_wq); + if (!skwq_has_sleeper(wq)) + return; + + /* wake up smc sk->sk_wq */ + if (!key) { + /* sk_state_change */ + wake_up_interruptible_all(&wq->wait); + } else { + flags = key_to_poll(key); + if (flags & (EPOLLIN | EPOLLOUT)) + /* sk_data_ready or sk_write_space */ + wake_up_interruptible_sync_poll(&wq->wait, flags); + else if (flags & EPOLLERR) + /* sk_error_report */ + wake_up_interruptible_poll(&wq->wait, flags); + } +} + +static int smc_fback_mark_woken(wait_queue_entry_t *wait, + unsigned int mode, int sync, void *key) +{ + struct smc_mark_woken *mark = + container_of(wait, struct smc_mark_woken, wait_entry); + + mark->woken = true; + mark->key = key; + return 0; +} + +static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk, + void (*clcsock_callback)(struct sock *sk)) +{ + struct smc_mark_woken mark = { .woken = false }; + struct socket_wq *wq; + + init_waitqueue_func_entry(&mark.wait_entry, + smc_fback_mark_woken); + rcu_read_lock(); + wq = rcu_dereference(clcsk->sk_wq); + if (!wq) + goto out; + add_wait_queue(sk_sleep(clcsk), &mark.wait_entry); + clcsock_callback(clcsk); + remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry); + + if (mark.woken) + smc_fback_wakeup_waitqueue(smc, mark.key); +out: + rcu_read_unlock(); +} + +static void smc_fback_state_change(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change); +} + +static void smc_fback_data_ready(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready); +} + +static void smc_fback_write_space(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space); +} + +static void smc_fback_error_report(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); +} + static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) { - wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); - wait_queue_head_t *clc_wait; - unsigned long flags; + struct sock *clcsk; mutex_lock(&smc->clcsock_release_lock); if (!smc->clcsock) { mutex_unlock(&smc->clcsock_release_lock); return -EBADF; } + clcsk = smc->clcsock->sk; + smc->use_fallback = true; smc->fallback_rsn = reason_code; smc_stat_fallback(smc); @@ -587,16 +685,22 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; - /* There may be some entries remaining in - * smc socket->wq, which should be removed - * to clcsocket->wq during the fallback. + /* There might be some wait entries remaining + * in smc sk->sk_wq and they should be woken up + * as clcsock's wait queue is woken up. */ - clc_wait = sk_sleep(smc->clcsock->sk); - spin_lock_irqsave(&smc_wait->lock, flags); - spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); - list_splice_init(&smc_wait->head, &clc_wait->head); - spin_unlock(&clc_wait->lock); - spin_unlock_irqrestore(&smc_wait->lock, flags); + smc->clcsk_state_change = clcsk->sk_state_change; + smc->clcsk_data_ready = clcsk->sk_data_ready; + smc->clcsk_write_space = clcsk->sk_write_space; + smc->clcsk_error_report = clcsk->sk_error_report; + + clcsk->sk_state_change = smc_fback_state_change; + clcsk->sk_data_ready = smc_fback_data_ready; + clcsk->sk_write_space = smc_fback_write_space; + clcsk->sk_error_report = smc_fback_error_report; + + smc->clcsock->sk->sk_user_data = + (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); } mutex_unlock(&smc->clcsock_release_lock); return 0; @@ -2115,10 +2219,9 @@ out: static void smc_clcsock_data_ready(struct sock *listen_clcsock) { - struct smc_sock *lsmc; + struct smc_sock *lsmc = + smc_clcsock_user_data(listen_clcsock); - lsmc = (struct smc_sock *) - ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY); if (!lsmc) return; lsmc->clcsk_data_ready(listen_clcsock); diff --git a/net/smc/smc.h b/net/smc/smc.h index 3d0b8e300deb..37b2001a0255 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -139,6 +139,12 @@ enum smc_urg_state { SMC_URG_READ = 3, /* data was already read */ }; +struct smc_mark_woken { + bool woken; + void *key; + wait_queue_entry_t wait_entry; +}; + struct smc_connection { struct rb_node alert_node; struct smc_link_group *lgr; /* link group of connection */ @@ -228,8 +234,14 @@ struct smc_connection { struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ + void (*clcsk_state_change)(struct sock *sk); + /* original stat_change fct. */ void (*clcsk_data_ready)(struct sock *sk); - /* original data_ready fct. **/ + /* original data_ready fct. */ + void (*clcsk_write_space)(struct sock *sk); + /* original write_space fct. */ + void (*clcsk_error_report)(struct sock *sk); + /* original error_report fct. */ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct connect_work; /* handle non-blocking connect*/ @@ -264,6 +276,12 @@ static inline struct smc_sock *smc_sk(const struct sock *sk) return (struct smc_sock *)sk; } +static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk) +{ + return (struct smc_sock *) + ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); +} + extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ From baf927a833ca2c6717795ac131079f485cb7a5dc Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 31 Jan 2022 09:52:01 +0100 Subject: [PATCH 309/367] pinctrl: microchip-sgpio: Fix support for regmap Initially the driver accessed the registers using u32 __iomem but then in the blamed commit it changed it to use regmap. The problem is that now the offset of the registers is not calculated anymore at word offset but at byte offset. Therefore make sure to multiply the offset with word size. Acked-by: Steen Hegelund Reviewed-by: Colin Foster Fixes: 2afbbab45c261a ("pinctrl: microchip-sgpio: update to support regmap") Signed-off-by: Horatiu Vultur Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220131085201.307031-1-horatiu.vultur@microchip.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-microchip-sgpio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c index 8e081c90bdb2..639f1130e989 100644 --- a/drivers/pinctrl/pinctrl-microchip-sgpio.c +++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c @@ -137,7 +137,8 @@ static inline int sgpio_addr_to_pin(struct sgpio_priv *priv, int port, int bit) static inline u32 sgpio_get_addr(struct sgpio_priv *priv, u32 rno, u32 off) { - return priv->properties->regoff[rno] + off; + return (priv->properties->regoff[rno] + off) * + regmap_get_reg_stride(priv->regs); } static u32 sgpio_readl(struct sgpio_priv *priv, u32 rno, u32 off) From f83a96e5f033fbbd21764705cb9c04234b96218e Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 31 Jan 2022 15:17:08 +0100 Subject: [PATCH 310/367] spi: mediatek: Avoid NULL pointer crash in interrupt In some case, like after a transfer timeout, master->cur_msg pointer is NULL which led to a kernel crash when trying to use master->cur_msg->spi. mtk_spi_can_dma(), pointed by master->can_dma, doesn't use this parameter avoid the problem by setting NULL as second parameter. Fixes: a568231f46322 ("spi: mediatek: Add spi bus for Mediatek MT8173") Signed-off-by: Benjamin Gaignard Link: https://lore.kernel.org/r/20220131141708.888710-1-benjamin.gaignard@collabora.com Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index a15de10ee286..753bd313e6fd 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -624,7 +624,7 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) else mdata->state = MTK_SPI_IDLE; - if (!master->can_dma(master, master->cur_msg->spi, trans)) { + if (!master->can_dma(master, NULL, trans)) { if (trans->rx_buf) { cnt = mdata->xfer_len / 4; ioread32_rep(mdata->base + SPI_RX_DATA_REG, From ec049891b2dc16591813eacaddc476b3d27c8c14 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 31 Jan 2022 11:34:05 +0000 Subject: [PATCH 311/367] kselftest: Fix vdso_test_abi return status vdso_test_abi contains a batch of tests that verify the validity of the vDSO ABI. When a vDSO symbol is not found the relevant test is skipped reporting KSFT_SKIP. All the tests return values are then added in a single variable which is checked to verify failures. This approach can have side effects which result in reporting the wrong kselftest exit status. Fix vdso_test_abi verifying the return code of each test separately. Cc: Shuah Khan Cc: Andy Lutomirski Cc: Thomas Gleixner Reported-by: Cristian Marussi Signed-off-by: Vincenzo Frascino Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_abi.c | 135 +++++++++---------- 1 file changed, 62 insertions(+), 73 deletions(-) diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index 3d603f1394af..883ca85424bc 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -33,110 +33,114 @@ typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); typedef time_t (*vdso_time_t)(time_t *t); -static int vdso_test_gettimeofday(void) +#define VDSO_TEST_PASS_MSG() "\n%s(): PASS\n", __func__ +#define VDSO_TEST_FAIL_MSG(x) "\n%s(): %s FAIL\n", __func__, x +#define VDSO_TEST_SKIP_MSG(x) "\n%s(): SKIP: Could not find %s\n", __func__, x + +static void vdso_test_gettimeofday(void) { /* Find gettimeofday. */ vdso_gettimeofday_t vdso_gettimeofday = (vdso_gettimeofday_t)vdso_sym(version, name[0]); if (!vdso_gettimeofday) { - printf("Could not find %s\n", name[0]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[0])); + return; } struct timeval tv; long ret = vdso_gettimeofday(&tv, 0); if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)tv.tv_sec, (long long)tv.tv_usec); + ksft_print_msg("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[0]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[0])); } - - return KSFT_PASS; } -static int vdso_test_clock_gettime(clockid_t clk_id) +static void vdso_test_clock_gettime(clockid_t clk_id) { /* Find clock_gettime. */ vdso_clock_gettime_t vdso_clock_gettime = (vdso_clock_gettime_t)vdso_sym(version, name[1]); if (!vdso_clock_gettime) { - printf("Could not find %s\n", name[1]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[1])); + return; } struct timespec ts; long ret = vdso_clock_gettime(clk_id, &ts); if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_print_msg("The time is %lld.%06lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[1]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[1])); } - - return KSFT_PASS; } -static int vdso_test_time(void) +static void vdso_test_time(void) { /* Find time. */ vdso_time_t vdso_time = (vdso_time_t)vdso_sym(version, name[2]); if (!vdso_time) { - printf("Could not find %s\n", name[2]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[2])); + return; } long ret = vdso_time(NULL); if (ret > 0) { - printf("The time in hours since January 1, 1970 is %lld\n", + ksft_print_msg("The time in hours since January 1, 1970 is %lld\n", (long long)(ret / 3600)); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[2]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[2])); } - - return KSFT_PASS; } -static int vdso_test_clock_getres(clockid_t clk_id) +static void vdso_test_clock_getres(clockid_t clk_id) { + int clock_getres_fail = 0; + /* Find clock_getres. */ vdso_clock_getres_t vdso_clock_getres = (vdso_clock_getres_t)vdso_sym(version, name[3]); if (!vdso_clock_getres) { - printf("Could not find %s\n", name[3]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[3])); + return; } struct timespec ts, sys_ts; long ret = vdso_clock_getres(clk_id, &ts); if (ret == 0) { - printf("The resolution is %lld %lld\n", - (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_print_msg("The vdso resolution is %lld %lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); } else { - printf("%s failed\n", name[3]); - return KSFT_FAIL; + clock_getres_fail++; } ret = syscall(SYS_clock_getres, clk_id, &sys_ts); - if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) { - printf("%s failed\n", name[3]); - return KSFT_FAIL; - } + ksft_print_msg("The syscall resolution is %lld %lld\n", + (long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec); - return KSFT_PASS; + if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) + clock_getres_fail++; + + if (clock_getres_fail > 0) { + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[3])); + } else { + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + } } const char *vdso_clock_name[12] = { @@ -158,36 +162,23 @@ const char *vdso_clock_name[12] = { * This function calls vdso_test_clock_gettime and vdso_test_clock_getres * with different values for clock_id. */ -static inline int vdso_test_clock(clockid_t clock_id) +static inline void vdso_test_clock(clockid_t clock_id) { - int ret0, ret1; + ksft_print_msg("\nclock_id: %s\n", vdso_clock_name[clock_id]); - ret0 = vdso_test_clock_gettime(clock_id); - /* A skipped test is considered passed */ - if (ret0 == KSFT_SKIP) - ret0 = KSFT_PASS; + vdso_test_clock_gettime(clock_id); - ret1 = vdso_test_clock_getres(clock_id); - /* A skipped test is considered passed */ - if (ret1 == KSFT_SKIP) - ret1 = KSFT_PASS; - - ret0 += ret1; - - printf("clock_id: %s", vdso_clock_name[clock_id]); - - if (ret0 > 0) - printf(" [FAIL]\n"); - else - printf(" [PASS]\n"); - - return ret0; + vdso_test_clock_getres(clock_id); } +#define VDSO_TEST_PLAN 16 + int main(int argc, char **argv) { unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - int ret; + + ksft_print_header(); + ksft_set_plan(VDSO_TEST_PLAN); if (!sysinfo_ehdr) { printf("AT_SYSINFO_EHDR is not present!\n"); @@ -201,44 +192,42 @@ int main(int argc, char **argv) vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); - ret = vdso_test_gettimeofday(); + vdso_test_gettimeofday(); #if _POSIX_TIMERS > 0 #ifdef CLOCK_REALTIME - ret += vdso_test_clock(CLOCK_REALTIME); + vdso_test_clock(CLOCK_REALTIME); #endif #ifdef CLOCK_BOOTTIME - ret += vdso_test_clock(CLOCK_BOOTTIME); + vdso_test_clock(CLOCK_BOOTTIME); #endif #ifdef CLOCK_TAI - ret += vdso_test_clock(CLOCK_TAI); + vdso_test_clock(CLOCK_TAI); #endif #ifdef CLOCK_REALTIME_COARSE - ret += vdso_test_clock(CLOCK_REALTIME_COARSE); + vdso_test_clock(CLOCK_REALTIME_COARSE); #endif #ifdef CLOCK_MONOTONIC - ret += vdso_test_clock(CLOCK_MONOTONIC); + vdso_test_clock(CLOCK_MONOTONIC); #endif #ifdef CLOCK_MONOTONIC_RAW - ret += vdso_test_clock(CLOCK_MONOTONIC_RAW); + vdso_test_clock(CLOCK_MONOTONIC_RAW); #endif #ifdef CLOCK_MONOTONIC_COARSE - ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE); + vdso_test_clock(CLOCK_MONOTONIC_COARSE); #endif #endif - ret += vdso_test_time(); + vdso_test_time(); - if (ret > 0) - return KSFT_FAIL; - - return KSFT_PASS; + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; } From 3d2504663c41104b4359a15f35670cfa82de1bbf Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Tue, 14 Dec 2021 10:08:22 +0000 Subject: [PATCH 312/367] i40e: Fix reset bw limit when DCB enabled with 1 TC There was an AQ error I40E_AQ_RC_EINVAL when trying to reset bw limit as part of bw allocation setup. This was caused by trying to reset bw limit with DCB enabled. Bw limit should not be reset when DCB is enabled. The code was relying on the pf->flags to check if DCB is enabled but if only 1 TC is available this flag will not be set even though DCB is enabled. Add a check for number of TC and if it is 1 don't try to reset bw limit even if pf->flags shows DCB as disabled. Fixes: fa38e30ac73f ("i40e: Fix for Tx timeouts when interface is brought up if DCB is enabled") Suggested-by: Alexander Lobakin # Flatten the condition Signed-off-by: Sylwester Dziedziuch Signed-off-by: Jedrzej Jagielski Reviewed-by: Alexander Lobakin Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f70c478dafdb..5cb4dc69fe87 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5372,7 +5372,15 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, /* There is no need to reset BW when mqprio mode is on. */ if (pf->flags & I40E_FLAG_TC_MQPRIO) return 0; - if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { + + if (!vsi->mqprio_qopt.qopt.hw) { + if (pf->flags & I40E_FLAG_DCB_ENABLED) + goto skip_reset; + + if (IS_ENABLED(CONFIG_I40E_DCB) && + i40e_dcb_hw_get_num_tc(&pf->hw) == 1) + goto skip_reset; + ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) dev_info(&pf->pdev->dev, @@ -5380,6 +5388,8 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, vsi->seid); return ret; } + +skip_reset: memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) From 6533e558c6505e94c3e0ed4281ed5e31ec985f4d Mon Sep 17 00:00:00 2001 From: Karen Sornek Date: Wed, 12 Jan 2022 10:19:47 +0100 Subject: [PATCH 313/367] i40e: Fix reset path while removing the driver Fix the crash in kernel while dereferencing the NULL pointer, when the driver is unloaded and simultaneously the VSI rings are being stopped. The hardware requires 50msec in order to finish RX queues disable. For this purpose the driver spins in mdelay function for the operation to be completed. For example changing number of queues which requires reset would fail in the following call stack: 1) i40e_prep_for_reset 2) i40e_pf_quiesce_all_vsi 3) i40e_quiesce_vsi 4) i40e_vsi_close 5) i40e_down 6) i40e_vsi_stop_rings 7) i40e_vsi_control_rx -> disable requires the delay of 50msecs 8) continue back in i40e_down function where i40e_clean_tx_ring(vsi->tx_rings[i]) is going to crash When the driver was spinning vsi_release called i40e_vsi_free_arrays where the vsi->tx_rings resources were freed and the pointer was set to NULL. Fixes: 5b6d4a7f20b0 ("i40e: Fix crash during removing i40e driver") Signed-off-by: Slawomir Laba Signed-off-by: Sylwester Dziedziuch Signed-off-by: Karen Sornek Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2e02cc68cd3f..80c5cecaf2b5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -144,6 +144,7 @@ enum i40e_state_t { __I40E_VIRTCHNL_OP_PENDING, __I40E_RECOVERY_MODE, __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */ + __I40E_IN_REMOVE, __I40E_VFS_RELEASING, /* This must be last as it determines the size of the BITMAP */ __I40E_STATE_SIZE__, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5cb4dc69fe87..0c4b7dfb3b35 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10863,6 +10863,9 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { int ret; + + if (test_bit(__I40E_IN_REMOVE, pf->state)) + return; /* Now we wait for GRST to settle out. * We don't have to delete the VEBs or VSIs from the hw switch * because the reset will make them disappear. @@ -12222,6 +12225,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) vsi->req_queue_pairs = queue_count; i40e_prep_for_reset(pf); + if (test_bit(__I40E_IN_REMOVE, pf->state)) + return pf->alloc_rss_size; pf->alloc_rss_size = new_rss_size; @@ -13048,6 +13053,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, if (need_reset) i40e_prep_for_reset(pf); + /* VSI shall be deleted in a moment, just return EINVAL */ + if (test_bit(__I40E_IN_REMOVE, pf->state)) + return -EINVAL; + old_prog = xchg(&vsi->xdp_prog, prog); if (need_reset) { @@ -15938,8 +15947,13 @@ static void i40e_remove(struct pci_dev *pdev) i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0); - while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) + /* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE + * flags, once they are set, i40e_rebuild should not be called as + * i40e_prep_for_reset always returns early. + */ + while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) usleep_range(1000, 2000); + set_bit(__I40E_IN_REMOVE, pf->state); if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { set_bit(__I40E_VF_RESETS_DISABLED, pf->state); @@ -16138,6 +16152,9 @@ static void i40e_pci_error_reset_done(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); + if (test_bit(__I40E_IN_REMOVE, pf->state)) + return; + i40e_reset_and_rebuild(pf, false, false); } From 4223f86512877b04c932e7203648b37eec931731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 29 Jan 2022 09:27:04 +0300 Subject: [PATCH 314/367] net: dsa: mt7530: make NET_DSA_MT7530 select MEDIATEK_GE_PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make MediaTek MT753x DSA driver enable MediaTek Gigabit PHYs driver to properly control MT7530 and MT7531 switch PHYs. A noticeable change is that the behaviour of switchport interfaces going up-down-up-down is no longer there. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220129062703.595-1-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 7b1457a6e327..c0c91440340a 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -36,6 +36,7 @@ config NET_DSA_LANTIQ_GSWIP config NET_DSA_MT7530 tristate "MediaTek MT753x and MT7621 Ethernet switch support" select NET_DSA_TAG_MTK + select MEDIATEK_GE_PHY help This enables support for the MediaTek MT7530, MT7531, and MT7621 Ethernet switch chips. From 7af037c39b600bac2c716dd1228e8ddbe149573f Mon Sep 17 00:00:00 2001 From: Camel Guo Date: Mon, 31 Jan 2022 09:38:40 +0100 Subject: [PATCH 315/367] net: stmmac: dump gmac4 DMA registers correctly Unlike gmac100, gmac1000, gmac4 has 27 DMA registers and they are located at DMA_CHAN_BASE_ADDR (0x1100). In order for ethtool to dump gmac4 DMA registers correctly, this commit checks if a net_device has gmac4 and uses different logic to dump its DMA registers. This fixes the following KASAN warning, which can normally be triggered by a command similar like "ethtool -d eth0": BUG: KASAN: vmalloc-out-of-bounds in dwmac4_dump_dma_regs+0x6d4/0xb30 Write of size 4 at addr ffffffc010177100 by task ethtool/1839 kasan_report+0x200/0x21c __asan_report_store4_noabort+0x34/0x60 dwmac4_dump_dma_regs+0x6d4/0xb30 stmmac_ethtool_gregs+0x110/0x204 ethtool_get_regs+0x200/0x4b0 dev_ethtool+0x1dac/0x3800 dev_ioctl+0x7c0/0xb50 sock_ioctl+0x298/0x6c4 ... Fixes: fbf68229ffe7 ("net: stmmac: unify registers dumps methods") Signed-off-by: Camel Guo Link: https://lore.kernel.org/r/20220131083841.3346801-1-camel.guo@axis.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac_dma.h | 1 + .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 1914ad698cab..acd70b9a3173 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -150,6 +150,7 @@ #define NUM_DWMAC100_DMA_REGS 9 #define NUM_DWMAC1000_DMA_REGS 23 +#define NUM_DWMAC4_DMA_REGS 27 void dwmac_enable_dma_transmission(void __iomem *ioaddr); void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 164dff5ec32e..abfb3cd5958d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -21,10 +21,18 @@ #include "dwxgmac2.h" #define REG_SPACE_SIZE 0x1060 +#define GMAC4_REG_SPACE_SIZE 0x116C #define MAC100_ETHTOOL_NAME "st_mac100" #define GMAC_ETHTOOL_NAME "st_gmac" #define XGMAC_ETHTOOL_NAME "st_xgmac" +/* Same as DMA_CHAN_BASE_ADDR defined in dwmac4_dma.h + * + * It is here because dwmac_dma.h and dwmac4_dam.h can not be included at the + * same time due to the conflicting macro names. + */ +#define GMAC4_DMA_CHAN_BASE_ADDR 0x00001100 + #define ETHTOOL_DMA_OFFSET 55 struct stmmac_stats { @@ -434,6 +442,8 @@ static int stmmac_ethtool_get_regs_len(struct net_device *dev) if (priv->plat->has_xgmac) return XGMAC_REGSIZE * 4; + else if (priv->plat->has_gmac4) + return GMAC4_REG_SPACE_SIZE; return REG_SPACE_SIZE; } @@ -446,8 +456,13 @@ static void stmmac_ethtool_gregs(struct net_device *dev, stmmac_dump_mac_regs(priv, priv->hw, reg_space); stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space); - if (!priv->plat->has_xgmac) { - /* Copy DMA registers to where ethtool expects them */ + /* Copy DMA registers to where ethtool expects them */ + if (priv->plat->has_gmac4) { + /* GMAC4 dumps its DMA registers at its DMA_CHAN_BASE_ADDR */ + memcpy(®_space[ETHTOOL_DMA_OFFSET], + ®_space[GMAC4_DMA_CHAN_BASE_ADDR / 4], + NUM_DWMAC4_DMA_REGS * 4); + } else if (!priv->plat->has_xgmac) { memcpy(®_space[ETHTOOL_DMA_OFFSET], ®_space[DMA_BUS_MODE / 4], NUM_DWMAC1000_DMA_REGS * 4); From 9cef24c8b76c1f6effe499d2f131807c90f7ce9a Mon Sep 17 00:00:00 2001 From: Lior Nahmanson Date: Sun, 30 Jan 2022 13:29:01 +0200 Subject: [PATCH 316/367] net: macsec: Fix offload support for NETDEV_UNREGISTER event Current macsec netdev notify handler handles NETDEV_UNREGISTER event by releasing relevant SW resources only, this causes resources leak in case of macsec HW offload, as the underlay driver was not notified to clean it's macsec offload resources. Fix by calling the underlay driver to clean it's relevant resources by moving offload handling from macsec_dellink() to macsec_common_dellink() when handling NETDEV_UNREGISTER event. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Lior Nahmanson Reviewed-by: Raed Salem Signed-off-by: Raed Salem Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/1643542141-28956-1-git-send-email-raeds@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 16aa3a478e9e..33ff33c05aab 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3870,6 +3870,18 @@ static void macsec_common_dellink(struct net_device *dev, struct list_head *head struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; + /* If h/w offloading is available, propagate to the device */ + if (macsec_is_offloaded(macsec)) { + const struct macsec_ops *ops; + struct macsec_context ctx; + + ops = macsec_get_ops(netdev_priv(dev), &ctx); + if (ops) { + ctx.secy = &macsec->secy; + macsec_offload(ops->mdo_del_secy, &ctx); + } + } + unregister_netdevice_queue(dev, head); list_del_rcu(&macsec->secys); macsec_del_dev(macsec); @@ -3884,18 +3896,6 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head) struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); - /* If h/w offloading is available, propagate to the device */ - if (macsec_is_offloaded(macsec)) { - const struct macsec_ops *ops; - struct macsec_context ctx; - - ops = macsec_get_ops(netdev_priv(dev), &ctx); - if (ops) { - ctx.secy = &macsec->secy; - macsec_offload(ops->mdo_del_secy, &ctx); - } - } - macsec_common_dellink(dev, head); if (list_empty(&rxd->secys)) { From 50317b636e7184d15126e2dfc83db0963a38d31e Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 31 Jan 2022 11:07:02 +0100 Subject: [PATCH 317/367] MIPS: octeon: Fix missed PTR->PTR_WD conversion Fixes: fa62f39dc7e2 ("MIPS: Fix build error due to PTR used in more places") Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/octeon-memcpy.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 0a515cde1c18..25860fba6218 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -74,7 +74,7 @@ #define EXC(inst_reg,addr,handler) \ 9: inst_reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* From 2161ba070999a709f975910b6b9ad6b51cd6f120 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 29 Jan 2022 12:58:19 -0800 Subject: [PATCH 318/367] MIPS: KVM: fix vz.c kernel-doc notation Fix all kernel-doc warnings in mips/kvm/vz.c as reported by the kernel test robot: arch/mips/kvm/vz.c:471: warning: Function parameter or member 'out_compare' not described in '_kvm_vz_save_htimer' arch/mips/kvm/vz.c:471: warning: Function parameter or member 'out_cause' not described in '_kvm_vz_save_htimer' arch/mips/kvm/vz.c:471: warning: Excess function parameter 'compare' description in '_kvm_vz_save_htimer' arch/mips/kvm/vz.c:471: warning: Excess function parameter 'cause' description in '_kvm_vz_save_htimer' arch/mips/kvm/vz.c:1551: warning: No description found for return value of 'kvm_trap_vz_handle_cop_unusable' arch/mips/kvm/vz.c:1552: warning: expecting prototype for kvm_trap_vz_handle_cop_unusuable(). Prototype was for kvm_trap_vz_handle_cop_unusable() instead arch/mips/kvm/vz.c:1597: warning: No description found for return value of 'kvm_trap_vz_handle_msa_disabled' Fixes: c992a4f6a9b0 ("KVM: MIPS: Implement VZ support") Fixes: f4474d50c7d4 ("KVM: MIPS/VZ: Support hardware guest timer") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Aleksandar Markovic Cc: James Hogan Cc: kvm@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/kvm/vz.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 4adca5abbc72..c706f5890a05 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -458,8 +458,8 @@ void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) /** * _kvm_vz_save_htimer() - Switch to software emulation of guest timer. * @vcpu: Virtual CPU. - * @compare: Pointer to write compare value to. - * @cause: Pointer to write cause value to. + * @out_compare: Pointer to write compare value to. + * @out_cause: Pointer to write cause value to. * * Save VZ guest timer state and switch to software emulation of guest CP0 * timer. The hard timer must already be in use, so preemption should be @@ -1541,11 +1541,14 @@ static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu) } /** - * kvm_trap_vz_handle_cop_unusuable() - Guest used unusable coprocessor. + * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor. * @vcpu: Virtual CPU context. * * Handle when the guest attempts to use a coprocessor which hasn't been allowed * by the root context. + * + * Return: value indicating whether to resume the host or the guest + * (RESUME_HOST or RESUME_GUEST) */ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) { @@ -1592,6 +1595,9 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) * * Handle when the guest attempts to use MSA when it is disabled in the root * context. + * + * Return: value indicating whether to resume the host or the guest + * (RESUME_HOST or RESUME_GUEST) */ static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu) { From ee12595147ac1fbfb5bcb23837e26dd58d94b15d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Jan 2022 22:57:01 +0300 Subject: [PATCH 319/367] fanotify: Fix stale file descriptor in copy_event_to_user() This code calls fd_install() which gives the userspace access to the fd. Then if copy_info_records_to_user() fails it calls put_unused_fd(fd) but that will not release it and leads to a stale entry in the file descriptor table. Generally you can't trust the fd after a call to fd_install(). The fix is to delay the fd_install() until everything else has succeeded. Fortunately it requires CAP_SYS_ADMIN to reach this code so the security impact is less. Fixes: f644bc449b37 ("fanotify: fix copy_event_to_user() fid error clean up") Link: https://lore.kernel.org/r/20220128195656.GA26981@kili Signed-off-by: Dan Carpenter Reviewed-by: Mathias Krause Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1026f67b1d1e..2ff6bd85ba8f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -701,9 +701,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (fanotify_is_perm_event(event->mask)) FANOTIFY_PERM(event)->fd = fd; - if (f) - fd_install(fd, f); - if (info_mode) { ret = copy_info_records_to_user(event, info, info_mode, pidfd, buf, count); @@ -711,6 +708,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, goto out_close_fd; } + if (f) + fd_install(fd, f); + return metadata.event_len; out_close_fd: From 881cc731df6af99a21622e9be25a23b81adcd10b Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Mon, 31 Jan 2022 13:56:41 +0000 Subject: [PATCH 320/367] net: phy: Fix qca8081 with speeds lower than 2.5Gb/s A typo in qca808x_read_status means we try to set SMII mode on the port rather than SGMII when the link speed is not 2.5Gb/s. This results in no traffic due to the mismatch in configuration between the phy and the mac. v2: Only change interface mode when the link is up Fixes: 79c7bc0521545 ("net: phy: add qca8081 read_status") Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 5b6c0d120e09..29aa811af430 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -1688,19 +1688,19 @@ static int qca808x_read_status(struct phy_device *phydev) if (ret < 0) return ret; - if (phydev->link && phydev->speed == SPEED_2500) - phydev->interface = PHY_INTERFACE_MODE_2500BASEX; - else - phydev->interface = PHY_INTERFACE_MODE_SMII; - - /* generate seed as a lower random value to make PHY linked as SLAVE easily, - * except for master/slave configuration fault detected. - * the reason for not putting this code into the function link_change_notify is - * the corner case where the link partner is also the qca8081 PHY and the seed - * value is configured as the same value, the link can't be up and no link change - * occurs. - */ - if (!phydev->link) { + if (phydev->link) { + if (phydev->speed == SPEED_2500) + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + else + phydev->interface = PHY_INTERFACE_MODE_SGMII; + } else { + /* generate seed as a lower random value to make PHY linked as SLAVE easily, + * except for master/slave configuration fault detected. + * the reason for not putting this code into the function link_change_notify is + * the corner case where the link partner is also the qca8081 PHY and the seed + * value is configured as the same value, the link can't be up and no link change + * occurs. + */ if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR) { qca808x_phy_ms_seed_enable(phydev, false); } else { From 836f35f79153ce09d813c83f341dba4481996966 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Thu, 27 Jan 2022 14:03:58 -0500 Subject: [PATCH 321/367] platform/x86: thinkpad_acpi: Fix incorrect use of platform profile on AMD platforms Lenovo AMD based platforms have been offering platform_profiles but they are not working correctly. This is because the mode we are using on the Intel platforms (MMC) is not available on the AMD platforms. This commit adds checking of the functional capabilities returned by the BIOS to confirm if MMC is supported or not. Profiles will not be available if the platform is not MMC capable. I'm investigating and working on an alternative for AMD platforms but that is still work-in-progress. Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220127190358.4078-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 33f611af6e51..bd045486b933 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10119,6 +10119,9 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_CMD_MMC_GET 8 /* To get current MMC function and mode */ #define DYTC_CMD_RESET 0x1ff /* To reset back to default */ +#define DYTC_CMD_FUNC_CAP 3 /* To get DYTC capabilities */ +#define DYTC_FC_MMC 27 /* MMC Mode supported */ + #define DYTC_GET_FUNCTION_BIT 8 /* Bits 8-11 - function setting */ #define DYTC_GET_MODE_BIT 12 /* Bits 12-15 - mode setting */ @@ -10331,6 +10334,15 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (dytc_version < 5) return -ENODEV; + /* Check what capabilities are supported. Currently MMC is needed */ + err = dytc_command(DYTC_CMD_FUNC_CAP, &output); + if (err) + return err; + if (!(output & BIT(DYTC_FC_MMC))) { + dbg_printk(TPACPI_DBG_INIT, " DYTC MMC mode not supported\n"); + return -ENODEV; + } + dbg_printk(TPACPI_DBG_INIT, "DYTC version %d: thermal mode available\n", dytc_version); /* From 68defd528f94ed1cf11f49a75cc1875dccd781fa Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 7 Dec 2021 13:23:06 +0200 Subject: [PATCH 322/367] e1000e: Separate ADP board type from TGP We have the same LAN controller on different PCH's. Separate ADP board type from a TGP which will allow for specific fixes to be applied for ADP platforms. Suggested-by: Kai-Heng Feng Suggested-by: Dima Ruinskiy Signed-off-by: Sasha Neftin Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/e1000.h | 4 ++- drivers/net/ethernet/intel/e1000e/ich8lan.c | 20 +++++++++++++ drivers/net/ethernet/intel/e1000e/netdev.c | 33 +++++++++++---------- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index c3def0ee7788..8d06c9d8ff8b 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -115,7 +115,8 @@ enum e1000_boards { board_pch_lpt, board_pch_spt, board_pch_cnp, - board_pch_tgp + board_pch_tgp, + board_pch_adp }; struct e1000_ps_page { @@ -502,6 +503,7 @@ extern const struct e1000_info e1000_pch_lpt_info; extern const struct e1000_info e1000_pch_spt_info; extern const struct e1000_info e1000_pch_cnp_info; extern const struct e1000_info e1000_pch_tgp_info; +extern const struct e1000_info e1000_pch_adp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 5e4fc9b4e2ad..c908c84b86d2 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -6021,3 +6021,23 @@ const struct e1000_info e1000_pch_tgp_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_adp_info = { + .mac = e1000_pch_adp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 635a95927e93..d2de8bc4c3b7 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -52,6 +52,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch_spt] = &e1000_pch_spt_info, [board_pch_cnp] = &e1000_pch_cnp_info, [board_pch_tgp] = &e1000_pch_tgp_info, + [board_pch_adp] = &e1000_pch_adp_info, }; struct e1000_reg_info { @@ -7898,22 +7899,22 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_tgp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_tgp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_adp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; From cad014b7b5a6897d8c4fad13e2888978bfb7a53f Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 7 Dec 2021 13:23:42 +0200 Subject: [PATCH 323/367] e1000e: Handshake with CSME starts from ADL platforms Handshake with CSME/AMT on none provisioned platforms during S0ix flow is not supported on TGL platform and can cause to HW unit hang. Update the handshake with CSME flow to start from the ADL platform. Fixes: 3e55d231716e ("e1000e: Add handshake with the CSME to support S0ix") Signed-off-by: Sasha Neftin Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index d2de8bc4c3b7..a42aeb555f34 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6342,7 +6342,8 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) u32 mac_data; u16 phy_data; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && + hw->mac.type >= e1000_pch_adp) { /* Request ME configure the device for S0ix */ mac_data = er32(H2ME); mac_data |= E1000_H2ME_START_DPG; @@ -6491,7 +6492,8 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) u16 phy_data; u32 i = 0; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && + hw->mac.type >= e1000_pch_adp) { /* Request ME unconfigure the device from S0ix */ mac_data = er32(H2ME); mac_data &= ~E1000_H2ME_START_DPG; From 24f6008564183aa120d07c03d9289519c2fe02af Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 20 Jan 2022 11:04:01 -0600 Subject: [PATCH 324/367] cgroup-v1: Require capabilities to set release_agent The cgroup release_agent is called with call_usermodehelper. The function call_usermodehelper starts the release_agent with a full set fo capabilities. Therefore require capabilities when setting the release_agaent. Reported-by: Tabitha Sable Tested-by: Tabitha Sable Fixes: 81a6a5cdd2c5 ("Task Control Groups: automatic userspace notification of idle cgroups") Cc: stable@vger.kernel.org # v2.6.24+ Signed-off-by: "Eric W. Biederman" Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 41e0837a5a0b..0e877dbcfeea 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -549,6 +549,14 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((of->file->f_cred->user_ns != &init_user_ns) || + !capable(CAP_SYS_ADMIN)) + return -EPERM; + cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; @@ -954,6 +962,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) /* Specifying two release agents is forbidden */ if (ctx->release_agent) return invalfc(fc, "release_agent respecified"); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) + return invalfc(fc, "Setting release_agent not allowed"); ctx->release_agent = param->string; param->string = NULL; break; From 1a2beb3d5a0b4051067ecf49ea799bee340e0e7c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 31 Jan 2022 15:48:54 +0100 Subject: [PATCH 325/367] mailmap: update Christian Brauner's email address At least one of the addresses will stop functioning after February. Signed-off-by: Christian Brauner Signed-off-by: Linus Torvalds --- .mailmap | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.mailmap b/.mailmap index b76e520809d0..8cd44b0c6579 100644 --- a/.mailmap +++ b/.mailmap @@ -80,6 +80,9 @@ Chris Chiu Christian Borntraeger Christian Borntraeger Christian Borntraeger +Christian Brauner +Christian Brauner +Christian Brauner Christophe Ricard Christoph Hellwig Colin Ian King From b7892f7d5cb2b8187c603dd8ea3a7c44059ccfc2 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 1 Feb 2022 09:31:20 +0000 Subject: [PATCH 326/367] tools: Ignore errors from `which' when searching a GCC toolchain When cross-building tools with clang, we run `which $(CROSS_COMPILE)gcc` to detect whether a GCC toolchain provides the standard libraries. It is only a helper because some distros put libraries where LLVM does not automatically find them. On other systems, LLVM detects the libc automatically and does not need this. There, it is completely fine not to have a GCC at all, but some versions of `which' display an error when the command is not found: which: no aarch64-linux-gnu-gcc in ($PATH) Since the error can safely be ignored, throw it to /dev/null. Fixes: cebdb7374577 ("tools: Help cross-building with clang") Reported-by: Nathan Chancellor Signed-off-by: Jean-Philippe Brucker Signed-off-by: Daniel Borkmann Tested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/bpf/20220201093119.1713207-1-jean-philippe@linaro.org --- tools/scripts/Makefile.include | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index b0be5f40a3f1..79d102304470 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -90,7 +90,7 @@ EXTRA_WARNINGS += -Wstrict-aliasing=3 else ifneq ($(CROSS_COMPILE),) CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc)) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc 2>/dev/null)) ifneq ($(GCC_TOOLCHAIN_DIR),) CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot) From 6dde7acdb3dc2e0b3bcb090aac0b3699396d309f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Jan 2022 13:17:30 -0800 Subject: [PATCH 327/367] ethernet: smc911x: fix indentation in get/set EEPROM Build bot produced a smatch indentation warning, the code looks correct but it mixes spaces and tabs. Reported-by: kernel test robot Link: https://lore.kernel.org/r/20220131211730.3940875-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/smc911x.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index dd6f69ced4ee..fc9cef9dcefc 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1648,7 +1648,7 @@ static int smc911x_ethtool_geteeprom(struct net_device *dev, return ret; if ((ret=smc911x_ethtool_read_eeprom_byte(dev, &eebuf[i]))!=0) return ret; - } + } memcpy(data, eebuf+eeprom->offset, eeprom->len); return 0; } @@ -1667,11 +1667,11 @@ static int smc911x_ethtool_seteeprom(struct net_device *dev, return ret; /* write byte */ if ((ret=smc911x_ethtool_write_eeprom_byte(dev, *data))!=0) - return ret; + return ret; if ((ret=smc911x_ethtool_write_eeprom_cmd(dev, E2P_CMD_EPC_CMD_WRITE_, i ))!=0) return ret; - } - return 0; + } + return 0; } static int smc911x_ethtool_geteeprom_len(struct net_device *dev) From 04c2a47ffb13c29778e2a14e414ad4cb5a5db4b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Jan 2022 09:20:18 -0800 Subject: [PATCH 328/367] net: sched: fix use-after-free in tc_new_tfilter() Whenever tc_new_tfilter() jumps back to replay: label, we need to make sure @q and @chain local variables are cleared again, or risk use-after-free as in [1] For consistency, apply the same fix in tc_ctl_chain() BUG: KASAN: use-after-free in mini_qdisc_pair_swap+0x1b9/0x1f0 net/sched/sch_generic.c:1581 Write of size 8 at addr ffff8880985c4b08 by task syz-executor.4/1945 CPU: 0 PID: 1945 Comm: syz-executor.4 Not tainted 5.17.0-rc1-syzkaller-00495-gff58831fa02d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x336 mm/kasan/report.c:255 __kasan_report mm/kasan/report.c:442 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 mini_qdisc_pair_swap+0x1b9/0x1f0 net/sched/sch_generic.c:1581 tcf_chain_head_change_item net/sched/cls_api.c:372 [inline] tcf_chain0_head_change.isra.0+0xb9/0x120 net/sched/cls_api.c:386 tcf_chain_tp_insert net/sched/cls_api.c:1657 [inline] tcf_chain_tp_insert_unique net/sched/cls_api.c:1707 [inline] tc_new_tfilter+0x1e67/0x2350 net/sched/cls_api.c:2086 rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:5583 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:725 ____sys_sendmsg+0x331/0x810 net/socket.c:2413 ___sys_sendmsg+0xf3/0x170 net/socket.c:2467 __sys_sendmmsg+0x195/0x470 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f2647172059 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f2645aa5168 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00007f2647285100 RCX: 00007f2647172059 RDX: 040000000000009f RSI: 00000000200002c0 RDI: 0000000000000006 RBP: 00007f26471cc08d R08: 0000000000000000 R09: 0000000000000000 R10: 9e00000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffb3f7f02f R14: 00007f2645aa5300 R15: 0000000000022000 Allocated by task 1944: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:45 [inline] set_alloc_info mm/kasan/common.c:436 [inline] ____kasan_kmalloc mm/kasan/common.c:515 [inline] ____kasan_kmalloc mm/kasan/common.c:474 [inline] __kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:524 kmalloc_node include/linux/slab.h:604 [inline] kzalloc_node include/linux/slab.h:726 [inline] qdisc_alloc+0xac/0xa10 net/sched/sch_generic.c:941 qdisc_create.constprop.0+0xce/0x10f0 net/sched/sch_api.c:1211 tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5592 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:725 ____sys_sendmsg+0x331/0x810 net/socket.c:2413 ___sys_sendmsg+0xf3/0x170 net/socket.c:2467 __sys_sendmmsg+0x195/0x470 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 3609: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:45 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free+0x130/0x160 mm/kasan/common.c:328 kasan_slab_free include/linux/kasan.h:236 [inline] slab_free_hook mm/slub.c:1728 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1754 slab_free mm/slub.c:3509 [inline] kfree+0xcb/0x280 mm/slub.c:4562 rcu_do_batch kernel/rcu/tree.c:2527 [inline] rcu_core+0x7b8/0x1540 kernel/rcu/tree.c:2778 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 __call_rcu kernel/rcu/tree.c:3026 [inline] call_rcu+0xb1/0x740 kernel/rcu/tree.c:3106 qdisc_put_unlocked+0x6f/0x90 net/sched/sch_generic.c:1109 tcf_block_release+0x86/0x90 net/sched/cls_api.c:1238 tc_new_tfilter+0xc0d/0x2350 net/sched/cls_api.c:2148 rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:5583 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:725 ____sys_sendmsg+0x331/0x810 net/socket.c:2413 ___sys_sendmsg+0xf3/0x170 net/socket.c:2467 __sys_sendmmsg+0x195/0x470 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff8880985c4800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 776 bytes inside of 1024-byte region [ffff8880985c4800, ffff8880985c4c00) The buggy address belongs to the page: page:ffffea0002617000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x985c0 head:ffffea0002617000 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 0000000000000000 dead000000000122 ffff888010c41dc0 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 1941, ts 1038999441284, free_ts 1033444432829 prep_new_page mm/page_alloc.c:2434 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4165 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5389 alloc_pages+0x1aa/0x310 mm/mempolicy.c:2271 alloc_slab_page mm/slub.c:1799 [inline] allocate_slab mm/slub.c:1944 [inline] new_slab+0x28a/0x3b0 mm/slub.c:2004 ___slab_alloc+0x87c/0xe90 mm/slub.c:3018 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3105 slab_alloc_node mm/slub.c:3196 [inline] slab_alloc mm/slub.c:3238 [inline] __kmalloc+0x2fb/0x340 mm/slub.c:4420 kmalloc include/linux/slab.h:586 [inline] kzalloc include/linux/slab.h:715 [inline] __register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1335 neigh_sysctl_register+0x2c8/0x5e0 net/core/neighbour.c:3787 devinet_sysctl_register+0xb1/0x230 net/ipv4/devinet.c:2618 inetdev_init+0x286/0x580 net/ipv4/devinet.c:278 inetdev_event+0xa8a/0x15d0 net/ipv4/devinet.c:1532 notifier_call_chain+0xb5/0x200 kernel/notifier.c:84 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1919 call_netdevice_notifiers_extack net/core/dev.c:1931 [inline] call_netdevice_notifiers net/core/dev.c:1945 [inline] register_netdevice+0x1073/0x1500 net/core/dev.c:9698 veth_newlink+0x59c/0xa90 drivers/net/veth.c:1722 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1352 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1404 free_unref_page_prepare mm/page_alloc.c:3325 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3404 release_pages+0x748/0x1220 mm/swap.c:956 tlb_batch_pages_flush mm/mmu_gather.c:50 [inline] tlb_flush_mmu_free mm/mmu_gather.c:243 [inline] tlb_flush_mmu+0xe9/0x6b0 mm/mmu_gather.c:250 zap_pte_range mm/memory.c:1441 [inline] zap_pmd_range mm/memory.c:1490 [inline] zap_pud_range mm/memory.c:1519 [inline] zap_p4d_range mm/memory.c:1540 [inline] unmap_page_range+0x1d1d/0x2a30 mm/memory.c:1561 unmap_single_vma+0x198/0x310 mm/memory.c:1606 unmap_vmas+0x16b/0x2f0 mm/memory.c:1638 exit_mmap+0x201/0x670 mm/mmap.c:3178 __mmput+0x122/0x4b0 kernel/fork.c:1114 mmput+0x56/0x60 kernel/fork.c:1135 exit_mm kernel/exit.c:507 [inline] do_exit+0xa3c/0x2a30 kernel/exit.c:793 do_group_exit+0xd2/0x2f0 kernel/exit.c:935 __do_sys_exit_group kernel/exit.c:946 [inline] __se_sys_exit_group kernel/exit.c:944 [inline] __x64_sys_exit_group+0x3a/0x50 kernel/exit.c:944 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Memory state around the buggy address: ffff8880985c4a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880985c4a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8880985c4b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880985c4b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880985c4c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc Fixes: 470502de5bdb ("net: sched: unlock rules update API") Signed-off-by: Eric Dumazet Cc: Vlad Buslov Cc: Jiri Pirko Cc: Cong Wang Reported-by: syzbot Link: https://lore.kernel.org/r/20220131172018.3704490-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d4e27c679123..5f0f346b576f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1945,9 +1945,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; + struct Qdisc *q; struct tcf_chain_info chain_info; - struct tcf_chain *chain = NULL; + struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; @@ -1976,6 +1976,8 @@ replay: tp = NULL; cl = 0; block = NULL; + q = NULL; + chain = NULL; flags = 0; if (prio == 0) { @@ -2798,8 +2800,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; - struct tcf_chain *chain = NULL; + struct Qdisc *q; + struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; @@ -2809,6 +2811,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: + q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) From c6f6f2444bdbe0079e41914a35081530d0409963 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Jan 2022 17:21:06 -0800 Subject: [PATCH 329/367] rtnetlink: make sure to refresh master_dev/m_ops in __rtnl_newlink() While looking at one unrelated syzbot bug, I found the replay logic in __rtnl_newlink() to potentially trigger use-after-free. It is better to clear master_dev and m_ops inside the loop, in case we have to replay it. Fixes: ba7d49b1f0f8 ("rtnetlink: provide api for getting and setting slave info") Signed-off-by: Eric Dumazet Cc: Jiri Pirko Link: https://lore.kernel.org/r/20220201012106.216495-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e476403231f0..710da8a36729 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3275,8 +3275,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - const struct rtnl_link_ops *m_ops = NULL; - struct net_device *master_dev = NULL; + const struct rtnl_link_ops *m_ops; + struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct nlattr *tb[IFLA_MAX + 1]; @@ -3314,6 +3314,8 @@ replay: else dev = NULL; + master_dev = NULL; + m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) From e42e70ad6ae2ae511a6143d2e8da929366e58bd9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Jan 2022 18:23:58 -0800 Subject: [PATCH 330/367] af_packet: fix data-race in packet_setsockopt / packet_setsockopt When packet_setsockopt( PACKET_FANOUT_DATA ) reads po->fanout, no lock is held, meaning that another thread can change po->fanout. Given that po->fanout can only be set once during the socket lifetime (it is only cleared from fanout_release()), we can use READ_ONCE()/WRITE_ONCE() to document the race. BUG: KCSAN: data-race in packet_setsockopt / packet_setsockopt write to 0xffff88813ae8e300 of 8 bytes by task 14653 on cpu 0: fanout_add net/packet/af_packet.c:1791 [inline] packet_setsockopt+0x22fe/0x24a0 net/packet/af_packet.c:3931 __sys_setsockopt+0x209/0x2a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff88813ae8e300 of 8 bytes by task 14654 on cpu 1: packet_setsockopt+0x691/0x24a0 net/packet/af_packet.c:3935 __sys_setsockopt+0x209/0x2a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x0000000000000000 -> 0xffff888106f8c000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 14654 Comm: syz-executor.3 Not tainted 5.16.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 47dceb8ecdc1 ("packet: add classic BPF fanout mode") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Link: https://lore.kernel.org/r/20220201022358.330621-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 85ea7ddb48db..ab87f22cc7ec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1789,7 +1789,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { __dev_remove_pack(&po->prot_hook); - po->fanout = match; + + /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ + WRITE_ONCE(po->fanout, match); + po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); @@ -3934,7 +3937,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } case PACKET_FANOUT_DATA: { - if (!po->fanout) + /* Paired with the WRITE_ONCE() in fanout_add() */ + if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); From 479f5547239d970d3833f15f54a6481fffdb91ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Jan 2022 22:52:54 -0800 Subject: [PATCH 331/367] tcp: fix mem under-charging with zerocopy sendmsg() We got reports of following warning in inet_sock_destruct() WARN_ON(sk_forward_alloc_get(sk)); Whenever we add a non zero-copy fragment to a pure zerocopy skb, we have to anticipate that whole skb->truesize will be uncharged when skb is finally freed. skb->data_len is the payload length. But the memory truesize estimated by __zerocopy_sg_from_iter() is page aligned. Fixes: 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs") Signed-off-by: Eric Dumazet Cc: Talal Ahmad Cc: Arjun Roy Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Link: https://lore.kernel.org/r/20220201065254.680532-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 78e81465f5f3..bdf108f544a4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1322,10 +1322,13 @@ new_segment: /* skb changing from pure zc to mixed, must charge zc */ if (unlikely(skb_zcopy_pure(skb))) { - if (!sk_wmem_schedule(sk, skb->data_len)) + u32 extra = skb->truesize - + SKB_TRUESIZE(skb_end_offset(skb)); + + if (!sk_wmem_schedule(sk, extra)) goto wait_for_space; - sk_mem_charge(sk, skb->data_len); + sk_mem_charge(sk, extra); skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY; } From 63e4b45c82ed1bde979da7052229a4229ce9cabf Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 1 Feb 2022 08:16:18 +0100 Subject: [PATCH 332/367] ipheth: fix EOVERFLOW in ipheth_rcvbulk_callback When rx_buf is allocated we need to account for IPHETH_IP_ALIGN, which reduces the usable size by 2 bytes. Otherwise we have 1512 bytes usable instead of 1514, and if we receive more than 1512 bytes, ipheth_rcvbulk_callback is called with status -EOVERFLOW, after which the driver malfunctiones and all communication stops. Resolves ipheth 2-1:4.2: ipheth_rcvbulk_callback: urb status: -75 Fixes: f33d9e2b48a3 ("usbnet: ipheth: fix connectivity with iOS 14") Signed-off-by: Georgi Valkov Tested-by: Jan Kiszka Link: https://lore.kernel.org/all/B60B8A4B-92A0-49B3-805D-809A2433B46C@abv.bg/ Link: https://lore.kernel.org/all/24851bd2769434a5fc24730dce8e8a984c5a4505.1643699778.git.jan.kiszka@siemens.com/ Signed-off-by: Jakub Kicinski --- drivers/net/usb/ipheth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index cd33955df0b6..6a769df0b421 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone) if (tx_buf == NULL) goto free_rx_urb; - rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE, + rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, GFP_KERNEL, &rx_urb->transfer_dma); if (rx_buf == NULL) goto free_tx_buf; @@ -146,7 +146,7 @@ error_nomem: static void ipheth_free_urbs(struct ipheth_device *iphone) { - usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf, + usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf, iphone->rx_urb->transfer_dma); usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf, iphone->tx_urb->transfer_dma); @@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags) usb_fill_bulk_urb(dev->rx_urb, udev, usb_rcvbulkpipe(udev, dev->bulk_in), - dev->rx_buf, IPHETH_BUF_SIZE, + dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, ipheth_rcvbulk_callback, dev); dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; From d0cfa548dbde354de986911d3913897b5448faad Mon Sep 17 00:00:00 2001 From: Lior Nahmanson Date: Sun, 30 Jan 2022 13:37:52 +0200 Subject: [PATCH 333/367] net: macsec: Verify that send_sci is on when setting Tx sci explicitly When setting Tx sci explicit, the Rx side is expected to use this sci and not recalculate it from the packet.However, in case of Tx sci is explicit and send_sci is off, the receiver is wrongly recalculate the sci from the source MAC address which most likely be different than the explicit sci. Fix by preventing such configuration when macsec newlink is established and return EINVAL error code on such cases. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Lior Nahmanson Reviewed-by: Raed Salem Signed-off-by: Raed Salem Link: https://lore.kernel.org/r/1643542672-29403-1-git-send-email-raeds@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 33ff33c05aab..3d0874331763 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -4018,6 +4018,15 @@ static int macsec_newlink(struct net *net, struct net_device *dev, !macsec_check_offload(macsec->offload, macsec)) return -EOPNOTSUPP; + /* send_sci must be set to true when transmit sci explicitly is set */ + if ((data && data[IFLA_MACSEC_SCI]) && + (data && data[IFLA_MACSEC_INC_SCI])) { + u8 send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]); + + if (!send_sci) + return -EINVAL; + } + if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); mtu = real_dev->mtu - icv_len - macsec_extra_len(true); From 04f8c12f031fcd0ffa0c72822eb665ceb2c872e7 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 6 Jan 2022 16:40:18 +0200 Subject: [PATCH 334/367] net/mlx5: Bridge, take rtnl lock in init error handler The mlx5_esw_bridge_cleanup() is expected to be called with rtnl lock taken, which is true for mlx5e_rep_bridge_cleanup() function but not for error handling code in mlx5e_rep_bridge_init(). Add missing rtnl lock/unlock calls and extend both mlx5_esw_bridge_cleanup() and its dual function mlx5_esw_bridge_init() with ASSERT_RTNL() to verify the invariant from now on. Fixes: 7cd6a54a8285 ("net/mlx5: Bridge, handle FDB events") Fixes: 19e9bfa044f3 ("net/mlx5: Bridge, add offload infrastructure") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index c6d2f8c78db7..d5cb27667005 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -509,7 +509,9 @@ err_register_swdev_blk: err_register_swdev: destroy_workqueue(br_offloads->wq); err_alloc_wq: + rtnl_lock(); mlx5_esw_bridge_cleanup(esw); + rtnl_unlock(); } void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index f690f430f40f..05e08cec5a8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1574,6 +1574,8 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads; + ASSERT_RTNL(); + br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL); if (!br_offloads) return ERR_PTR(-ENOMEM); @@ -1590,6 +1592,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads; + ASSERT_RTNL(); + if (!br_offloads) return; From 350d9a823734b5a7e767cddc3bdde5f0bcbb7ff4 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 6 Jan 2022 18:45:26 +0200 Subject: [PATCH 335/367] net/mlx5: Bridge, ensure dev_name is null-terminated Even though net_device->name is guaranteed to be null-terminated string of size<=IFNAMSIZ, the test robot complains that return value of netdev_name() can be larger: In file included from include/trace/define_trace.h:102, from drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h:113, from drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c:12: drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h: In function 'trace_event_raw_event_mlx5_esw_bridge_fdb_template': >> drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h:24:29: warning: 'strncpy' output may be truncated copying 16 bytes from a string of length 20 [-Wstringop-truncation] 24 | strncpy(__entry->dev_name, | ^~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | netdev_name(fdb->dev), | ~~~~~~~~~~~~~~~~~~~~~~ 26 | IFNAMSIZ); | ~~~~~~~~~ This is caused by the fact that default value of IFNAMSIZ is 16, while placeholder value that is returned by netdev_name() for unnamed net devices is larger than that. The offending code is in a tracing function that is only called for mlx5 representors, so there is no straightforward way to reproduce the issue but let's fix it for correctness sake by replacing strncpy() with strscpy() to ensure that resulting string is always null-terminated. Fixes: 9724fd5d9c2a ("net/mlx5: Bridge, add tracepoints") Reported-by: kernel test robot Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h index 3401188e0a60..51ac24e6ec3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template, __field(unsigned int, used) ), TP_fast_assign( - strncpy(__entry->dev_name, + strscpy(__entry->dev_name, netdev_name(fdb->dev), IFNAMSIZ); memcpy(__entry->addr, fdb->key.addr, ETH_ALEN); From a2446bc77a16cefd27de712d28af2396d6287593 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 4 Jan 2022 10:38:02 +0200 Subject: [PATCH 336/367] net/mlx5e: TC, Reject rules with drop and modify hdr action This kind of action is not supported by firmware and generates a syndrome. kernel: mlx5_core 0000:08:00.0: mlx5_cmd_check:777:(pid 102063): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x8708c3) Fixes: d7e75a325cb2 ("net/mlx5e: Add offloading of E-Switch TC pedit (header re-write) actions") Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3d908a7e1406..671f76c350db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3191,6 +3191,12 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, ct_flow, ct_clear, extack)) From 4a08a131351e375a2969b98e46df260ed04dcba7 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 16 Jan 2022 09:07:22 +0200 Subject: [PATCH 337/367] net/mlx5e: Fix module EEPROM query When querying the module EEPROM, there was a misusage of the 'offset' variable vs the 'query.offset' field. Fix that by always using 'offset' and assigning its value to 'query.offset' right before the mcia register read call. While at it, the cross-pages read size adjustment was changed to be more intuitive. Fixes: e19b0a3474ab ("net/mlx5: Refactor module EEPROM query") Reported-by: Wang Yugui Signed-off-by: Gal Pressman Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 1ef2b6a848c1..7b16a1188aab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -406,23 +406,24 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, switch (module_id) { case MLX5_MODULE_ID_SFP: - mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); + mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); break; case MLX5_MODULE_ID_QSFP: case MLX5_MODULE_ID_QSFP_PLUS: case MLX5_MODULE_ID_QSFP28: - mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); + mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); break; default: mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); return -EINVAL; } - if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH) + if (offset + size > MLX5_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ - size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; + size = MLX5_EEPROM_PAGE_LENGTH - offset; query.size = size; + query.offset = offset; return mlx5_query_mcia(dev, &query, data); } From 3c5193a87b0fea090aa3f769d020337662d87b5e Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Thu, 13 Jan 2022 15:48:48 +0200 Subject: [PATCH 338/367] net/mlx5: Use del_timer_sync in fw reset flow of halting poll Substitute del_timer() with del_timer_sync() in fw reset polling deactivation flow, in order to prevent a race condition which occurs when del_timer() is called and timer is deactivated while another process is handling the timer interrupt. A situation that led to the following call trace: RIP: 0010:run_timer_softirq+0x137/0x420 recalibrate_cpu_khz+0x10/0x10 ktime_get+0x3e/0xa0 ? sched_clock_cpu+0xb/0xc0 __do_softirq+0xf5/0x2ea irq_exit_rcu+0xc1/0xf0 sysvec_apic_timer_interrupt+0x9e/0xc0 asm_sysvec_apic_timer_interrupt+0x12/0x20 Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event") Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 0b0234f9d694..84dbe46d5ede 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -132,7 +132,7 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; - del_timer(&fw_reset->timer); + del_timer_sync(&fw_reset->timer); } static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) From 5623ef8a118838aae65363750dfafcba734dc8cb Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 17 Jan 2022 15:00:30 +0200 Subject: [PATCH 339/367] net/mlx5e: TC, Reject rules with forward and drop actions Such rules are redundant but allowed and passed to the driver. The driver does not support offloading such rules so return an error. Fixes: 03a9d11e6eeb ("net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads") Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 671f76c350db..4c6e3c26c1ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3191,6 +3191,12 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); From 55b2ca702cfa744a9eb108915996a2294da47e71 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 17 Jan 2022 15:32:16 +0200 Subject: [PATCH 340/367] net/mlx5: Fix offloading with ESWITCH_IPV4_TTL_MODIFY_ENABLE Only prio 1 is supported for nic mode when there is no ignore flow level support in firmware. But for switchdev mode, which supports fixed number of statically pre-allocated prios, this restriction is not relevant so it can be relaxed. Fixes: d671e109bd85 ("net/mlx5: Fix tc max supported prio for nic mode") Signed-off-by: Dima Chumak Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index d5e47630e284..e94233a12a32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,12 +121,13 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { - if (!mlx5_chains_prios_supported(chains)) - return 1; - if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; + if (!chains->dev->priv.eswitch || + chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS) + return 1; + /* We should get here only for eswitch case */ return FDB_TC_MAX_PRIO; } From 880b517691908fb753019b9b27cd082e7617debd Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 24 Jan 2022 13:56:26 +0200 Subject: [PATCH 341/367] net/mlx5: Bridge, Fix devlink deadlock on net namespace deletion When changing mode to switchdev, rep bridge init registered to netdevice notifier holds the devlink lock and then takes pernet_ops_rwsem. At that time deleting a netns holds pernet_ops_rwsem and then takes the devlink lock. Example sequence is: $ ip netns add foo $ devlink dev eswitch set pci/0000:00:08.0 mode switchdev & $ ip netns del foo deleting netns trace: [ 1185.365555] ? devlink_pernet_pre_exit+0x74/0x1c0 [ 1185.368331] ? mutex_lock_io_nested+0x13f0/0x13f0 [ 1185.370984] ? xt_find_table+0x40/0x100 [ 1185.373244] ? __mutex_lock+0x24a/0x15a0 [ 1185.375494] ? net_generic+0xa0/0x1c0 [ 1185.376844] ? wait_for_completion_io+0x280/0x280 [ 1185.377767] ? devlink_pernet_pre_exit+0x74/0x1c0 [ 1185.378686] devlink_pernet_pre_exit+0x74/0x1c0 [ 1185.379579] ? devlink_nl_cmd_get_dumpit+0x3a0/0x3a0 [ 1185.380557] ? xt_find_table+0xda/0x100 [ 1185.381367] cleanup_net+0x372/0x8e0 changing mode to switchdev trace: [ 1185.411267] down_write+0x13a/0x150 [ 1185.412029] ? down_write_killable+0x180/0x180 [ 1185.413005] register_netdevice_notifier+0x1e/0x210 [ 1185.414000] mlx5e_rep_bridge_init+0x181/0x360 [mlx5_core] [ 1185.415243] mlx5e_uplink_rep_enable+0x269/0x480 [mlx5_core] [ 1185.416464] ? mlx5e_uplink_rep_disable+0x210/0x210 [mlx5_core] [ 1185.417749] mlx5e_attach_netdev+0x232/0x400 [mlx5_core] [ 1185.418906] mlx5e_netdev_attach_profile+0x15b/0x1e0 [mlx5_core] [ 1185.420172] mlx5e_netdev_change_profile+0x15a/0x1d0 [mlx5_core] [ 1185.421459] mlx5e_vport_rep_load+0x557/0x780 [mlx5_core] [ 1185.422624] ? mlx5e_stats_grp_vport_rep_num_stats+0x10/0x10 [mlx5_core] [ 1185.424006] mlx5_esw_offloads_rep_load+0xdb/0x190 [mlx5_core] [ 1185.425277] esw_offloads_enable+0xd74/0x14a0 [mlx5_core] Fix this by registering rep bridges for per net netdev notifier instead of global one, which operats on the net namespace without holding the pernet_ops_rwsem. Fixes: 19e9bfa044f3 ("net/mlx5: Bridge, add offload infrastructure") Signed-off-by: Roi Dayan Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index d5cb27667005..48dc121b2cb4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -491,7 +491,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) } br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event; - err = register_netdevice_notifier(&br_offloads->netdev_nb); + err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); if (err) { esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n", err); @@ -526,7 +526,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) return; cancel_delayed_work_sync(&br_offloads->update_work); - unregister_netdevice_notifier(&br_offloads->netdev_nb); + unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); unregister_switchdev_blocking_notifier(&br_offloads->nb_blk); unregister_switchdev_notifier(&br_offloads->nb); destroy_workqueue(br_offloads->wq); From b8d91145ed7cfa046cc07bcfb277465b9d45da73 Mon Sep 17 00:00:00 2001 From: Khalid Manaa Date: Wed, 26 Jan 2022 14:14:58 +0200 Subject: [PATCH 342/367] net/mlx5e: Fix wrong calculation of header index in HW_GRO The HW doesn't wrap the CQE.shampo.header_index field according to the headers buffer size, instead it always increases it until reaching overflow of u16 size. Thus the mlx5e_handle_rx_cqe_mpwrq_shampo handler should mask the CQE header_index field to find the actual header index in the headers buffer. Fixes: f97d5c2a453e ("net/mlx5e: Add handle SHAMPO cqe support") Signed-off-by: Khalid Manaa Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 4cdf8e5b24c2..b789af07829c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -167,6 +167,11 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) return pi; } +static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1); +} + struct mlx5e_shampo_umr { u16 len; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e86ccc22fb82..3a79ecd38003 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1117,7 +1117,7 @@ static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct tcphdr *skb_tcp_hd) { - u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index); + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); struct tcphdr *last_tcp_hd; void *last_hd_addr; @@ -1973,7 +1973,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size; - u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index); + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset); u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u32 data_offset = wqe_offset & (PAGE_SIZE - 1); From 7957837b816f11eecb9146235bb0715478f4c81f Mon Sep 17 00:00:00 2001 From: Khalid Manaa Date: Wed, 26 Jan 2022 14:25:55 +0200 Subject: [PATCH 343/367] net/mlx5e: Fix broken SKB allocation in HW-GRO In case the HW doesn't perform header-data split, it will write the whole packet into the data buffer in the WQ, in this case the SHAMPO CQE handler couldn't use the header entry to build the SKB, instead it should allocate a new memory to build the SKB using the function: mlx5e_skb_from_cqe_mpwrq_nonlinear. Fixes: f97d5c2a453e ("net/mlx5e: Add handle SHAMPO cqe support") Signed-off-by: Khalid Manaa Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3a79ecd38003..ee0a8f5206e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1871,7 +1871,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return skb; } -static void +static struct sk_buff * mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { @@ -1895,7 +1895,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size); if (unlikely(!skb)) - return; + return NULL; /* queue up for recycling/reuse */ page_ref_inc(head->page); @@ -1907,7 +1907,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, ALIGN(head_size, sizeof(long))); if (unlikely(!skb)) { rq->stats->buff_alloc_err++; - return; + return NULL; } prefetchw(skb->data); @@ -1918,9 +1918,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, skb->tail += head_size; skb->len += head_size; } - rq->hw_gro_data->skb = skb; - NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size; + return skb; } static void @@ -1980,6 +1978,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); u32 page_idx = wqe_offset >> PAGE_SHIFT; + u16 head_size = cqe->shampo.header_size; struct sk_buff **skb = &rq->hw_gro_data->skb; bool flush = cqe->shampo.flush; bool match = cqe->shampo.match; @@ -2011,9 +2010,16 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (!*skb) { - mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + if (likely(head_size)) + *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + else + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset, + page_idx); if (unlikely(!*skb)) goto free_hd_entry; + + NAPI_GRO_CB(*skb)->count = 1; + skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size; } else { NAPI_GRO_CB(*skb)->count++; if (NAPI_GRO_CB(*skb)->count == 2 && @@ -2027,8 +2033,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } } - di = &wi->umr.dma_info[page_idx]; - mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset); + if (likely(head_size)) { + di = &wi->umr.dma_info[page_idx]; + mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset); + } mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); if (flush) From ec41332e02bd0acf1f24206867bb6a02f5877a62 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 13 Jan 2022 15:11:42 +0200 Subject: [PATCH 344/367] net/mlx5e: Fix handling of wrong devices during bond netevent Current implementation of bond netevent handler only check if the handled netdev is VF representor and it missing a check if the VF representor is on the same phys device of the bond handling the netevent. Fix by adding the missing check and optimizing the check if the netdev is VF representor so it will not access uninitialized private data and crashes. BUG: kernel NULL pointer dereference, address: 000000000000036c PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI Workqueue: eth3bond0 bond_mii_monitor [bonding] RIP: 0010:mlx5e_is_uplink_rep+0xc/0x50 [mlx5_core] RSP: 0018:ffff88812d69fd60 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8881cf800000 RCX: 0000000000000000 RDX: ffff88812d69fe10 RSI: 000000000000001b RDI: ffff8881cf800880 RBP: ffff8881cf800000 R08: 00000445cabccf2b R09: 0000000000000008 R10: 0000000000000004 R11: 0000000000000008 R12: ffff88812d69fe10 R13: 00000000fffffffe R14: ffff88820c0f9000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88846fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000036c CR3: 0000000103d80006 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mlx5e_eswitch_uplink_rep+0x31/0x40 [mlx5_core] mlx5e_rep_is_lag_netdev+0x94/0xc0 [mlx5_core] mlx5e_rep_esw_bond_netevent+0xeb/0x3d0 [mlx5_core] raw_notifier_call_chain+0x41/0x60 call_netdevice_notifiers_info+0x34/0x80 netdev_lower_state_changed+0x4e/0xa0 bond_mii_monitor+0x56b/0x640 [bonding] process_one_work+0x1b9/0x390 worker_thread+0x4d/0x3d0 ? rescuer_thread+0x350/0x350 kthread+0x124/0x150 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/bond.c | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 9c076aa20306..b6f5c1bcdbcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) { - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *priv; - - /* A given netdev is not a representor or not a slave of LAG configuration */ - if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev)) - return false; - - priv = netdev_priv(netdev); - rpriv = priv->ppriv; - - /* Egress acl forward to vport is supported only non-uplink representor */ - return rpriv->rep->vport != MLX5_VPORT_UPLINK; + return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev); } static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) @@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt u16 fwd_vport_num; int err; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - info = ptr; lag_info = info->lower_state_info; /* This is not an event of a representor becoming active slave */ @@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) struct net_device *lag_dev; struct mlx5e_priv *priv; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - priv = netdev_priv(netdev); rpriv = priv->ppriv; lag_dev = info->upper_dev; @@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_bond *bond; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return NOTIFY_DONE; + + bond = container_of(nb, struct mlx5e_rep_bond, nb); + priv = netdev_priv(netdev); + rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); + /* Verify VF representor is on the same device of the bond handling the netevent. */ + if (rpriv->uplink_priv.bond != bond) + return NOTIFY_DONE; switch (event) { case NETDEV_CHANGELOWERSTATE: From d8e5883d694bb053b19c4142a2d1f43a34f6fe2c Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Sun, 30 Jan 2022 16:00:41 +0200 Subject: [PATCH 345/367] net/mlx5: E-Switch, Fix uninitialized variable modact The variable modact is not initialized before used in command modify header allocation which can cause command to fail. Fix by initializing modact with zeros. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: 8f1e0b97cc70 ("net/mlx5: E-Switch, Mark miss packets with new chain id mapping") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index e94233a12a32..df58cba37930 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -212,7 +212,7 @@ static int create_chain_restore(struct fs_chain *chain) { struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; - char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_fs_chains *chains = chain->chains; enum mlx5e_tc_attr_to_reg chain_to_reg; struct mlx5_modify_hdr *mod_hdr; From 736dfe4e68b868829a1e89dfef4a44c1580d4478 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 18 Jan 2022 13:31:54 +0200 Subject: [PATCH 346/367] net/mlx5e: Don't treat small ceil values as unlimited in HTB offload The hardware spec defines max_average_bw == 0 as "unlimited bandwidth". max_average_bw is calculated as `ceil / BYTES_IN_MBIT`, which can become 0 when ceil is small, leading to an undesired effect of having no bandwidth limit. This commit fixes it by rounding up small values of ceil to 1 Mbit/s. Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 00449df98a5e..c1e07496c89c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -570,7 +570,8 @@ static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate, static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw) { - *max_average_bw = div_u64(ceil, BYTES_IN_MBIT); + /* Hardware treats 0 as "unlimited", set at least 1. */ + *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1); qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n", ceil, *max_average_bw); From 5352859b3bfa0ca188b2f1d2c1436fddc781e3b6 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 2 Dec 2021 17:43:50 +0200 Subject: [PATCH 347/367] net/mlx5e: IPsec: Fix crypto offload for non TCP/UDP encapsulated traffic IPsec crypto offload always set the ethernet segment checksum flags with the inner L4 header checksum flag enabled for encapsulated IPsec offloaded packet regardless of the encapsulated L4 header type, and even if it doesn't exists in the first place, this breaks non TCP/UDP traffic as such. Set the inner L4 checksum flag only when the encapsulated L4 header protocol is TCP/UDP using software parser swp_inner_l4_offset field as indication. Fixes: 5cfb540ef27b ("net/mlx5e: Set IPsec WAs only in IP's non checksum partial case.") Signed-off-by: Raed Salem Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index b98db50c3418..428881e0adcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -131,14 +131,17 @@ static inline bool mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { - struct xfrm_offload *xo = xfrm_offload(skb); + u8 inner_ipproto; if (!mlx5e_ipsec_eseg_meta(eseg)) return false; eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; - if (xo->inner_ipproto) { - eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM; + inner_ipproto = xfrm_offload(skb)->inner_ipproto; + if (inner_ipproto) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial_inner++; From de47db0cf7f4a9c555ad204e06baa70b50a70d08 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 2 Dec 2021 17:49:01 +0200 Subject: [PATCH 348/367] net/mlx5e: IPsec: Fix tunnel mode crypto offload for non TCP/UDP traffic IPsec Tunnel mode crypto offload software parser (SWP) setting in data path currently always set the inner L4 offset regardless of the encapsulated L4 header type and whether it exists in the first place, this breaks non TCP/UDP traffic as such. Set the SWP inner L4 offset only when the IPsec tunnel encapsulated L4 header protocol is TCP/UDP. While at it fix inner ip protocol read for setting MLX5_ETH_WQE_SWP_INNER_L4_UDP flag to address the case where the ip header protocol is IPv6. Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload") Signed-off-by: Raed Salem Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 2db9573a3fe6..b56fea142c24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -157,11 +157,20 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, /* Tunnel mode */ if (mode == XFRM_MODE_TUNNEL) { eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; if (xo->proto == IPPROTO_IPV6) eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP) + + switch (xo->inner_ipproto) { + case IPPROTO_UDP: eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + /* IP | ESP | IP | [TCP | UDP] */ + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } return; } From 5b209d1a22afabfb7d644abb10510c5713a3e569 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 1 Feb 2022 15:27:48 +0200 Subject: [PATCH 349/367] net/mlx5e: Avoid implicit modify hdr for decap drop rule Currently the driver adds implicit modify hdr action for decap rules on tunnel devices if the port is an ovs port. This is also done if the action is drop and makes the modify hdr redundant and also the FW doesn't support it and will generate a syndrome. kernel: mlx5_core 0000:08:00.0: mlx5_cmd_check:777:(pid 102063): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x8708c3) Fix it by adding the implicit modify hdr only for fwd actions. Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Fixes: 077cdda764c7 ("net/mlx5e: TC, Fix memory leak with rules with internal port") Signed-off-by: Roi Dayan Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4c6e3c26c1ab..2022fa4a9598 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1414,7 +1414,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_out; - if (!attr->chain && esw_attr->int_port) { + if (!attr->chain && esw_attr->int_port && + attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { /* If decap route device is internal port, change the * source vport value in reg_c0 back to uplink just in * case the rule performs goto chain > 0. If we have a miss From 6d5c900eb64107001e91e1f46bddc254dded8a59 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jan 2022 09:22:41 -0800 Subject: [PATCH 350/367] net/mlx5e: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Use struct_group() in struct vlan_ethhdr around members h_dest and h_source, so they can be referenced together. This will allow memcpy() and sizeof() to more easily reason about sizes, improve readability, and avoid future warnings about writing beyond the end of h_dest. "pahole" shows no size nor member offset changes to struct vlan_ethhdr. "objdump -d" shows no object code changes. Fixes: 34802a42b352 ("net/mlx5e: Do not modify the TX SKB") Signed-off-by: Kees Cook Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- include/linux/if_vlan.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 7fd33b356cc8..ee7ecb88adc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -208,7 +208,7 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) int cpy1_sz = 2 * ETH_ALEN; int cpy2_sz = ihs - cpy1_sz; - memcpy(vhdr, skb->data, cpy1_sz); + memcpy(&vhdr->addrs, skb->data, cpy1_sz); vhdr->h_vlan_proto = skb->vlan_proto; vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8420fe504927..2be4dd7e90a9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -46,8 +46,10 @@ struct vlan_hdr { * @h_vlan_encapsulated_proto: packet type ID or len */ struct vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; + struct_group(addrs, + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + ); __be16 h_vlan_proto; __be16 h_vlan_TCI; __be16 h_vlan_encapsulated_proto; From ad5185735f7dab342fdd0dd41044da4c9ccfef67 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jan 2022 09:20:28 -0800 Subject: [PATCH 351/367] net/mlx5e: Avoid field-overflowing memcpy() In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Use flexible arrays instead of zero-element arrays (which look like they are always overflowing) and split the cross-field memcpy() into two halves that can be appropriately bounds-checked by the compiler. We were doing: #define ETH_HLEN 14 #define VLAN_HLEN 4 ... #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) ... struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); ... struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg = wqe->data; ... memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); target is wqe->eth.inline_hdr.start (which the compiler sees as being 2 bytes in size), but copying 18, intending to write across start (really vlan_tci, 2 bytes). The remaining 16 bytes get written into wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr (8 bytes). struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; /* 0 16 */ struct mlx5_wqe_eth_seg eth; /* 16 16 */ struct mlx5_wqe_data_seg data[]; /* 32 0 */ /* size: 32, cachelines: 1, members: 3 */ /* last cacheline: 32 bytes */ }; struct mlx5_wqe_eth_seg { u8 swp_outer_l4_offset; /* 0 1 */ u8 swp_outer_l3_offset; /* 1 1 */ u8 swp_inner_l4_offset; /* 2 1 */ u8 swp_inner_l3_offset; /* 3 1 */ u8 cs_flags; /* 4 1 */ u8 swp_flags; /* 5 1 */ __be16 mss; /* 6 2 */ __be32 flow_table_metadata; /* 8 4 */ union { struct { __be16 sz; /* 12 2 */ u8 start[2]; /* 14 2 */ } inline_hdr; /* 12 4 */ struct { __be16 type; /* 12 2 */ __be16 vlan_tci; /* 14 2 */ } insert; /* 12 4 */ __be32 trailer; /* 12 4 */ }; /* 12 4 */ /* size: 16, cachelines: 1, members: 9 */ /* last cacheline: 16 bytes */ }; struct mlx5_wqe_data_seg { __be32 byte_count; /* 0 4 */ __be32 lkey; /* 4 4 */ __be64 addr; /* 8 8 */ /* size: 16, cachelines: 1, members: 3 */ /* last cacheline: 16 bytes */ }; So, split the memcpy() so the compiler can reason about the buffer sizes. "pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object code changes (i.e. only source line number induced differences and optimizations). Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support") Signed-off-by: Kees Cook Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 812e6810cb3b..c14e06ca64d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -224,7 +224,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + struct mlx5_wqe_data_seg data[]; }; struct mlx5e_rx_wqe_ll { @@ -241,8 +241,8 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; union { - struct mlx5_mtt inline_mtts[0]; - struct mlx5_klm inline_klms[0]; + DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts); + DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms); }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 338d65e2c9ce..56e10c84a706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start)); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start), + MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start)); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; dseg++; From c86d86131ab75696fc52d98571148842e067d620 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 2 Feb 2022 06:09:04 +0300 Subject: [PATCH 352/367] Partially revert "net/smc: Add netlink net namespace support" The change of sizeof(struct smc_diag_linkinfo) by commit 79d39fc503b4 ("net/smc: Add netlink net namespace support") introduced an ABI regression: since struct smc_diag_lgrinfo contains an object of type "struct smc_diag_linkinfo", offset of all subsequent members of struct smc_diag_lgrinfo was changed by that change. As result, applications compiled with the old version of struct smc_diag_linkinfo will receive garbage in struct smc_diag_lgrinfo.role if the kernel implements this new version of struct smc_diag_linkinfo. Fix this regression by reverting the part of commit 79d39fc503b4 that changes struct smc_diag_linkinfo. After all, there is SMC_GEN_NETLINK interface which is good enough, so there is probably no need to touch the smc_diag ABI in the first place. Fixes: 79d39fc503b4 ("net/smc: Add netlink net namespace support") Signed-off-by: Dmitry V. Levin Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20220202030904.GA9742@altlinux.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/smc_diag.h | 11 +++++------ net/smc/smc_diag.c | 2 -- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index c7008d87f1a4..8cb3a6fef553 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,12 +84,11 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ - __aligned_u64 net_cookie; /* RDMA device net namespace */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ }; struct smc_diag_lgrinfo { diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index b8898c787d23..1fca2f90a9c7 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -146,13 +146,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_link *link = smc->conn.lnk; - struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net); struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = link->ibport, .lnk[0].link_id = link->link_id, - .lnk[0].net_cookie = net->net_cookie, }; memcpy(linfo.lnk[0].ibname, From 186edf7e368c40d06cf727a1ad14698ea67b74ad Mon Sep 17 00:00:00 2001 From: Vratislav Bendel Date: Wed, 2 Feb 2022 12:25:11 +0100 Subject: [PATCH 353/367] selinux: fix double free of cond_list on error paths On error path from cond_read_list() and duplicate_policydb_cond_list() the cond_list_destroy() gets called a second time in caller functions, resulting in NULL pointer deref. Fix this by resetting the cond_list_len to 0 in cond_list_destroy(), making subsequent calls a noop. Also consistently reset the cond_list pointer to NULL after freeing. Cc: stable@vger.kernel.org Signed-off-by: Vratislav Bendel [PM: fix line lengths in the description] Signed-off-by: Paul Moore --- security/selinux/ss/conditional.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 2ec6e5cd25d9..feb206f3acb4 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -152,6 +152,8 @@ static void cond_list_destroy(struct policydb *p) for (i = 0; i < p->cond_list_len; i++) cond_node_destroy(&p->cond_list[i]); kfree(p->cond_list); + p->cond_list = NULL; + p->cond_list_len = 0; } void cond_policydb_destroy(struct policydb *p) @@ -441,7 +443,6 @@ int cond_read_list(struct policydb *p, void *fp) return 0; err: cond_list_destroy(p); - p->cond_list = NULL; return rc; } From 81eb8b0b18789e647e65579303529fd52d861cc2 Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Wed, 2 Feb 2022 09:30:39 +0100 Subject: [PATCH 354/367] net: sparx5: do not refer to skb after passing it on Do not try to use any SKB fields after the packet has been passed up in the receive stack. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Steen Hegelund Link: https://lore.kernel.org/r/20220202083039.3774851-1-steen.hegelund@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index dc7e5ea6ec15..148d431fcde4 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -145,9 +145,9 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) skb_put(skb, byte_cnt - ETH_FCS_LEN); eth_skb_pad(skb); skb->protocol = eth_type_trans(skb, netdev); - netif_rx(skb); netdev->stats.rx_bytes += skb->len; netdev->stats.rx_packets++; + netif_rx(skb); } static int sparx5_inject(struct sparx5 *sparx5, From b67985be400969578d4d4b17299714c0e5d2c07b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Feb 2022 10:46:40 -0800 Subject: [PATCH 355/367] tcp: add missing tcp_skb_can_collapse() test in tcp_shift_skb_data() tcp_shift_skb_data() might collapse three packets into a larger one. P_A, P_B, P_C -> P_ABC Historically, it used a single tcp_skb_can_collapse_to(P_A) call, because it was enough. In commit 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions"), this call was replaced by a call to tcp_skb_can_collapse(P_A, P_B) But the now needed test over P_C has been missed. This probably broke MPTCP. Then later, commit 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs") added an extra condition to tcp_skb_can_collapse(), but the missing call from tcp_shift_skb_data() is also breaking TCP zerocopy, because P_A and P_C might have different skb_zcopy_pure() status. Fixes: 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions") Fixes: 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs") Signed-off-by: Eric Dumazet Cc: Mat Martineau Cc: Talal Ahmad Cc: Arjun Roy Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20220201184640.756716-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dc49a3d551eb..bfe4112e000c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, (mss != tcp_skb_seglen(skb))) goto out; + if (!tcp_skb_can_collapse(prev, skb)) + goto out; len = skb->len; pcount = tcp_skb_pcount(skb); if (tcp_skb_shift(prev, skb, pcount, len)) From 4a81f6da9cb2d1ef911131a6fd8bd15cb61fc772 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 1 Feb 2022 20:39:42 +0100 Subject: [PATCH 356/367] net, neigh: Do not trigger immediate probes on NUD_FAILED from neigh_managed_work syzkaller was able to trigger a deadlock for NTF_MANAGED entries [0]: kworker/0:16/14617 is trying to acquire lock: ffffffff8d4dd370 (&tbl->lock){++-.}-{2:2}, at: ___neigh_create+0x9e1/0x2990 net/core/neighbour.c:652 [...] but task is already holding lock: ffffffff8d4dd370 (&tbl->lock){++-.}-{2:2}, at: neigh_managed_work+0x35/0x250 net/core/neighbour.c:1572 The neighbor entry turned to NUD_FAILED state, where __neigh_event_send() triggered an immediate probe as per commit cd28ca0a3dd1 ("neigh: reduce arp latency") via neigh_probe() given table lock was held. One option to fix this situation is to defer the neigh_probe() back to the neigh_timer_handler() similarly as pre cd28ca0a3dd1. For the case of NTF_MANAGED, this deferral is acceptable given this only happens on actual failure state and regular / expected state is NUD_VALID with the entry already present. The fix adds a parameter to __neigh_event_send() in order to communicate whether immediate probe is allowed or disallowed. Existing call-sites of neigh_event_send() default as-is to immediate probe. However, the neigh_managed_work() disables it via use of neigh_event_send_probe(). [0] __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2956 [inline] check_deadlock kernel/locking/lockdep.c:2999 [inline] validate_chain kernel/locking/lockdep.c:3788 [inline] __lock_acquire.cold+0x149/0x3ab kernel/locking/lockdep.c:5027 lock_acquire kernel/locking/lockdep.c:5639 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5604 __raw_write_lock_bh include/linux/rwlock_api_smp.h:202 [inline] _raw_write_lock_bh+0x2f/0x40 kernel/locking/spinlock.c:334 ___neigh_create+0x9e1/0x2990 net/core/neighbour.c:652 ip6_finish_output2+0x1070/0x14f0 net/ipv6/ip6_output.c:123 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline] __ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170 ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224 dst_output include/net/dst.h:451 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ndisc_send_skb+0xa99/0x17f0 net/ipv6/ndisc.c:508 ndisc_send_ns+0x3a9/0x840 net/ipv6/ndisc.c:650 ndisc_solicit+0x2cd/0x4f0 net/ipv6/ndisc.c:742 neigh_probe+0xc2/0x110 net/core/neighbour.c:1040 __neigh_event_send+0x37d/0x1570 net/core/neighbour.c:1201 neigh_event_send include/net/neighbour.h:470 [inline] neigh_managed_work+0x162/0x250 net/core/neighbour.c:1574 process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307 worker_thread+0x657/0x1110 kernel/workqueue.c:2454 kthread+0x2e9/0x3a0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Fixes: 7482e3841d52 ("net, neigh: Add NTF_MANAGED flag for managed neighbor entries") Reported-by: syzbot+5239d0e1778a500d477a@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Roopa Prabhu Tested-by: syzbot+5239d0e1778a500d477a@syzkaller.appspotmail.com Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220201193942.5055-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 18 +++++++++++++----- net/core/neighbour.c | 18 ++++++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 937389e04c8e..87419f7f5421 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -350,7 +350,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); @@ -460,17 +461,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) refcount_inc(&(n)->refcnt) -static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +static __always_inline int neigh_event_send_probe(struct neighbour *neigh, + struct sk_buff *skb, + const bool immediate_ok) { unsigned long now = jiffies; - + if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); - if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) - return __neigh_event_send(neigh, skb); + if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + return __neigh_event_send(neigh, skb, immediate_ok); return 0; } +static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + return neigh_event_send_probe(neigh, skb, true); +} + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6c2016f7f3d1..ec0bf737b076 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1133,7 +1133,8 @@ out: neigh_release(neigh); } -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok) { int rc; bool immediate_probe = false; @@ -1154,12 +1155,17 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); - neigh->nud_state = NUD_INCOMPLETE; + neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; - next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), - HZ/100); + if (!immediate_ok) { + next = now + 1; + } else { + immediate_probe = true; + next = now + max(NEIGH_VAR(neigh->parms, + RETRANS_TIME), + HZ / 100); + } neigh_add_timer(neigh, next); - immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; @@ -1571,7 +1577,7 @@ static void neigh_managed_work(struct work_struct *work) write_lock_bh(&tbl->lock); list_for_each_entry(neigh, &tbl->managed_list, managed_list) - neigh_event_send(neigh, NULL); + neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME)); write_unlock_bh(&tbl->lock); From b293dcc473d22a62dc6d78de2b15e4f49515db56 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 2 Feb 2022 14:01:58 +0800 Subject: [PATCH 357/367] bpf: Use VM_MAP instead of VM_ALLOC for ringbuf After commit 2fd3fb0be1d1 ("kasan, vmalloc: unpoison VM_ALLOC pages after mapping"), non-VM_ALLOC mappings will be marked as accessible in __get_vm_area_node() when KASAN is enabled. But now the flag for ringbuf area is VM_ALLOC, so KASAN will complain out-of-bound access after vmap() returns. Because the ringbuf area is created by mapping allocated pages, so use VM_MAP instead. After the change, info in /proc/vmallocinfo also changes from [start]-[end] 24576 ringbuf_map_alloc+0x171/0x290 vmalloc user to [start]-[end] 24576 ringbuf_map_alloc+0x171/0x290 vmap user Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Reported-by: syzbot+5ad567a418794b9b5983@syzkaller.appspotmail.com Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220202060158.6260-1-houtao1@huawei.com --- kernel/bpf/ringbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 638d7fd7b375..710ba9de12ce 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -104,7 +104,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) } rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages, - VM_ALLOC | VM_USERMAP, PAGE_KERNEL); + VM_MAP | VM_USERMAP, PAGE_KERNEL); if (rb) { kmemleak_not_leak(pages); rb->pages = pages; From c36c04c2e132fc39f6b658bf607aed4425427fd7 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 1 Feb 2022 19:23:17 -0800 Subject: [PATCH 358/367] Revert "mm/gup: small refactoring: simplify try_grab_page()" This reverts commit 54d516b1d62ff8f17cee2da06e5e4706a0d00b8a That commit did a refactoring that effectively combined fast and slow gup paths (again). And that was again incorrect, for two reasons: a) Fast gup and slow gup get reference counts on pages in different ways and with different goals: see Linus' writeup in commit cd1adf1b63a1 ("Revert "mm/gup: remove try_get_page(), call try_get_compound_head() directly""), and b) try_grab_compound_head() also has a specific check for "FOLL_LONGTERM && !is_pinned(page)", that assumes that the caller can fall back to slow gup. This resulted in new failures, as recently report by Will McVicker [1]. But (a) has problems too, even though they may not have been reported yet. So just revert this. Link: https://lore.kernel.org/r/20220131203504.3458775-1-willmcvicker@google.com [1] Fixes: 54d516b1d62f ("mm/gup: small refactoring: simplify try_grab_page()") Reported-and-tested-by: Will McVicker Cc: Christoph Hellwig Cc: Minchan Kim Cc: Matthew Wilcox Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Vasily Gorbik Cc: stable@vger.kernel.org # 5.15 Signed-off-by: John Hubbard Signed-off-by: Linus Torvalds --- mm/gup.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index f0af462ac1e2..a9d4d724aef7 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -124,8 +124,8 @@ static inline struct page *try_get_compound_head(struct page *page, int refs) * considered failure, and furthermore, a likely bug in the caller, so a warning * is also emitted. */ -struct page *try_grab_compound_head(struct page *page, - int refs, unsigned int flags) +__maybe_unused struct page *try_grab_compound_head(struct page *page, + int refs, unsigned int flags) { if (flags & FOLL_GET) return try_get_compound_head(page, refs); @@ -208,10 +208,35 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) */ bool __must_check try_grab_page(struct page *page, unsigned int flags) { - if (!(flags & (FOLL_GET | FOLL_PIN))) - return true; + WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN)); - return try_grab_compound_head(page, 1, flags); + if (flags & FOLL_GET) + return try_get_page(page); + else if (flags & FOLL_PIN) { + int refs = 1; + + page = compound_head(page); + + if (WARN_ON_ONCE(page_ref_count(page) <= 0)) + return false; + + if (hpage_pincount_available(page)) + hpage_pincount_add(page, 1); + else + refs = GUP_PIN_COUNTING_BIAS; + + /* + * Similar to try_grab_compound_head(): even if using the + * hpage_pincount_add/_sub() routines, be sure to + * *also* increment the normal page refcount field at least + * once, so that the page really is pinned. + */ + page_ref_add(page, refs); + + mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1); + } + + return true; } /** From 7f3bdbc3f13146eb9d07de81ea71f551587a384b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 1 Feb 2022 14:25:04 -0700 Subject: [PATCH 359/367] tools/resolve_btfids: Do not print any commands when building silently When building with 'make -s', there is some output from resolve_btfids: $ make -sj"$(nproc)" oldconfig prepare MKDIR .../tools/bpf/resolve_btfids/libbpf/ MKDIR .../tools/bpf/resolve_btfids//libsubcmd LINK resolve_btfids Silent mode means that no information should be emitted about what is currently being done. Use the $(silent) variable from Makefile.include to avoid defining the msg macro so that there is no information printed. Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") Signed-off-by: Nathan Chancellor Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220201212503.731732-1-nathan@kernel.org --- tools/bpf/resolve_btfids/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 9ddeca947635..320a88ac28c9 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -9,7 +9,11 @@ ifeq ($(V),1) msg = else Q = @ - msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; + ifeq ($(silent),1) + msg = + else + msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; + endif MAKEFLAGS=--no-print-directory endif From 2bdfd2825c9662463371e6691b1a794e97fa36b4 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 2 Feb 2022 22:31:03 -0500 Subject: [PATCH 360/367] cgroup/cpuset: Fix "suspicious RCU usage" lockdep warning It was found that a "suspicious RCU usage" lockdep warning was issued with the rcu_read_lock() call in update_sibling_cpumasks(). It is because the update_cpumasks_hier() function may sleep. So we have to release the RCU lock, call update_cpumasks_hier() and reacquire it afterward. Also add a percpu_rwsem_assert_held() in update_sibling_cpumasks() instead of stating that in the comment. Fixes: 4716909cc5c5 ("cpuset: Track cpusets that use parent's effective_cpus") Signed-off-by: Waiman Long Tested-by: Phil Auld Reviewed-by: Phil Auld Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 804ff5738c5f..4c7254e8f49a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1550,10 +1550,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct cpuset *sibling; struct cgroup_subsys_state *pos_css; + percpu_rwsem_assert_held(&cpuset_rwsem); + /* * Check all its siblings and call update_cpumasks_hier() * if their use_parent_ecpus flag is set in order for them * to use the right effective_cpus value. + * + * The update_cpumasks_hier() function may sleep. So we have to + * release the RCU read lock before calling it. */ rcu_read_lock(); cpuset_for_each_child(sibling, pos_css, parent) { @@ -1561,8 +1566,13 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, continue; if (!sibling->use_parent_ecpus) continue; + if (!css_tryget_online(&sibling->css)) + continue; + rcu_read_unlock(); update_cpumasks_hier(sibling, tmp); + rcu_read_lock(); + css_put(&sibling->css); } rcu_read_unlock(); } From ac62a0174d62ae0f4447c0c8cf35a8e5d793df56 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 1 Feb 2022 09:02:04 -0600 Subject: [PATCH 361/367] dt-bindings: net: qcom,ipa: add optional qcom,qmp property For some systems, the IPA driver must make a request to ensure that its registers are retained across power collapse of the IPA hardware. On such systems, we'll use the existence of the "qcom,qmp" property as a signal that this request is required. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/qcom,ipa.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index b86edf67ce62..58ecc62adfaa 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -107,6 +107,10 @@ properties: - const: imem - const: config + qcom,qmp: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle to the AOSS side-channel message RAM + qcom,smem-states: $ref: /schemas/types.yaml#/definitions/phandle-array description: State bits used in by the AP to signal the modem. @@ -222,6 +226,8 @@ examples: "imem", "config"; + qcom,qmp = <&aoss_qmp>; + qcom,smem-states = <&ipa_smp2p_out 0>, <&ipa_smp2p_out 1>; qcom,smem-state-names = "ipa-clock-enabled-valid", From 34a081761e4e3c35381cbfad609ebae2962fe2f8 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 1 Feb 2022 09:02:05 -0600 Subject: [PATCH 362/367] net: ipa: request IPA register values be retained In some cases, the IPA hardware needs to request the always-on subsystem (AOSS) to coordinate with the IPA microcontroller to retain IPA register values at power collapse. This is done by issuing a QMP request to the AOSS microcontroller. A similar request ondoes that request. We must get and hold the "QMP" handle early, because we might get back EPROBE_DEFER for that. But the actual request should be sent while we know the IPA clock is active, and when we know the microcontroller is operational. Fixes: 1aac309d3207 ("net: ipa: use autosuspend") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_power.c | 52 +++++++++++++++++++++++++++++++++++++ drivers/net/ipa/ipa_power.h | 7 +++++ drivers/net/ipa/ipa_uc.c | 5 ++++ 3 files changed, 64 insertions(+) diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c index b1c6c0fcb654..f2989aac47a6 100644 --- a/drivers/net/ipa/ipa_power.c +++ b/drivers/net/ipa/ipa_power.c @@ -11,6 +11,8 @@ #include #include +#include "linux/soc/qcom/qcom_aoss.h" + #include "ipa.h" #include "ipa_power.h" #include "ipa_endpoint.h" @@ -64,6 +66,7 @@ enum ipa_power_flag { * struct ipa_power - IPA power management information * @dev: IPA device pointer * @core: IPA core clock + * @qmp: QMP handle for AOSS communication * @spinlock: Protects modem TX queue enable/disable * @flags: Boolean state flags * @interconnect_count: Number of elements in interconnect[] @@ -72,6 +75,7 @@ enum ipa_power_flag { struct ipa_power { struct device *dev; struct clk *core; + struct qmp *qmp; spinlock_t spinlock; /* used with STOPPED/STARTED power flags */ DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT); u32 interconnect_count; @@ -382,6 +386,47 @@ void ipa_power_modem_queue_active(struct ipa *ipa) clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags); } +static int ipa_power_retention_init(struct ipa_power *power) +{ + struct qmp *qmp = qmp_get(power->dev); + + if (IS_ERR(qmp)) { + if (PTR_ERR(qmp) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + /* We assume any other error means it's not defined/needed */ + qmp = NULL; + } + power->qmp = qmp; + + return 0; +} + +static void ipa_power_retention_exit(struct ipa_power *power) +{ + qmp_put(power->qmp); + power->qmp = NULL; +} + +/* Control register retention on power collapse */ +void ipa_power_retention(struct ipa *ipa, bool enable) +{ + static const char fmt[] = "{ class: bcm, res: ipa_pc, val: %c }"; + struct ipa_power *power = ipa->power; + char buf[36]; /* Exactly enough for fmt[]; size a multiple of 4 */ + int ret; + + if (!power->qmp) + return; /* Not needed on this platform */ + + (void)snprintf(buf, sizeof(buf), fmt, enable ? '1' : '0'); + + ret = qmp_send(power->qmp, buf, sizeof(buf)); + if (ret) + dev_err(power->dev, "error %d sending QMP %sable request\n", + ret, enable ? "en" : "dis"); +} + int ipa_power_setup(struct ipa *ipa) { int ret; @@ -438,12 +483,18 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data) if (ret) goto err_kfree; + ret = ipa_power_retention_init(power); + if (ret) + goto err_interconnect_exit; + pm_runtime_set_autosuspend_delay(dev, IPA_AUTOSUSPEND_DELAY); pm_runtime_use_autosuspend(dev); pm_runtime_enable(dev); return power; +err_interconnect_exit: + ipa_interconnect_exit(power); err_kfree: kfree(power); err_clk_put: @@ -460,6 +511,7 @@ void ipa_power_exit(struct ipa_power *power) pm_runtime_disable(dev); pm_runtime_dont_use_autosuspend(dev); + ipa_power_retention_exit(power); ipa_interconnect_exit(power); kfree(power); clk_put(clk); diff --git a/drivers/net/ipa/ipa_power.h b/drivers/net/ipa/ipa_power.h index 2151805d7fbb..6f84f057a209 100644 --- a/drivers/net/ipa/ipa_power.h +++ b/drivers/net/ipa/ipa_power.h @@ -40,6 +40,13 @@ void ipa_power_modem_queue_wake(struct ipa *ipa); */ void ipa_power_modem_queue_active(struct ipa *ipa); +/** + * ipa_power_retention() - Control register retention on power collapse + * @ipa: IPA pointer + * @enable: Whether retention should be enabled or disabled + */ +void ipa_power_retention(struct ipa *ipa, bool enable); + /** * ipa_power_setup() - Set up IPA power management * @ipa: IPA pointer diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index 856e55a080a7..fe11910518d9 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -11,6 +11,7 @@ #include "ipa.h" #include "ipa_uc.h" +#include "ipa_power.h" /** * DOC: The IPA embedded microcontroller @@ -154,6 +155,7 @@ static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id) case IPA_UC_RESPONSE_INIT_COMPLETED: if (ipa->uc_powered) { ipa->uc_loaded = true; + ipa_power_retention(ipa, true); pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); ipa->uc_powered = false; @@ -184,6 +186,9 @@ void ipa_uc_deconfig(struct ipa *ipa) ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1); ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0); + if (ipa->uc_loaded) + ipa_power_retention(ipa, false); + if (!ipa->uc_powered) return; From 67d6212afda218d564890d1674bab28e8612170f Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Thu, 27 Jan 2022 15:39:53 -0800 Subject: [PATCH 363/367] Revert "module, async: async_synchronize_full() on module init iff async is used" This reverts commit 774a1221e862b343388347bac9b318767336b20b. We need to finish all async code before the module init sequence is done. In the reverted commit the PF_USED_ASYNC flag was added to mark a thread that called async_schedule(). Then the PF_USED_ASYNC flag was used to determine whether or not async_synchronize_full() needs to be invoked. This works when modprobe thread is calling async_schedule(), but it does not work if module dispatches init code to a worker thread which then calls async_schedule(). For example, PCI driver probing is invoked from a worker thread based on a node where device is attached: if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi); We end up in a situation where a worker thread gets the PF_USED_ASYNC flag set instead of the modprobe thread. As a result, async_synchronize_full() is not invoked and modprobe completes without waiting for the async code to finish. The issue was discovered while loading the pm80xx driver: (scsi_mod.scan=async) modprobe pm80xx worker ... do_init_module() ... pci_call_probe() work_on_cpu(local_pci_probe) local_pci_probe() pm8001_pci_probe() scsi_scan_host() async_schedule() worker->flags |= PF_USED_ASYNC; ... < return from worker > ... if (current->flags & PF_USED_ASYNC) <--- false async_synchronize_full(); Commit 21c3c5d28007 ("block: don't request module during elevator init") fixed the deadlock issue which the reverted commit 774a1221e862 ("module, async: async_synchronize_full() on module init iff async is used") tried to fix. Since commit 0fdff3ec6d87 ("async, kmod: warn on synchronous request_module() from async workers") synchronous module loading from async is not allowed. Given that the original deadlock issue is fixed and it is no longer allowed to call synchronous request_module() from async we can remove PF_USED_ASYNC flag to make module init consistently invoke async_synchronize_full() unless async module probe is requested. Signed-off-by: Igor Pylypiv Reviewed-by: Changyuan Lyu Reviewed-by: Luis Chamberlain Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/async.c | 3 --- kernel/module.c | 25 +++++-------------------- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index f5b2be39a78c..75ba8aa60248 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1680,7 +1680,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ diff --git a/kernel/async.c b/kernel/async.c index b8d7a663497f..b2c4ba5686ee 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); - /* mark that this task has queued an async job, used by module init */ - current->flags |= PF_USED_ASYNC; - /* schedule for execution */ queue_work_node(node, system_unbound_wq, &entry->work); diff --git a/kernel/module.c b/kernel/module.c index 24dab046e16c..46a5c2ed1928 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3725,12 +3725,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base; - /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3756,22 +3750,13 @@ static noinline int do_init_module(struct module *mod) /* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. - * - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. */ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full(); ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + From 1f2cfdd349b7647f438c1e552dc1b983da86d830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 3 Feb 2022 15:50:29 +0100 Subject: [PATCH 364/367] printk: Fix incorrect __user type in proc_dointvec_minmax_sysadmin() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The move of proc_dointvec_minmax_sysadmin() from kernel/sysctl.c to kernel/printk/sysctl.c introduced an incorrect __user attribute to the buffer argument. I spotted this change in [1] as well as the kernel test robot. Revert this change to please sparse: kernel/printk/sysctl.c:20:51: warning: incorrect type in argument 3 (different address spaces) kernel/printk/sysctl.c:20:51: expected void * kernel/printk/sysctl.c:20:51: got void [noderef] __user *buffer Fixes: faaa357a55e0 ("printk: move printk sysctl to printk/sysctl.c") Link: https://lore.kernel.org/r/20220104155024.48023-2-mic@digikod.net [1] Reported-by: kernel test robot Cc: Andrew Morton Cc: John Ogness Cc: Luis Chamberlain Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Xiaoming Ni Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20220203145029.272640-1-mic@digikod.net Signed-off-by: Linus Torvalds --- kernel/printk/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c index 653ae04aab7f..c228343eeb97 100644 --- a/kernel/printk/sysctl.c +++ b/kernel/printk/sysctl.c @@ -12,7 +12,7 @@ static const int ten_thousand = 10000; static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; From 80d4609008e6d696a279e39ae7458c916fcd44c1 Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Thu, 3 Feb 2022 17:00:25 +0100 Subject: [PATCH 365/367] net: stmmac: ensure PTP time register reads are consistent Even if protected from preemption and interrupts, a small time window remains when the 2 register reads could return inconsistent values, each time the "seconds" register changes. This could lead to an about 1-second error in the reported time. Add logic to ensure the "seconds" and "nanoseconds" values are consistent. Fixes: 92ba6888510c ("stmmac: add the support for PTP hw clock driver") Signed-off-by: Yannick Vignon Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20220203160025.750632-1-yannick.vignon@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 074e2cdfb0fa..a7ec9f4d46ce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -145,15 +145,20 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, static void get_systime(void __iomem *ioaddr, u64 *systime) { - u64 ns; + u64 ns, sec0, sec1; - /* Get the TSSS value */ - ns = readl(ioaddr + PTP_STNSR); - /* Get the TSS and convert sec time value to nanosecond */ - ns += readl(ioaddr + PTP_STSR) * 1000000000ULL; + /* Get the TSS value */ + sec1 = readl_relaxed(ioaddr + PTP_STSR); + do { + sec0 = sec1; + /* Get the TSSS value */ + ns = readl_relaxed(ioaddr + PTP_STNSR); + /* Get the TSS value */ + sec1 = readl_relaxed(ioaddr + PTP_STSR); + } while (sec0 != sec1); if (systime) - *systime = ns; + *systime = ns + (sec1 * 1000000000ULL); } static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) From 87563a043cef044fed5db7967a75741cc16ad2b1 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 3 Feb 2022 23:08:11 +0800 Subject: [PATCH 366/367] ax25: fix reference count leaks of ax25_dev The previous commit d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") introduces refcount into ax25_dev, but there are reference leak paths in ax25_ctl_ioctl(), ax25_fwd_ioctl(), ax25_rt_add(), ax25_rt_del() and ax25_rt_opt(). This patch uses ax25_dev_put() and adjusts the position of ax25_addr_ax25dev() to fix reference cout leaks of ax25_dev. Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") Signed-off-by: Duoming Zhou Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20220203150811.42256-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- include/net/ax25.h | 8 +++++--- net/ax25/af_ax25.c | 12 ++++++++---- net/ax25/ax25_dev.c | 24 +++++++++++++++++------- net/ax25/ax25_route.c | 16 +++++++++++----- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 50b417df6221..8221af1811df 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -294,10 +294,12 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } -#define ax25_dev_hold(__ax25_dev) \ - refcount_inc(&((__ax25_dev)->refcount)) +static inline void ax25_dev_hold(ax25_dev *ax25_dev) +{ + refcount_inc(&ax25_dev->refcount); +} -static __inline__ void ax25_dev_put(ax25_dev *ax25_dev) +static inline void ax25_dev_put(ax25_dev *ax25_dev) { if (refcount_dec_and_test(&ax25_dev->refcount)) { kfree(ax25_dev); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 32f61978ff29..3e49d28824ed 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -359,21 +359,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) return -EFAULT; - if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL) - return -ENODEV; - if (ax25_ctl.digi_count > AX25_MAX_DIGIS) return -EINVAL; if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr); + if (!ax25_dev) + return -ENODEV; + digi.ndigi = ax25_ctl.digi_count; for (k = 0; k < digi.ndigi; k++) digi.calls[k] = ax25_ctl.digi_addr[k]; - if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL) + ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev); + if (!ax25) { + ax25_dev_put(ax25_dev); return -ENOTCONN; + } switch (ax25_ctl.cmd) { case AX25_KILL: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 770b787fb7bb..d2a244e1c260 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -85,8 +85,8 @@ void ax25_dev_device_up(struct net_device *dev) spin_lock_bh(&ax25_dev_lock); ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; - ax25_dev_hold(ax25_dev); spin_unlock_bh(&ax25_dev_lock); + ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -115,8 +115,8 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; - ax25_dev_put(ax25_dev); spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put_track(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); @@ -126,8 +126,8 @@ void ax25_dev_device_down(struct net_device *dev) while (s != NULL && s->next != NULL) { if (s->next == ax25_dev) { s->next = ax25_dev->next; - ax25_dev_put(ax25_dev); spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put_track(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); @@ -150,25 +150,35 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) switch (cmd) { case SIOCAX25ADDFWD: - if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL) + fwd_dev = ax25_addr_ax25dev(&fwd->port_to); + if (!fwd_dev) { + ax25_dev_put(ax25_dev); return -EINVAL; - if (ax25_dev->forward != NULL) + } + if (ax25_dev->forward) { + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = fwd_dev->dev; ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: - if (ax25_dev->forward == NULL) + if (!ax25_dev->forward) { + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = NULL; + ax25_dev_put(ax25_dev); break; default: + ax25_dev_put(ax25_dev); return -EINVAL; } - ax25_dev_put(ax25_dev); return 0; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 1e32693833e5..9751207f7757 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_dev *ax25_dev; int i; - if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) - return -EINVAL; if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&route->port_addr); + if (!ax25_dev) + return -EINVAL; + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; @@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } ax25_rt = ax25_rt->next; @@ -108,6 +112,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } @@ -116,11 +121,11 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->dev = ax25_dev->dev; ax25_rt->digipeat = NULL; ax25_rt->ip_mode = ' '; - ax25_dev_put(ax25_dev); if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -133,6 +138,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -173,8 +179,8 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } } - ax25_dev_put(ax25_dev); write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -216,8 +222,8 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) } out: - ax25_dev_put(ax25_dev); write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return err; } From dcb85f85fa6f142aae1fe86f399d4503d49f2b60 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 3 Feb 2022 12:17:54 -0800 Subject: [PATCH 367/367] gcc-plugins/stackleak: Use noinstr in favor of notrace While the stackleak plugin was already using notrace, objtool is now a bit more picky. Update the notrace uses to noinstr. Silences the following objtool warnings when building with: CONFIG_DEBUG_ENTRY=y CONFIG_STACK_VALIDATION=y CONFIG_VMLINUX_VALIDATION=y CONFIG_GCC_PLUGIN_STACKLEAK=y vmlinux.o: warning: objtool: do_syscall_64()+0x9: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: do_int80_syscall_32()+0x9: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: exc_general_protection()+0x22: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: fixup_bad_iret()+0x20: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: do_machine_check()+0x27: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: .text+0x5346e: call to stackleak_erase() leaves .noinstr.text section vmlinux.o: warning: objtool: .entry.text+0x143: call to stackleak_erase() leaves .noinstr.text section vmlinux.o: warning: objtool: .entry.text+0x10eb: call to stackleak_erase() leaves .noinstr.text section vmlinux.o: warning: objtool: .entry.text+0x17f9: call to stackleak_erase() leaves .noinstr.text section Note that the plugin's addition of calls to stackleak_track_stack() from noinstr functions is expected to be safe, as it isn't runtime instrumentation and is self-contained. Cc: Alexander Popov Suggested-by: Peter Zijlstra Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- kernel/stackleak.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/stackleak.c b/kernel/stackleak.c index 66b8af394e58..ddb5a7f48d69 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init); #define skip_erasing() false #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */ -asmlinkage void notrace stackleak_erase(void) +asmlinkage void noinstr stackleak_erase(void) { /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */ unsigned long kstack_ptr = current->lowest_stack; @@ -124,9 +124,8 @@ asmlinkage void notrace stackleak_erase(void) /* Reset the 'lowest_stack' value for the next syscall */ current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64; } -NOKPROBE_SYMBOL(stackleak_erase); -void __used __no_caller_saved_registers notrace stackleak_track_stack(void) +void __used __no_caller_saved_registers noinstr stackleak_track_stack(void) { unsigned long sp = current_stack_pointer;