From 633efc8b3dc96f56f5a57f2a49764853a2fa3f50 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Wed, 26 Oct 2022 10:03:21 +0800 Subject: [PATCH 01/53] net: dsa: Fix possible memory leaks in dsa_loop_init() kmemleak reported memory leaks in dsa_loop_init(): kmemleak: 12 new suspected memory leaks unreferenced object 0xffff8880138ce000 (size 2048): comm "modprobe", pid 390, jiffies 4295040478 (age 238.976s) backtrace: [<000000006a94f1d5>] kmalloc_trace+0x26/0x60 [<00000000a9c44622>] phy_device_create+0x5d/0x970 [<00000000d0ee2afc>] get_phy_device+0xf3/0x2b0 [<00000000dca0c71f>] __fixed_phy_register.part.0+0x92/0x4e0 [<000000008a834798>] fixed_phy_register+0x84/0xb0 [<0000000055223fcb>] dsa_loop_init+0xa9/0x116 [dsa_loop] ... There are two reasons for memleak in dsa_loop_init(). First, fixed_phy_register() create and register phy_device: fixed_phy_register() get_phy_device() phy_device_create() # freed by phy_device_free() phy_device_register() # freed by phy_device_remove() But fixed_phy_unregister() only calls phy_device_remove(). So the memory allocated in phy_device_create() is leaked. Second, when mdio_driver_register() fail in dsa_loop_init(), it just returns and there is no cleanup for phydevs. Fix the problems by catching the error of mdio_driver_register() in dsa_loop_init(), then calling both fixed_phy_unregister() and phy_device_free() to release phydevs. Also add a function for phydevs cleanup to avoid duplacate. Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver") Signed-off-by: Chen Zhongjin Signed-off-by: David S. Miller --- drivers/net/dsa/dsa_loop.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index b9107fe40023..5b139f2206b6 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -376,6 +376,17 @@ static struct mdio_driver dsa_loop_drv = { #define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2) +static void dsa_loop_phydevs_unregister(void) +{ + unsigned int i; + + for (i = 0; i < NUM_FIXED_PHYS; i++) + if (!IS_ERR(phydevs[i])) { + fixed_phy_unregister(phydevs[i]); + phy_device_free(phydevs[i]); + } +} + static int __init dsa_loop_init(void) { struct fixed_phy_status status = { @@ -383,23 +394,23 @@ static int __init dsa_loop_init(void) .speed = SPEED_100, .duplex = DUPLEX_FULL, }; - unsigned int i; + unsigned int i, ret; for (i = 0; i < NUM_FIXED_PHYS; i++) phydevs[i] = fixed_phy_register(PHY_POLL, &status, NULL); - return mdio_driver_register(&dsa_loop_drv); + ret = mdio_driver_register(&dsa_loop_drv); + if (ret) + dsa_loop_phydevs_unregister(); + + return ret; } module_init(dsa_loop_init); static void __exit dsa_loop_exit(void) { - unsigned int i; - mdio_driver_unregister(&dsa_loop_drv); - for (i = 0; i < NUM_FIXED_PHYS; i++) - if (!IS_ERR(phydevs[i])) - fixed_phy_unregister(phydevs[i]); + dsa_loop_phydevs_unregister(); } module_exit(dsa_loop_exit); From 8fdf3f6aba7cfa0c0e2bf66ecca7bb5783acd0d6 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Wed, 26 Oct 2022 20:45:24 +0530 Subject: [PATCH 02/53] net: emaclite: update reset_lock member documentation Instead of generic description, mention what reset_lock actually protects i.e. lock to serialize xmit and tx_timeout execution. Signed-off-by: Radhey Shyam Pandey Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 05848ff15fb5..a3967f8de417 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -108,7 +108,7 @@ * @next_tx_buf_to_use: next Tx buffer to write to * @next_rx_buf_to_use: next Rx buffer to read from * @base_addr: base address of the Emaclite device - * @reset_lock: lock used for synchronization + * @reset_lock: lock to serialize xmit and tx_timeout execution * @deferred_skb: holds an skb (for transmission at a later time) when the * Tx buffer is not free * @phy_dev: pointer to the PHY device From 7354c9024f2835f6122ed9612e21ab379df050f9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Oct 2022 14:21:07 -0700 Subject: [PATCH 03/53] netlink: hide validation union fields from kdoc Mark the validation fields as private, users shouldn't set them directly and they are too complicated to explain in a more succinct way (there's already a long explanation in the comment above). The strict_start_type field is set directly and has a dedicated comment so move that above the "private" section. Link: https://lore.kernel.org/r/20221027212107.2639255-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 4418b1981e31..7db13b3261fc 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -317,19 +317,10 @@ struct nla_policy { u8 validation_type; u16 len; union { - const u32 bitfield32_valid; - const u32 mask; - const char *reject_message; - const struct nla_policy *nested_policy; - struct netlink_range_validation *range; - struct netlink_range_validation_signed *range_signed; - struct { - s16 min, max; - u8 network_byte_order:1; - }; - int (*validate)(const struct nlattr *attr, - struct netlink_ext_ack *extack); - /* This entry is special, and used for the attribute at index 0 + /** + * @strict_start_type: first attribute to validate strictly + * + * This entry is special, and used for the attribute at index 0 * only, and specifies special data about the policy, namely it * specifies the "boundary type" where strict length validation * starts for any attribute types >= this value, also, strict @@ -348,6 +339,20 @@ struct nla_policy { * was added to enforce strict validation from thereon. */ u16 strict_start_type; + + /* private: use NLA_POLICY_*() to set */ + const u32 bitfield32_valid; + const u32 mask; + const char *reject_message; + const struct nla_policy *nested_policy; + struct netlink_range_validation *range; + struct netlink_range_validation_signed *range_signed; + struct { + s16 min, max; + u8 network_byte_order:1; + }; + int (*validate)(const struct nlattr *attr, + struct netlink_ext_ack *extack); }; }; From e4ba4554209f626c52e2e57f26cba49a62663c8b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Oct 2022 20:25:01 -0700 Subject: [PATCH 04/53] net: openvswitch: add missing .resv_start_op I missed one of the families in OvS when annotating .resv_start_op. This triggers the warning added in commit ce48ebdd5651 ("genetlink: limit the use of validation workarounds to old ops"). Reported-by: syzbot+40eb8c0447c0e47a7e9b@syzkaller.appspotmail.com Fixes: 9c5d03d36251 ("genetlink: start to validate reserved header bytes") Link: https://lore.kernel.org/r/20221028032501.2724270-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 155263e73512..8b84869eb2ac 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2544,6 +2544,7 @@ struct genl_family dp_vport_genl_family __ro_after_init = { .parallel_ops = true, .small_ops = dp_vport_genl_ops, .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops), + .resv_start_op = OVS_VPORT_CMD_SET + 1, .mcgrps = &ovs_dp_vport_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, From 1208b93dd901bafe2526fa9db005bbc30e7ae83a Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Thu, 27 Oct 2022 21:21:59 -0700 Subject: [PATCH 05/53] enic: MAINTAINERS: Update enic maintainers Update enic maintainers. Signed-off-by: Govindarajulu Varadarajan Link: https://lore.kernel.org/r/20221028042159.735670-1-govind.varadar@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 10c1344b4473..9e437612dd81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5038,7 +5038,7 @@ F: drivers/scsi/snic/ CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti -M: Govindarajulu Varadarajan <_govind@gmx.com> +M: Satish Kharat S: Supported F: drivers/net/ethernet/cisco/enic/ From 8f279fb00bb29def9ac79e28c5d6d8e07d21f3fb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:56 +0100 Subject: [PATCH 06/53] udp: advertise ipv6 udp support for msghdr::ubuf_info Mark udp ipv6 as supporting msghdr::ubuf_info. In the original commit SOCK_SUPPORT_ZC was supposed to be set by a udp_init_sock() call from udp6_init_sock(), but d38afeec26ed4 ("tcp/udp: Call inet6_destroy_sock() in IPv6 ...") removed it and so ipv6 udp misses the flag. Cc: # 6.0 Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv6/udp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 129ec5a9b0eb..bc65e5b7195b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -66,6 +66,7 @@ int udpv6_init_sock(struct sock *sk) { skb_queue_head_init(&udp_sk(sk)->reader_queue); sk->sk_destruct = udpv6_destruct_sock; + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); return 0; } From fee9ac06647e59a69fb7aec58f25267c134264b4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:57 +0100 Subject: [PATCH 07/53] net: remove SOCK_SUPPORT_ZC from sockmap sockmap replaces ->sk_prot with its own callbacks, we should remove SOCK_SUPPORT_ZC as the new proto doesn't support msghdr::ubuf_info. Cc: # 6.0 Reported-by: Jakub Kicinski Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- include/net/sock.h | 7 +++++++ net/ipv4/tcp_bpf.c | 4 ++-- net/ipv4/udp_bpf.c | 4 ++-- net/unix/unix_bpf.c | 8 ++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 22f8bab583dd..5db02546941c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1889,6 +1889,13 @@ void sock_kfree_s(struct sock *sk, void *mem, int size); void sock_kzfree_s(struct sock *sk, void *mem, int size); void sk_send_sigurg(struct sock *sk); +static inline void sock_replace_proto(struct sock *sk, struct proto *proto) +{ + if (sk->sk_socket) + clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + WRITE_ONCE(sk->sk_prot, proto); +} + struct sockcm_cookie { u64 transmit_time; u32 mark; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index a1626afe87a1..c501c329b1db 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -607,7 +607,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) } else { sk->sk_write_space = psock->saved_write_space; /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); } return 0; } @@ -620,7 +620,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) } /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); + sock_replace_proto(sk, &tcp_bpf_prots[family][config]); return 0; } EXPORT_SYMBOL_GPL(tcp_bpf_update_proto); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index ff15918b7bdc..e5dc91d0e079 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -141,14 +141,14 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } if (sk->sk_family == AF_INET6) udp_bpf_check_v6_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]); + sock_replace_proto(sk, &udp_bpf_prots[family]); return 0; } EXPORT_SYMBOL_GPL(udp_bpf_update_proto); diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 7cf14c6b1725..e9bf15513961 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -145,12 +145,12 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } unix_dgram_bpf_check_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot); + sock_replace_proto(sk, &unix_dgram_bpf_prot); return 0; } @@ -158,12 +158,12 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r { if (restore) { sk->sk_write_space = psock->saved_write_space; - WRITE_ONCE(sk->sk_prot, psock->sk_proto); + sock_replace_proto(sk, psock->sk_proto); return 0; } unix_stream_bpf_check_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot); + sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; } From e276d62dcfdee6582486e8b8344dd869518e14be Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Oct 2022 00:25:58 +0100 Subject: [PATCH 08/53] net/ulp: remove SOCK_SUPPORT_ZC from tls sockets Remove SOCK_SUPPORT_ZC when we're setting ulp as it might not support msghdr::ubuf_info, e.g. like TLS replacing ->sk_prot with a new set of handlers. Cc: # 6.0 Reported-by: Jakub Kicinski Fixes: e993ffe3da4bc ("net: flag sockets supporting msghdr originated zerocopy") Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ulp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 7c27aa629af1..9ae50b1bd844 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -136,6 +136,9 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (icsk->icsk_ulp_ops) goto out_err; + if (sk->sk_socket) + clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + err = ulp_ops->init(sk); if (err) goto out_err; From 71b7786ea478f3c4611deff4d2b9676b0c17c56b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 27 Oct 2022 00:25:59 +0100 Subject: [PATCH 09/53] net: also flag accepted sockets supporting msghdr originated zerocopy Without this only the client initiated tcp sockets have SOCK_SUPPORT_ZC. The listening socket on the server also has it, but the accepted connections didn't, which meant IORING_OP_SEND[MSG]_ZC will always fails with -EOPNOTSUPP. Fixes: e993ffe3da4b ("net: flag sockets supporting msghdr originated zerocopy") Cc: # 6.0 CC: Jens Axboe Link: https://lore.kernel.org/io-uring/20221024141503.22b4e251@kernel.org/T/#m38aa19b0b825758fb97860a38ad13122051f9dda Signed-off-by: Stefan Metzmacher Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3dd02396517d..4728087c42a5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -754,6 +754,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags, (TCPF_ESTABLISHED | TCPF_SYN_RECV | TCPF_CLOSE_WAIT | TCPF_CLOSE))); + if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) + set_bit(SOCK_SUPPORT_ZC, &newsock->flags); sock_graft(sk2, newsock); newsock->state = SS_CONNECTED; From 21ce2c121fa07b00b0906bd781590ea362e82ea2 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Thu, 27 Oct 2022 12:56:55 +0300 Subject: [PATCH 10/53] net: ethernet: adi: adin1110: Fix notifiers ADIN1110 was registering netdev_notifiers on each device probe. This leads to warnings/probe failures because of double registration of the same notifier when to adin1110/2111 devices are connected to the same system. Move the registration of netdev_notifiers in module init call, in this way multiple driver instances can use the same notifiers. Fixes: bc93e19d088b ("net: ethernet: adi: Add ADIN1110 support") Signed-off-by: Alexandru Tachici Link: https://lore.kernel.org/r/20221027095655.89890-2-alexandru.tachici@analog.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/adi/adin1110.c | 38 ++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index 1744d623999d..606c97610808 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -1512,16 +1512,15 @@ static struct notifier_block adin1110_switchdev_notifier = { .notifier_call = adin1110_switchdev_event, }; -static void adin1110_unregister_notifiers(void *data) +static void adin1110_unregister_notifiers(void) { unregister_switchdev_blocking_notifier(&adin1110_switchdev_blocking_notifier); unregister_switchdev_notifier(&adin1110_switchdev_notifier); unregister_netdevice_notifier(&adin1110_netdevice_nb); } -static int adin1110_setup_notifiers(struct adin1110_priv *priv) +static int adin1110_setup_notifiers(void) { - struct device *dev = &priv->spidev->dev; int ret; ret = register_netdevice_notifier(&adin1110_netdevice_nb); @@ -1536,13 +1535,14 @@ static int adin1110_setup_notifiers(struct adin1110_priv *priv) if (ret < 0) goto err_sdev; - return devm_add_action_or_reset(dev, adin1110_unregister_notifiers, NULL); + return 0; err_sdev: unregister_switchdev_notifier(&adin1110_switchdev_notifier); err_netdev: unregister_netdevice_notifier(&adin1110_netdevice_nb); + return ret; } @@ -1613,10 +1613,6 @@ static int adin1110_probe_netdevs(struct adin1110_priv *priv) if (ret < 0) return ret; - ret = adin1110_setup_notifiers(priv); - if (ret < 0) - return ret; - for (i = 0; i < priv->cfg->ports_nr; i++) { ret = devm_register_netdev(dev, priv->ports[i]->netdev); if (ret < 0) { @@ -1693,7 +1689,31 @@ static struct spi_driver adin1110_driver = { .probe = adin1110_probe, .id_table = adin1110_spi_id, }; -module_spi_driver(adin1110_driver); + +static int __init adin1110_driver_init(void) +{ + int ret; + + ret = adin1110_setup_notifiers(); + if (ret < 0) + return ret; + + ret = spi_register_driver(&adin1110_driver); + if (ret < 0) { + adin1110_unregister_notifiers(); + return ret; + } + + return 0; +} + +static void __exit adin1110_exit(void) +{ + adin1110_unregister_notifiers(); + spi_unregister_driver(&adin1110_driver); +} +module_init(adin1110_driver_init); +module_exit(adin1110_exit); MODULE_DESCRIPTION("ADIN1110 Network driver"); MODULE_AUTHOR("Alexandru Tachici "); From a2c65a9d0568b6737c02b54f00b80716a53fac61 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 27 Oct 2022 17:54:39 +0300 Subject: [PATCH 11/53] net: dsa: fall back to default tagger if we can't load the one from DT DSA tagging protocol drivers can be changed at runtime through sysfs and at probe time through the device tree (support for the latter was added later). When changing through sysfs, it is assumed that the module for the new tagging protocol was already loaded into the kernel (in fact this is only a concern for Ocelot/Felix switches, where we have tag_ocelot.ko and tag_ocelot_8021q.ko; for every other switch, the default and alternative protocols are compiled within the same .ko, so there is nothing for the user to load). The kernel cannot currently call request_module(), because it has no way of constructing the modalias name of the tagging protocol driver ("dsa_tag-%d", where the number is one of DSA_TAG_PROTO_*_VALUE). The device tree only contains the string name of the tagging protocol ("ocelot-8021q"), and the only mapping between the string and the DSA_TAG_PROTO_OCELOT_8021Q_VALUE is present in tag_ocelot_8021q.ko. So this is a chicken-and-egg situation and dsa_core.ko has nothing based on which it can automatically request the insertion of the module. As a consequence, if CONFIG_NET_DSA_TAG_OCELOT_8021Q is built as module, the switch will forever defer probing. The long-term solution is to make DSA call request_module() somehow, but that probably needs some refactoring. What we can do to keep operating with existing device tree blobs is to cancel the attempt to change the tagging protocol with the one specified there, and to remain operating with the default one. Depending on the situation, the default protocol might still allow some functionality (in the case of ocelot, it does), and it's better to have that than to fail to probe. Fixes: deff710703d8 ("net: dsa: Allow default tag protocol to be overridden from DT") Link: https://lore.kernel.org/lkml/20221027113248.420216-1-michael@walle.cc/ Reported-by: Heiko Thiery Reported-by: Michael Walle Signed-off-by: Vladimir Oltean Tested-by: Michael Walle Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221027145439.3086017-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index af0e2c0394ac..e504a18fc125 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1409,9 +1409,9 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, const char *user_protocol) { + const struct dsa_device_ops *tag_ops = NULL; struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; - const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol default_proto; /* Find out which protocol the switch would prefer. */ @@ -1434,10 +1434,17 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, } tag_ops = dsa_find_tagger_by_name(user_protocol); - } else { - tag_ops = dsa_tag_driver_get(default_proto); + if (IS_ERR(tag_ops)) { + dev_warn(ds->dev, + "Failed to find a tagging driver for protocol %s, using default\n", + user_protocol); + tag_ops = NULL; + } } + if (!tag_ops) + tag_ops = dsa_tag_driver_get(default_proto); + if (IS_ERR(tag_ops)) { if (PTR_ERR(tag_ops) == -ENOPROTOOPT) return -EPROBE_DEFER; From 8e4aae6b8ca76afb1fb64dcb24be44ba814e7f8a Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:29 +0800 Subject: [PATCH 12/53] nfc: fdp: Fix potential memory leak in fdp_nci_send() fdp_nci_send() will call fdp_nci_i2c_write that will not free skb in the function. As a result, when fdp_nci_i2c_write() finished, the skb will memleak. fdp_nci_send() should free skb after fdp_nci_i2c_write() finished. Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/nfc/fdp/fdp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index c6b3334f24c9..f12f903a9dd1 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -249,11 +249,19 @@ static int fdp_nci_close(struct nci_dev *ndev) static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) { struct fdp_nci_info *info = nci_get_drvdata(ndev); + int ret; if (atomic_dec_and_test(&info->data_pkt_counter)) info->data_pkt_counter_cb(ndev); - return info->phy_ops->write(info->phy, skb); + ret = info->phy_ops->write(info->phy, skb); + if (ret < 0) { + kfree_skb(skb); + return ret; + } + + consume_skb(skb); + return 0; } static int fdp_nci_request_firmware(struct nci_dev *ndev) From 7bf1ed6aff0f70434bd0cdd45495e83f1dffb551 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:30 +0800 Subject: [PATCH 13/53] nfc: nxp-nci: Fix potential memory leak in nxp_nci_send() nxp_nci_send() will call nxp_nci_i2c_write(), and only free skb when nxp_nci_i2c_write() failed. However, even if the nxp_nci_i2c_write() run succeeds, the skb will not be freed in nxp_nci_i2c_write(). As the result, the skb will memleak. nxp_nci_send() should also free the skb when nxp_nci_i2c_write() succeeds. Fixes: dece45855a8b ("NFC: nxp-nci: Add support for NXP NCI chips") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/nfc/nxp-nci/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 7c93d484dc1b..580cb6ecffee 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -80,10 +80,13 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return -EINVAL; r = info->phy_ops->write(info->phy_id, skb); - if (r < 0) + if (r < 0) { kfree_skb(skb); + return r; + } - return r; + consume_skb(skb); + return 0; } static int nxp_nci_rf_pll_unlocked_ntf(struct nci_dev *ndev, From 3a146b7e3099dc7cf3114f627d9b79291e2d2203 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:31 +0800 Subject: [PATCH 14/53] nfc: s3fwrn5: Fix potential memory leak in s3fwrn5_nci_send() s3fwrn5_nci_send() will call s3fwrn5_i2c_write() or s3fwrn82_uart_write(), and free the skb if write() failed. However, even if the write() run succeeds, the skb will not be freed in write(). As the result, the skb will memleak. s3fwrn5_nci_send() should also free the skb when write() succeeds. Fixes: c04c674fadeb ("nfc: s3fwrn5: Add driver for Samsung S3FWRN5 NFC Chip") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/nfc/s3fwrn5/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 1c412007fabb..0270e05b68df 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -110,11 +110,15 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb) } ret = s3fwrn5_write(info, skb); - if (ret < 0) + if (ret < 0) { kfree_skb(skb); + mutex_unlock(&info->mutex); + return ret; + } + consume_skb(skb); mutex_unlock(&info->mutex); - return ret; + return 0; } static int s3fwrn5_nci_post_setup(struct nci_dev *ndev) From 93d904a734a74c54d945a9884b4962977f1176cd Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:32 +0800 Subject: [PATCH 15/53] nfc: nfcmrvl: Fix potential memory leak in nfcmrvl_i2c_nci_send() nfcmrvl_i2c_nci_send() will be called by nfcmrvl_nci_send(), and skb should be freed in nfcmrvl_i2c_nci_send(). However, nfcmrvl_nci_send() will only free skb when i2c_master_send() return >=0, which means skb will memleak when i2c_master_send() failed. Free skb no matter whether i2c_master_send() succeeds. Fixes: b5b3e23e4cac ("NFC: nfcmrvl: add i2c driver") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/nfc/nfcmrvl/i2c.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index acef0cfd76af..24436c9e54c9 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -132,10 +132,15 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, ret = -EREMOTEIO; } else ret = 0; - kfree_skb(skb); } - return ret; + if (ret) { + kfree_skb(skb); + return ret; + } + + consume_skb(skb); + return 0; } static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv, From 06a4df5863f73af193a4ff7abf7cb04058584f06 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 28 Oct 2022 10:09:11 +0800 Subject: [PATCH 16/53] net: fec: fix improper use of NETDEV_TX_BUSY The ndo_start_xmit() method must not free skb when returning NETDEV_TX_BUSY, since caller is going to requeue freed skb. Fix it by returning NETDEV_TX_OK in case of dma_map_single() fails. Fixes: 79f339125ea3 ("net: fec: Add software TSO support") Signed-off-by: Zhang Changzhong Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 28ef4d3c1878..f623c12eaf95 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -713,7 +713,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb, dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "Tx DMA memory map failed\n"); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } bdp->cbd_datlen = cpu_to_fec16(size); @@ -775,7 +775,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq, dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "Tx DMA memory map failed\n"); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } } From 8bdc2acd420c6f3dd1f1c78750ec989f02a1e2b9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Oct 2022 18:05:00 +0300 Subject: [PATCH 17/53] net: sched: Fix use after free in red_enqueue() We can't use "skb" again after passing it to qdisc_enqueue(). This is basically identical to commit 2f09707d0c97 ("sch_sfb: Also store skb len before calling child enqueue"). Fixes: d7f4f332f082 ("sch_red: update backlog as well") Signed-off-by: Dan Carpenter Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_red.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index a5a401f93c1a..98129324e157 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -72,6 +72,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; + unsigned int len; int ret; q->vars.qavg = red_calc_qavg(&q->parms, @@ -126,9 +127,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; } + len = qdisc_pkt_len(skb); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; From e230d36f7d4cf1b89614e2bb4c2f0c55a16d3259 Mon Sep 17 00:00:00 2001 From: Rick Lindsley Date: Fri, 28 Oct 2022 13:35:11 -0700 Subject: [PATCH 18/53] ibmvnic: change maintainers for vnic driver Changed maintainers for vnic driver, since Dany has new responsibilities. Also added Nick Child as reviewer. Signed-off-by: Rick Lindsley Link: https://lore.kernel.org/r/20221028203509.4070154-1-ricklind@us.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9e437612dd81..9b297be5c6ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9775,7 +9775,10 @@ S: Supported F: drivers/pci/hotplug/rpaphp* IBM Power SRIOV Virtual NIC Device Driver -M: Dany Madden +M: Haren Myneni +M: Rick Lindsley +R: Nick Child +R: Dany Madden R: Thomas Falcon L: netdev@vger.kernel.org S: Supported From 363a5328f4b0517e59572118ccfb7c626d81dca9 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sat, 29 Oct 2022 17:41:01 +0800 Subject: [PATCH 19/53] net: tun: fix bugs for oversize packet when napi frags enabled Recently, we got two syzkaller problems because of oversize packet when napi frags enabled. One of the problems is because the first seg size of the iov_iter from user space is very big, it is 2147479538 which is bigger than the threshold value for bail out early in __alloc_pages(). And skb->pfmemalloc is true, __kmalloc_reserve() would use pfmemalloc reserves without __GFP_NOWARN flag. Thus we got a warning as following: ======================================================== WARNING: CPU: 1 PID: 17965 at mm/page_alloc.c:5295 __alloc_pages+0x1308/0x16c4 mm/page_alloc.c:5295 ... Call trace: __alloc_pages+0x1308/0x16c4 mm/page_alloc.c:5295 __alloc_pages_node include/linux/gfp.h:550 [inline] alloc_pages_node include/linux/gfp.h:564 [inline] kmalloc_large_node+0x94/0x350 mm/slub.c:4038 __kmalloc_node_track_caller+0x620/0x8e4 mm/slub.c:4545 __kmalloc_reserve.constprop.0+0x1e4/0x2b0 net/core/skbuff.c:151 pskb_expand_head+0x130/0x8b0 net/core/skbuff.c:1654 __skb_grow include/linux/skbuff.h:2779 [inline] tun_napi_alloc_frags+0x144/0x610 drivers/net/tun.c:1477 tun_get_user+0x31c/0x2010 drivers/net/tun.c:1835 tun_chr_write_iter+0x98/0x100 drivers/net/tun.c:2036 The other problem is because odd IPv6 packets without NEXTHDR_NONE extension header and have big packet length, it is 2127925 which is bigger than ETH_MAX_MTU(65535). After ipv6_gso_pull_exthdrs() in ipv6_gro_receive(), network_header offset and transport_header offset are all bigger than U16_MAX. That would trigger skb->network_header and skb->transport_header overflow error, because they are all '__u16' type. Eventually, it would affect the value for __skb_push(skb, value), and make it be a big value. After __skb_push() in ipv6_gro_receive(), skb->data would less than skb->head, an out of bounds memory bug occurred. That would trigger the problem as following: ================================================================== BUG: KASAN: use-after-free in eth_type_trans+0x100/0x260 ... Call trace: dump_backtrace+0xd8/0x130 show_stack+0x1c/0x50 dump_stack_lvl+0x64/0x7c print_address_description.constprop.0+0xbc/0x2e8 print_report+0x100/0x1e4 kasan_report+0x80/0x120 __asan_load8+0x78/0xa0 eth_type_trans+0x100/0x260 napi_gro_frags+0x164/0x550 tun_get_user+0xda4/0x1270 tun_chr_write_iter+0x74/0x130 do_iter_readv_writev+0x130/0x1ec do_iter_write+0xbc/0x1e0 vfs_writev+0x13c/0x26c To fix the problems, restrict the packet size less than (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN) which has considered reserved skb space in napi_alloc_skb() because transport_header is an offset from skb->head. Add len check in tun_napi_alloc_frags() simply. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Ziyang Xuan Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221029094101.1653855-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 27c6d235cbda..946628050f28 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1459,7 +1459,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, int err; int i; - if (it->nr_segs > MAX_SKB_FRAGS + 1) + if (it->nr_segs > MAX_SKB_FRAGS + 1 || + len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN)) return ERR_PTR(-EMSGSIZE); local_bh_disable(); From d4bc8271db21ea9f1c86a1ca4d64999f184d4aae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Oct 2022 09:52:36 +0200 Subject: [PATCH 20/53] netfilter: nf_tables: netlink notifier might race to release objects commit release path is invoked via call_rcu and it runs lockless to release the objects after rcu grace period. The netlink notifier handler might win race to remove objects that the transaction context is still referencing from the commit release path. Call rcu_barrier() to ensure pending rcu callbacks run to completion if the list of transactions to be destroyed is not empty. Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership") Reported-by: syzbot+8f747f62763bc6c32916@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 58d9cbc9ccdc..2197118aa7b0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10030,6 +10030,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, nft_net = nft_pernet(net); deleted = 0; mutex_lock(&nft_net->commit_mutex); + if (!list_empty(&nf_tables_destroy_list)) + rcu_barrier(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && From 26b5934ff4194e13196bedcba373cd4915071d0e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Oct 2022 09:54:45 +0200 Subject: [PATCH 21/53] netfilter: nf_tables: release flow rule object from commit path No need to postpone this to the commit release path, since no packets are walking over this object, this is accessed from control plane only. This helped uncovered UAF triggered by races with the netlink notifier. Fixes: 9dd732e0bdf5 ("netfilter: nf_tables: memleak flow rule from commit path") Reported-by: syzbot+8f747f62763bc6c32916@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2197118aa7b0..76bd4d03dbda 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8465,9 +8465,6 @@ static void nft_commit_release(struct nft_trans *trans) nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_DELRULE: - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) - nft_flow_rule_destroy(nft_trans_flow_rule(trans)); - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: @@ -8973,6 +8970,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); + + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); From 6c412da54c80a54b1a8b7f89677f6e82f0fabec4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 29 Oct 2022 22:57:11 +0200 Subject: [PATCH 22/53] sfc: Fix an error handling path in efx_pci_probe() If an error occurs after the first kzalloc() the corresponding memory allocation is never freed. Add the missing kfree() in the error handling path, as already done in the remove() function. Fixes: 7e773594dada ("sfc: Separate efx_nic memory from net_device memory") Signed-off-by: Christophe JAILLET Acked-by: Martin Habets Link: https://lore.kernel.org/r/dc114193121c52c8fa3779e49bdd99d4b41344a9.1667077009.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 054d5ce6029e..0556542d7a6b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1059,8 +1059,10 @@ static int efx_pci_probe(struct pci_dev *pci_dev, /* Allocate and initialise a struct net_device */ net_dev = alloc_etherdev_mq(sizeof(probe_data), EFX_MAX_CORE_TX_QUEUES); - if (!net_dev) - return -ENOMEM; + if (!net_dev) { + rc = -ENOMEM; + goto fail0; + } probe_ptr = netdev_priv(net_dev); *probe_ptr = probe_data; efx->net_dev = net_dev; @@ -1132,6 +1134,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev, WARN_ON(rc > 0); netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc); free_netdev(net_dev); + fail0: + kfree(probe_data); return rc; } From 486c292230166c2d61701d3c984bf9143588ea28 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 30 Oct 2022 22:36:34 +0100 Subject: [PATCH 23/53] net: lan966x: Fix the MTU calculation When the MTU was changed, the lan966x didn't take in consideration the L2 header and the FCS. So the HW was configured with a smaller value than what was desired. Therefore the correct value to configure the HW would be new_mtu + ETH_HLEN + ETH_FCS_LEN. The vlan tag is not considered here, because at the time when the blamed commit was added, there was no vlan filtering support. The vlan fix will be part of the next patch. Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 2 +- drivers/net/ethernet/microchip/lan966x/lan966x_main.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index be2fd030cccb..b3070c3fcad0 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -386,7 +386,7 @@ static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu) int old_mtu = dev->mtu; int err; - lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(new_mtu), + lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(LAN966X_HW_MTU(new_mtu)), lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); dev->mtu = new_mtu; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 9656071b8289..4ec33999e4df 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -26,6 +26,8 @@ #define LAN966X_BUFFER_MEMORY (160 * 1024) #define LAN966X_BUFFER_MIN_SZ 60 +#define LAN966X_HW_MTU(mtu) ((mtu) + ETH_HLEN + ETH_FCS_LEN) + #define PGID_AGGR 64 #define PGID_SRC 80 #define PGID_ENTRIES 89 From 25f28bb1b4a7717a9df3aa574d210374ebb6bb23 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 30 Oct 2022 22:36:35 +0100 Subject: [PATCH 24/53] net: lan966x: Adjust maximum frame size when vlan is enabled/disabled When vlan filtering is enabled/disabled, it is required to adjust the maximum received frame size that it can received. When vlan filtering is enabled, it would all to receive extra 4 bytes, that are the vlan tag. So the maximum frame size would be 1522 with a vlan tag. If vlan filtering is disabled then the maximum frame size would be 1518 regardless if there is or not a vlan tag. Fixes: 6d2c186afa5d ("net: lan966x: Add vlan support.") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/lan966x/lan966x_regs.h | 15 +++++++++++++++ .../net/ethernet/microchip/lan966x/lan966x_vlan.c | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 1d90b93dd417..fb5087fef22e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -585,6 +585,21 @@ enum lan966x_target { #define DEV_MAC_MAXLEN_CFG_MAX_LEN_GET(x)\ FIELD_GET(DEV_MAC_MAXLEN_CFG_MAX_LEN, x) +/* DEV:MAC_CFG_STATUS:MAC_TAGS_CFG */ +#define DEV_MAC_TAGS_CFG(t) __REG(TARGET_DEV, t, 8, 28, 0, 1, 44, 12, 0, 1, 4) + +#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA BIT(1) +#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA_SET(x)\ + FIELD_PREP(DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA, x) +#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA_GET(x)\ + FIELD_GET(DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA, x) + +#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0) +#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA_SET(x)\ + FIELD_PREP(DEV_MAC_TAGS_CFG_VLAN_AWR_ENA, x) +#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA_GET(x)\ + FIELD_GET(DEV_MAC_TAGS_CFG_VLAN_AWR_ENA, x) + /* DEV:MAC_CFG_STATUS:MAC_IFG_CFG */ #define DEV_MAC_IFG_CFG(t) __REG(TARGET_DEV, t, 8, 28, 0, 1, 44, 20, 0, 1, 4) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c index 8d7260cd7da9..3c44660128da 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c @@ -169,6 +169,12 @@ void lan966x_vlan_port_apply(struct lan966x_port *port) ANA_VLAN_CFG_VLAN_POP_CNT, lan966x, ANA_VLAN_CFG(port->chip_port)); + lan_rmw(DEV_MAC_TAGS_CFG_VLAN_AWR_ENA_SET(port->vlan_aware) | + DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA_SET(port->vlan_aware), + DEV_MAC_TAGS_CFG_VLAN_AWR_ENA | + DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA, + lan966x, DEV_MAC_TAGS_CFG(port->chip_port)); + /* Drop frames with multicast source address */ val = ANA_DROP_CFG_DROP_MC_SMAC_ENA_SET(1); if (port->vlan_aware && !pvid) From 872ad758f9b7fb4eb42aebaf64e50c5b29b7ffe5 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 30 Oct 2022 22:36:36 +0100 Subject: [PATCH 25/53] net: lan966x: Fix FDMA when MTU is changed When MTU is changed, FDMA is required to calculate what is the maximum size of the frame that it can received. So it can calculate what is the page order needed to allocate for the received frames. The first problem was that, when the max MTU was calculated it was reading the value from dev and not from HW, so in this way it was missing L2 header + the FCS. The other problem was that once the skb is created using __build_skb_around, it would reserve some space for skb_shared_info. So if we received a frame which size is at the limit of the page order then the creating will failed because it would not have space to put all the data. Fixes: 2ea1cbac267e ("net: lan966x: Update FDMA to change MTU.") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c | 8 ++++++-- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index a42035cec611..c235edd2b182 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -668,12 +668,14 @@ static int lan966x_fdma_get_max_mtu(struct lan966x *lan966x) int i; for (i = 0; i < lan966x->num_phys_ports; ++i) { + struct lan966x_port *port; int mtu; - if (!lan966x->ports[i]) + port = lan966x->ports[i]; + if (!port) continue; - mtu = lan966x->ports[i]->dev->mtu; + mtu = lan_rd(lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); if (mtu > max_mtu) max_mtu = mtu; } @@ -733,6 +735,8 @@ int lan966x_fdma_change_mtu(struct lan966x *lan966x) max_mtu = lan966x_fdma_get_max_mtu(lan966x); max_mtu += IFH_LEN * sizeof(u32); + max_mtu += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + max_mtu += VLAN_HLEN * 2; if (round_up(max_mtu, PAGE_SIZE) / PAGE_SIZE - 1 == lan966x->rx.page_order) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index b3070c3fcad0..20ee5b28f70a 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -395,7 +395,7 @@ static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu) err = lan966x_fdma_change_mtu(lan966x); if (err) { - lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(old_mtu), + lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(LAN966X_HW_MTU(old_mtu)), lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); dev->mtu = old_mtu; } From fc57062f98b0b0ae52bc584d8fd5ac77c50df607 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 31 Oct 2022 14:34:21 +0100 Subject: [PATCH 26/53] net: lan966x: Fix unmapping of received frames using FDMA When lan966x was receiving a frame, then it was building the skb and after that it was calling dma_unmap_single with frame size as the length. This actually has 2 issues: 1. It is using a length to map and a different length to unmap. 2. When the unmap was happening, the data was sync for cpu but it could be that this will overwrite what build_skb was initializing. The fix for these two problems is to change the order of operations. First to sync the frame for cpu, then to build the skb and in the end to unmap using the correct size but without sync the frame again for cpu. Fixes: c8349639324a ("net: lan966x: Add FDMA functionality") Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20221031133421.1283196-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/lan966x/lan966x_fdma.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index c235edd2b182..e6948939ccc2 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -414,13 +414,15 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) /* Get the received frame and unmap it */ db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; page = rx->page[rx->dcb_index][rx->db_index]; + + dma_sync_single_for_cpu(lan966x->dev, (dma_addr_t)db->dataptr, + FDMA_DCB_STATUS_BLOCKL(db->status), + DMA_FROM_DEVICE); + skb = build_skb(page_address(page), PAGE_SIZE << rx->page_order); if (unlikely(!skb)) goto unmap_page; - dma_unmap_single(lan966x->dev, (dma_addr_t)db->dataptr, - FDMA_DCB_STATUS_BLOCKL(db->status), - DMA_FROM_DEVICE); skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status)); lan966x_ifh_get_src_port(skb->data, &src_port); @@ -429,6 +431,10 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) if (WARN_ON(src_port >= lan966x->num_phys_ports)) goto free_skb; + dma_unmap_single_attrs(lan966x->dev, (dma_addr_t)db->dataptr, + PAGE_SIZE << rx->page_order, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + skb->dev = lan966x->ports[src_port]->dev; skb_pull(skb, IFH_LEN * sizeof(u32)); @@ -454,9 +460,9 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) free_skb: kfree_skb(skb); unmap_page: - dma_unmap_page(lan966x->dev, (dma_addr_t)db->dataptr, - FDMA_DCB_STATUS_BLOCKL(db->status), - DMA_FROM_DEVICE); + dma_unmap_single_attrs(lan966x->dev, (dma_addr_t)db->dataptr, + PAGE_SIZE << rx->page_order, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); __free_pages(page, rx->page_order); return NULL; From ecaf75ffd5f5db320d8b1da0198eef5a5ce64a3f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 31 Oct 2022 13:34:07 +0100 Subject: [PATCH 27/53] netlink: introduce bigendian integer types Jakub reported that the addition of the "network_byte_order" member in struct nla_policy increases size of 32bit platforms. Instead of scraping the bit from elsewhere Johannes suggested to add explicit NLA_BE types instead, so do this here. NLA_POLICY_MAX_BE() macro is removed again, there is no need for it: NLA_POLICY_MAX(NLA_BE.., ..) will do the right thing. NLA_BE64 can be added later. Fixes: 08724ef69907 ("netlink: introduce NLA_POLICY_MAX_BE") Reported-by: Jakub Kicinski Suggested-by: Johannes Berg Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20221031123407.9158-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 19 +++++++++-------- lib/nlattr.c | 41 ++++++++++++++----------------------- net/netfilter/nft_payload.c | 6 +++--- 3 files changed, 28 insertions(+), 38 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 7db13b3261fc..6bfa972f2fbf 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -181,6 +181,8 @@ enum { NLA_S64, NLA_BITFIELD32, NLA_REJECT, + NLA_BE16, + NLA_BE32, __NLA_TYPE_MAX, }; @@ -231,6 +233,7 @@ enum nla_policy_validation { * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, * NLA_S32, NLA_S64, + * NLA_BE16, NLA_BE32, * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" @@ -261,6 +264,8 @@ enum nla_policy_validation { * NLA_U16, * NLA_U32, * NLA_U64, + * NLA_BE16, + * NLA_BE32, * NLA_S8, * NLA_S16, * NLA_S32, @@ -349,7 +354,6 @@ struct nla_policy { struct netlink_range_validation_signed *range_signed; struct { s16 min, max; - u8 network_byte_order:1; }; int (*validate)(const struct nlattr *attr, struct netlink_ext_ack *extack); @@ -374,6 +378,8 @@ struct nla_policy { (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64) #define __NLA_IS_SINT_TYPE(tp) \ (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) +#define __NLA_IS_BEINT_TYPE(tp) \ + (tp == NLA_BE16 || tp == NLA_BE32) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ @@ -387,6 +393,7 @@ struct nla_policy { #define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ __NLA_IS_SINT_TYPE(tp) || \ + __NLA_IS_BEINT_TYPE(tp) || \ tp == NLA_MSECS || \ tp == NLA_BINARY) + tp) #define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ @@ -394,6 +401,8 @@ struct nla_policy { tp != NLA_REJECT && \ tp != NLA_NESTED && \ tp != NLA_NESTED_ARRAY) + tp) +#define NLA_ENSURE_BEINT_TYPE(tp) \ + (__NLA_ENSURE(__NLA_IS_BEINT_TYPE(tp)) + tp) #define NLA_POLICY_RANGE(tp, _min, _max) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ @@ -424,14 +433,6 @@ struct nla_policy { .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MAX, \ .max = _max, \ - .network_byte_order = 0, \ -} - -#define NLA_POLICY_MAX_BE(tp, _max) { \ - .type = NLA_ENSURE_UINT_TYPE(tp), \ - .validation_type = NLA_VALIDATE_MAX, \ - .max = _max, \ - .network_byte_order = 1, \ } #define NLA_POLICY_MASK(tp, _mask) { \ diff --git a/lib/nlattr.c b/lib/nlattr.c index 40f22b177d69..b67a53e29b8f 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -124,10 +124,12 @@ void nla_get_range_unsigned(const struct nla_policy *pt, range->max = U8_MAX; break; case NLA_U16: + case NLA_BE16: case NLA_BINARY: range->max = U16_MAX; break; case NLA_U32: + case NLA_BE32: range->max = U32_MAX; break; case NLA_U64: @@ -159,31 +161,6 @@ void nla_get_range_unsigned(const struct nla_policy *pt, } } -static u64 nla_get_attr_bo(const struct nla_policy *pt, - const struct nlattr *nla) -{ - switch (pt->type) { - case NLA_U16: - if (pt->network_byte_order) - return ntohs(nla_get_be16(nla)); - - return nla_get_u16(nla); - case NLA_U32: - if (pt->network_byte_order) - return ntohl(nla_get_be32(nla)); - - return nla_get_u32(nla); - case NLA_U64: - if (pt->network_byte_order) - return be64_to_cpu(nla_get_be64(nla)); - - return nla_get_u64(nla); - } - - WARN_ON_ONCE(1); - return 0; -} - static int nla_validate_range_unsigned(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, @@ -197,9 +174,13 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, value = nla_get_u8(nla); break; case NLA_U16: + value = nla_get_u16(nla); + break; case NLA_U32: + value = nla_get_u32(nla); + break; case NLA_U64: - value = nla_get_attr_bo(pt, nla); + value = nla_get_u64(nla); break; case NLA_MSECS: value = nla_get_u64(nla); @@ -207,6 +188,12 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, case NLA_BINARY: value = nla_len(nla); break; + case NLA_BE16: + value = ntohs(nla_get_be16(nla)); + break; + case NLA_BE32: + value = ntohl(nla_get_be32(nla)); + break; default: return -EINVAL; } @@ -334,6 +321,8 @@ static int nla_validate_int_range(const struct nla_policy *pt, case NLA_U64: case NLA_MSECS: case NLA_BINARY: + case NLA_BE16: + case NLA_BE32: return nla_validate_range_unsigned(pt, nla, extack, validate); case NLA_S8: case NLA_S16: diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 088244f9d838..4edd899aeb9b 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -173,10 +173,10 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX_BE(NLA_U32, 255), - [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX_BE(NLA_U32, 255), + [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255), + [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX_BE(NLA_U32, 255), + [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; From 5c26159c97b324dc5174a5713eafb8c855cf8106 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 26 Oct 2022 14:32:16 +0200 Subject: [PATCH 28/53] ipvs: use explicitly signed chars The `char` type with no explicit sign is sometimes signed and sometimes unsigned. This code will break on platforms such as arm, where char is unsigned. So mark it here as explicitly signed, so that the todrop_counter decrement and subsequent comparison is correct. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 8c04bb57dd6f..7c4866c04343 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1265,8 +1265,8 @@ static inline int todrop_entry(struct ip_vs_conn *cp) * The drop rate array needs tuning for real environments. * Called from timer bh only => no locking */ - static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - static char todrop_counter[9] = {0}; + static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + static signed char todrop_counter[9] = {0}; int i; /* if the conn entry hasn't lasted for 60 seconds, don't drop it. From 3d00c6a0da8ddcf75213e004765e4a42acc71d5d Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 31 Oct 2022 20:07:04 +0800 Subject: [PATCH 29/53] ipvs: fix WARNING in __ip_vs_cleanup_batch() During the initialization of ip_vs_conn_net_init(), if file ip_vs_conn or ip_vs_conn_sync fails to be created, the initialization is successful by default. Therefore, the ip_vs_conn or ip_vs_conn_sync file doesn't be found during the remove. The following is the stack information: name 'ip_vs_conn_sync' WARNING: CPU: 3 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 Call Trace: __ip_vs_cleanup_batch+0x7d/0x120 ops_exit_list+0x125/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: 61b1ab4583e2 ("IPVS: netns, add basic init per netns.") Signed-off-by: Zhengchao Shao Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7c4866c04343..13534e02346c 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1447,20 +1447,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { atomic_set(&ipvs->conn_count, 0); - proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, - &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state)); - proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, - &ip_vs_conn_sync_seq_ops, - sizeof(struct ip_vs_iter_state)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, + &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn; + + if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, + &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn_sync; +#endif + return 0; + +#ifdef CONFIG_PROC_FS +err_conn_sync: + remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); +err_conn: + return -ENOMEM; +#endif } void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) { /* flush all the connection entries first */ ip_vs_conn_flush(ipvs); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net); +#endif } int __init ip_vs_conn_init(void) From 5663ed63adb9619c98ab7479aa4606fa9b7a548c Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 31 Oct 2022 20:07:05 +0800 Subject: [PATCH 30/53] ipvs: fix WARNING in ip_vs_app_net_cleanup() During the initialization of ip_vs_app_net_init(), if file ip_vs_app fails to be created, the initialization is successful by default. Therefore, the ip_vs_app file doesn't be found during the remove in ip_vs_app_net_cleanup(). It will cause WRNING. The following is the stack information: name 'ip_vs_app' WARNING: CPU: 1 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 Call Trace: ops_exit_list+0x125/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: 457c4cbc5a3d ("[NET]: Make /proc/net per network namespace") Signed-off-by: Zhengchao Shao Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_app.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index f9b16f2b2219..fdacbc3c15be 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -599,13 +599,19 @@ static const struct seq_operations ip_vs_app_seq_ops = { int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->app_list); - proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, - sizeof(struct seq_net_private)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, + &ip_vs_app_seq_ops, + sizeof(struct seq_net_private))) + return -ENOMEM; +#endif return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { unregister_ip_vs_app(ipvs, NULL /* all */); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_app", ipvs->net->proc_net); +#endif } From cbc1dd5b659f5a2c3cba88b197b7443679bb35a0 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 1 Nov 2022 19:52:52 +0800 Subject: [PATCH 31/53] netfilter: nf_nat: Fix possible memory leak in nf_nat_init() In nf_nat_init(), register_nf_nat_bpf() can fail and return directly without any error handling. Then nf_nat_bysource will leak and registering of &nat_net_ops, &follow_master_nat and nf_nat_hook won't be reverted. This leaves wild ops in linkedlists and when another module tries to call register_pernet_operations() or nf_ct_helper_expectfn_register() it triggers page fault: BUG: unable to handle page fault for address: fffffbfff81b964c RIP: 0010:register_pernet_operations+0x1b9/0x5f0 Call Trace: register_pernet_subsys+0x29/0x40 ebtables_init+0x58/0x1000 [ebtables] ... Fixes: 820dc0523e05 ("net: netfilter: move bpf_ct_set_nat_info kfunc in nf_nat_bpf.c") Signed-off-by: Chen Zhongjin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 18319a6e6806..e29e4ccb5c5a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1152,7 +1152,16 @@ static int __init nf_nat_init(void) WARN_ON(nf_nat_hook != NULL); RCU_INIT_POINTER(nf_nat_hook, &nat_hook); - return register_nf_nat_bpf(); + ret = register_nf_nat_bpf(); + if (ret < 0) { + RCU_INIT_POINTER(nf_nat_hook, NULL); + nf_ct_helper_expectfn_unregister(&follow_master_nat); + synchronize_net(); + unregister_pernet_subsys(&nat_net_ops); + kvfree(nf_nat_bysource); + } + + return ret; } static void __exit nf_nat_cleanup(void) From e97c089d7a49f67027395ddf70bf327eeac2611e Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 29 Oct 2022 00:10:49 +0800 Subject: [PATCH 32/53] rose: Fix NULL pointer dereference in rose_send_frame() The syzkaller reported an issue: KASAN: null-ptr-deref in range [0x0000000000000380-0x0000000000000387] CPU: 0 PID: 4069 Comm: kworker/0:15 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Workqueue: rcu_gp srcu_invoke_callbacks RIP: 0010:rose_send_frame+0x1dd/0x2f0 net/rose/rose_link.c:101 Call Trace: rose_transmit_clear_request+0x1d5/0x290 net/rose/rose_link.c:255 rose_rx_call_request+0x4c0/0x1bc0 net/rose/af_rose.c:1009 rose_loopback_timer+0x19e/0x590 net/rose/rose_loopback.c:111 call_timer_fn+0x1a0/0x6b0 kernel/time/timer.c:1474 expire_timers kernel/time/timer.c:1519 [inline] __run_timers.part.0+0x674/0xa80 kernel/time/timer.c:1790 __run_timers kernel/time/timer.c:1768 [inline] run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1803 __do_softirq+0x1d0/0x9c8 kernel/softirq.c:571 [...] It triggers NULL pointer dereference when 'neigh->dev->dev_addr' is called in the rose_send_frame(). It's the first occurrence of the `neigh` is in rose_loopback_timer() as `rose_loopback_neigh', and the 'dev' in 'rose_loopback_neigh' is initialized sa nullptr. It had been fixed by commit 3b3fd068c56e3fbea30090859216a368398e39bf ("rose: Fix Null pointer dereference in rose_send_frame()") ever. But it's introduced by commit 3c53cd65dece47dd1f9d3a809f32e59d1d87b2b8 ("rose: check NULL rose_loopback_neigh->loopback") again. We fix it by add NULL check in rose_transmit_clear_request(). When the 'dev' in 'neigh' is NULL, we don't reply the request and just clear it. syzkaller don't provide repro, and I provide a syz repro like: r0 = syz_init_net_socket$bt_sco(0x1f, 0x5, 0x2) ioctl$sock_inet_SIOCSIFFLAGS(r0, 0x8914, &(0x7f0000000180)={'rose0\x00', 0x201}) r1 = syz_init_net_socket$rose(0xb, 0x5, 0x0) bind$rose(r1, &(0x7f00000000c0)=@full={0xb, @dev, @null, 0x0, [@null, @null, @netrom, @netrom, @default, @null]}, 0x40) connect$rose(r1, &(0x7f0000000240)=@short={0xb, @dev={0xbb, 0xbb, 0xbb, 0x1, 0x0}, @remote={0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x1}, 0x1, @netrom={0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0x0, 0x0}}, 0x1c) Fixes: 3c53cd65dece ("rose: check NULL rose_loopback_neigh->loopback") Signed-off-by: Zhang Qilong Signed-off-by: David S. Miller --- net/rose/rose_link.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index 8b96a56d3a49..0f77ae8ef944 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -236,6 +236,9 @@ void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, uns unsigned char *dptr; int len; + if (!neigh->dev) + return; + len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3; if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) From e7d1d4d9ac0dfa40be4c2c8abd0731659869b297 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 31 Oct 2022 20:13:40 +0800 Subject: [PATCH 33/53] mISDN: fix possible memory leak in mISDN_register_device() Afer commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of device is allocated dynamically, add put_device() to give up the reference, so that the name can be freed in kobject_cleanup() when the refcount is 0. Set device class before put_device() to avoid null release() function WARN message in device_release(). Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/isdn/mISDN/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index a41b4b264594..7ea0100f218a 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -233,11 +233,12 @@ mISDN_register_device(struct mISDNdevice *dev, if (debug & DEBUG_CORE) printk(KERN_DEBUG "mISDN_register %s %d\n", dev_name(&dev->dev), dev->id); + dev->dev.class = &mISDN_class; + err = create_stack(dev); if (err) goto error1; - dev->dev.class = &mISDN_class; dev->dev.platform_data = dev; dev->dev.parent = parent; dev_set_drvdata(&dev->dev, dev); @@ -249,8 +250,8 @@ mISDN_register_device(struct mISDNdevice *dev, error3: delete_stack(dev); - return err; error1: + put_device(&dev->dev); return err; } From bf00f5426074249058a106a6edbb89e4b25a4d79 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 31 Oct 2022 20:13:41 +0800 Subject: [PATCH 34/53] isdn: mISDN: netjet: fix wrong check of device registration The class is set in mISDN_register_device(), but if device_add() returns error, it will lead to delete a device without added, fix this by using device_is_registered() to check if the device is registered. Fixes: a900845e5661 ("mISDN: Add support for Traverse Technologies NETJet PCI cards") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/netjet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index a52f275f8263..f8447135a902 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -956,7 +956,7 @@ nj_release(struct tiger_hw *card) } if (card->irq > 0) free_irq(card->irq, card); - if (card->isac.dch.dev.dev.class) + if (device_is_registered(&card->isac.dch.dev.dev)) mISDN_unregister_device(&card->isac.dch.dev); for (i = 0; i < 2; i++) { From 510841da1fcc16f702440ab58ef0b4d82a9056b7 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 2 Nov 2022 10:40:47 +0100 Subject: [PATCH 35/53] netfilter: ipset: enforce documented limit to prevent allocating huge memory Daniel Xu reported that the hash:net,iface type of the ipset subsystem does not limit adding the same network with different interfaces to a set, which can lead to huge memory usage or allocation failure. The quick reproducer is $ ipset create ACL.IN.ALL_PERMIT hash:net,iface hashsize 1048576 timeout 0 $ for i in $(seq 0 100); do /sbin/ipset add ACL.IN.ALL_PERMIT 0.0.0.0/0,kaf_$i timeout 0 -exist; done The backtrace when vmalloc fails: [Tue Oct 25 00:13:08 2022] ipset: vmalloc error: size 1073741848, exceeds total pages <...> [Tue Oct 25 00:13:08 2022] Call Trace: [Tue Oct 25 00:13:08 2022] [Tue Oct 25 00:13:08 2022] dump_stack_lvl+0x48/0x60 [Tue Oct 25 00:13:08 2022] warn_alloc+0x155/0x180 [Tue Oct 25 00:13:08 2022] __vmalloc_node_range+0x72a/0x760 [Tue Oct 25 00:13:08 2022] ? hash_netiface4_add+0x7c0/0xb20 [Tue Oct 25 00:13:08 2022] ? __kmalloc_large_node+0x4a/0x90 [Tue Oct 25 00:13:08 2022] kvmalloc_node+0xa6/0xd0 [Tue Oct 25 00:13:08 2022] ? hash_netiface4_resize+0x99/0x710 <...> The fix is to enforce the limit documented in the ipset(8) manpage: > The internal restriction of the hash:net,iface set type is that the same > network prefix cannot be stored with more than 64 different interfaces > in a single set. Fixes: ccf0a4b7fc68 ("netfilter: ipset: Add bucketsize parameter to all hash types") Reported-by: Daniel Xu Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 30 ++++++--------------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6e391308431d..3adc291d9ce1 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -42,31 +42,8 @@ #define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE) /* Max muber of elements in the array block when tuned */ #define AHASH_MAX_TUNED 64 - #define AHASH_MAX(h) ((h)->bucketsize) -/* Max number of elements can be tuned */ -#ifdef IP_SET_HASH_WITH_MULTI -static u8 -tune_bucketsize(u8 curr, u32 multi) -{ - u32 n; - - if (multi < curr) - return curr; - - n = curr + AHASH_INIT_SIZE; - /* Currently, at listing one hash bucket must fit into a message. - * Therefore we have a hard limit here. - */ - return n > curr && n <= AHASH_MAX_TUNED ? n : curr; -} -#define TUNE_BUCKETSIZE(h, multi) \ - ((h)->bucketsize = tune_bucketsize((h)->bucketsize, multi)) -#else -#define TUNE_BUCKETSIZE(h, multi) -#endif - /* A hash bucket */ struct hbucket { struct rcu_head rcu; /* for call_rcu */ @@ -936,7 +913,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, goto set_full; /* Create a new slot */ if (n->pos >= n->size) { - TUNE_BUCKETSIZE(h, multi); +#ifdef IP_SET_HASH_WITH_MULTI + if (h->bucketsize >= AHASH_MAX_TUNED) + goto set_full; + else if (h->bucketsize < multi) + h->bucketsize += AHASH_INIT_SIZE; +#endif if (n->size >= AHASH_MAX(h)) { /* Trigger rehashing */ mtype_data_next(&h->next, d); From 3aff8aaca4e36dc8b17eaa011684881a80238966 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 5 Oct 2022 00:27:18 +0300 Subject: [PATCH 36/53] Bluetooth: L2CAP: Fix use-after-free caused by l2cap_reassemble_sdu Fix the race condition between the following two flows that run in parallel: 1. l2cap_reassemble_sdu -> chan->ops->recv (l2cap_sock_recv_cb) -> __sock_queue_rcv_skb. 2. bt_sock_recvmsg -> skb_recv_datagram, skb_free_datagram. An SKB can be queued by the first flow and immediately dequeued and freed by the second flow, therefore the callers of l2cap_reassemble_sdu can't use the SKB after that function returns. However, some places continue accessing struct l2cap_ctrl that resides in the SKB's CB for a short time after l2cap_reassemble_sdu returns, leading to a use-after-free condition (the stack trace is below, line numbers for kernel 5.19.8). Fix it by keeping a local copy of struct l2cap_ctrl. BUG: KASAN: use-after-free in l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth Read of size 1 at addr ffff88812025f2f0 by task kworker/u17:3/43169 Workqueue: hci0 hci_rx_work [bluetooth] Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) print_report.cold (mm/kasan/report.c:314 mm/kasan/report.c:429) ? l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth kasan_report (mm/kasan/report.c:162 mm/kasan/report.c:493) ? l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth l2cap_rx (net/bluetooth/l2cap_core.c:7236 net/bluetooth/l2cap_core.c:7271) bluetooth ret_from_fork (arch/x86/entry/entry_64.S:306) Allocated by task 43169: kasan_save_stack (mm/kasan/common.c:39) __kasan_slab_alloc (mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:469) kmem_cache_alloc_node (mm/slab.h:750 mm/slub.c:3243 mm/slub.c:3293) __alloc_skb (net/core/skbuff.c:414) l2cap_recv_frag (./include/net/bluetooth/bluetooth.h:425 net/bluetooth/l2cap_core.c:8329) bluetooth l2cap_recv_acldata (net/bluetooth/l2cap_core.c:8442) bluetooth hci_rx_work (net/bluetooth/hci_core.c:3642 net/bluetooth/hci_core.c:3832) bluetooth process_one_work (kernel/workqueue.c:2289) worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2437) kthread (kernel/kthread.c:376) ret_from_fork (arch/x86/entry/entry_64.S:306) Freed by task 27920: kasan_save_stack (mm/kasan/common.c:39) kasan_set_track (mm/kasan/common.c:45) kasan_set_free_info (mm/kasan/generic.c:372) ____kasan_slab_free (mm/kasan/common.c:368 mm/kasan/common.c:328) slab_free_freelist_hook (mm/slub.c:1780) kmem_cache_free (mm/slub.c:3536 mm/slub.c:3553) skb_free_datagram (./include/net/sock.h:1578 ./include/net/sock.h:1639 net/core/datagram.c:323) bt_sock_recvmsg (net/bluetooth/af_bluetooth.c:295) bluetooth l2cap_sock_recvmsg (net/bluetooth/l2cap_sock.c:1212) bluetooth sock_read_iter (net/socket.c:1087) new_sync_read (./include/linux/fs.h:2052 fs/read_write.c:401) vfs_read (fs/read_write.c:482) ksys_read (fs/read_write.c:620) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) Link: https://lore.kernel.org/linux-bluetooth/CAKErNvoqga1WcmoR3-0875esY6TVWFQDandbVZncSiuGPBQXLA@mail.gmail.com/T/#u Fixes: d2a7ac5d5d3a ("Bluetooth: Add the ERTM receive state machine") Fixes: 4b51dae96731 ("Bluetooth: Add streaming mode receive and incoming packet classifier") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 48 ++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1f34b82ca0ec..2283871d3f01 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6885,6 +6885,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { + struct l2cap_ctrl local_control; int err = 0; bool skb_in_use = false; @@ -6909,15 +6910,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, chan->buffer_seq = chan->expected_tx_seq; skb_in_use = true; + /* l2cap_reassemble_sdu may free skb, hence invalidate + * control, so make a copy in advance to use it after + * l2cap_reassemble_sdu returns and to avoid the race + * condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but + * it was freed by skb_free_datagram. + */ + local_control = *control; err = l2cap_reassemble_sdu(chan, skb, control); if (err) break; - if (control->final) { + if (local_control.final) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) { - control->final = 0; - l2cap_retransmit_all(chan, control); + local_control.final = 0; + l2cap_retransmit_all(chan, &local_control); l2cap_ertm_send(chan); } } @@ -7297,11 +7315,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { + /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store + * the txseq field in advance to use it after l2cap_reassemble_sdu + * returns and to avoid the race condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but it was freed by + * skb_free_datagram. + */ + u16 txseq = control->txseq; + BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, chan->rx_state); - if (l2cap_classify_txseq(chan, control->txseq) == - L2CAP_TXSEQ_EXPECTED) { + if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) { l2cap_pass_to_tx(chan, control); BT_DBG("buffer_seq %u->%u", chan->buffer_seq, @@ -7324,8 +7358,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, } } - chan->last_acked_seq = control->txseq; - chan->expected_tx_seq = __next_seq(chan, control->txseq); + chan->last_acked_seq = txseq; + chan->expected_tx_seq = __next_seq(chan, txseq); return 0; } From b36a234dc438cb6b76fc929a8df9a0e59c8acf23 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Tue, 11 Oct 2022 22:25:33 +0300 Subject: [PATCH 37/53] Bluetooth: hci_conn: Fix CIS connection dst_type handling hci_connect_cis and iso_connect_cis call hci_bind_cis inconsistently with dst_type being either ISO socket address type or the HCI type, but these values cannot be mixed like this. Fix this by using only the HCI type. CIS connection dst_type was also not initialized in hci_bind_cis, even though it is used in hci_conn_hash_lookup_cis to find existing connections. Set the value in hci_bind_cis, so that existing CIS connections are found e.g. when doing deferred socket connections, also when dst_type is not 0 (ADDR_LE_DEV_PUBLIC). Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 7 +------ net/bluetooth/iso.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7a59c4487050..1176bad5d833 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1761,6 +1761,7 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, if (!cis) return ERR_PTR(-ENOMEM); cis->cleanup = cis_cleanup; + cis->dst_type = dst_type; } if (cis->state == BT_CONNECTED) @@ -2140,12 +2141,6 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *le; struct hci_conn *cis; - /* Convert from ISO socket address type to HCI address type */ - if (dst_type == BDADDR_LE_PUBLIC) - dst_type = ADDR_LE_DEV_PUBLIC; - else - dst_type = ADDR_LE_DEV_RANDOM; - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) le = hci_connect_le(hdev, dst, dst_type, false, BT_SECURITY_LOW, diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 613039ba5dbf..f825857db6d0 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -235,6 +235,14 @@ static int iso_chan_add(struct iso_conn *conn, struct sock *sk, return err; } +static inline u8 le_addr_type(u8 bdaddr_type) +{ + if (bdaddr_type == BDADDR_LE_PUBLIC) + return ADDR_LE_DEV_PUBLIC; + else + return ADDR_LE_DEV_RANDOM; +} + static int iso_connect_bis(struct sock *sk) { struct iso_conn *conn; @@ -328,14 +336,16 @@ static int iso_connect_cis(struct sock *sk) /* Just bind if DEFER_SETUP has been set */ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { hcon = hci_bind_cis(hdev, &iso_pi(sk)->dst, - iso_pi(sk)->dst_type, &iso_pi(sk)->qos); + le_addr_type(iso_pi(sk)->dst_type), + &iso_pi(sk)->qos); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto done; } } else { hcon = hci_connect_cis(hdev, &iso_pi(sk)->dst, - iso_pi(sk)->dst_type, &iso_pi(sk)->qos); + le_addr_type(iso_pi(sk)->dst_type), + &iso_pi(sk)->qos); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto done; From 160fbcf3bfb93c3c086427f9f4c8bc70f217e9be Mon Sep 17 00:00:00 2001 From: Soenke Huster Date: Wed, 12 Oct 2022 09:45:06 +0200 Subject: [PATCH 38/53] Bluetooth: virtio_bt: Use skb_put to set length By using skb_put we ensure that skb->tail is set correctly. Currently, skb->tail is always zero, which leads to errors, such as the following page fault in rfcomm_recv_frame: BUG: unable to handle page fault for address: ffffed1021de29ff #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page RIP: 0010:rfcomm_run+0x831/0x4040 (net/bluetooth/rfcomm/core.c:1751) Fixes: afd2daa26c7a ("Bluetooth: Add support for virtio transport driver") Signed-off-by: Soenke Huster Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/virtio_bt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 67c21263f9e0..fd281d439505 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -219,7 +219,7 @@ static void virtbt_rx_work(struct work_struct *work) if (!skb) return; - skb->len = len; + skb_put(skb, len); virtbt_rx_handle(vbt, skb); if (virtbt_add_inbuf(vbt) < 0) From 0d0e2d032811280b927650ff3c15fe5020e82533 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 17 Oct 2022 15:58:13 +0800 Subject: [PATCH 39/53] Bluetooth: L2CAP: fix use-after-free in l2cap_conn_del() When l2cap_recv_frame() is invoked to receive data, and the cid is L2CAP_CID_A2MP, if the channel does not exist, it will create a channel. However, after a channel is created, the hold operation of the channel is not performed. In this case, the value of channel reference counting is 1. As a result, after hci_error_reset() is triggered, l2cap_conn_del() invokes the close hook function of A2MP to release the channel. Then l2cap_chan_unlock(chan) will trigger UAF issue. The process is as follows: Receive data: l2cap_data_channel() a2mp_channel_create() --->channel ref is 2 l2cap_chan_put() --->channel ref is 1 Triger event: hci_error_reset() hci_dev_do_close() ... l2cap_disconn_cfm() l2cap_conn_del() l2cap_chan_hold() --->channel ref is 2 l2cap_chan_del() --->channel ref is 1 a2mp_chan_close_cb() --->channel ref is 0, release channel l2cap_chan_unlock() --->UAF of channel The detailed Call Trace is as follows: BUG: KASAN: use-after-free in __mutex_unlock_slowpath+0xa6/0x5e0 Read of size 8 at addr ffff8880160664b8 by task kworker/u11:1/7593 Workqueue: hci0 hci_error_reset Call Trace: dump_stack_lvl+0xcd/0x134 print_report.cold+0x2ba/0x719 kasan_report+0xb1/0x1e0 kasan_check_range+0x140/0x190 __mutex_unlock_slowpath+0xa6/0x5e0 l2cap_conn_del+0x404/0x7b0 l2cap_disconn_cfm+0x8c/0xc0 hci_conn_hash_flush+0x11f/0x260 hci_dev_close_sync+0x5f5/0x11f0 hci_dev_do_close+0x2d/0x70 hci_error_reset+0x9e/0x140 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Allocated by task 7593: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0xa9/0xd0 l2cap_chan_create+0x40/0x930 amp_mgr_create+0x96/0x990 a2mp_channel_create+0x7d/0x150 l2cap_recv_frame+0x51b8/0x9a70 l2cap_recv_acldata+0xaa3/0xc00 hci_rx_work+0x702/0x1220 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Freed by task 7593: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_set_free_info+0x20/0x30 ____kasan_slab_free+0x167/0x1c0 slab_free_freelist_hook+0x89/0x1c0 kfree+0xe2/0x580 l2cap_chan_put+0x22a/0x2d0 l2cap_conn_del+0x3fc/0x7b0 l2cap_disconn_cfm+0x8c/0xc0 hci_conn_hash_flush+0x11f/0x260 hci_dev_close_sync+0x5f5/0x11f0 hci_dev_do_close+0x2d/0x70 hci_error_reset+0x9e/0x140 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Last potentially related work creation: kasan_save_stack+0x1e/0x40 __kasan_record_aux_stack+0xbe/0xd0 call_rcu+0x99/0x740 netlink_release+0xe6a/0x1cf0 __sock_release+0xcd/0x280 sock_close+0x18/0x20 __fput+0x27c/0xa90 task_work_run+0xdd/0x1a0 exit_to_user_mode_prepare+0x23c/0x250 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Second to last potentially related work creation: kasan_save_stack+0x1e/0x40 __kasan_record_aux_stack+0xbe/0xd0 call_rcu+0x99/0x740 netlink_release+0xe6a/0x1cf0 __sock_release+0xcd/0x280 sock_close+0x18/0x20 __fput+0x27c/0xa90 task_work_run+0xdd/0x1a0 exit_to_user_mode_prepare+0x23c/0x250 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: d0be8347c623 ("Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put") Signed-off-by: Zhengchao Shao Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2283871d3f01..9a32ce634919 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7615,6 +7615,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, return; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); } else { BT_DBG("unknown cid 0x%4.4x", cid); From 7c9524d929648935bac2bbb4c20437df8f9c3f42 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Tue, 18 Oct 2022 10:18:51 +0800 Subject: [PATCH 40/53] Bluetooth: L2CAP: Fix memory leak in vhci_write Syzkaller reports a memory leak as follows: ==================================== BUG: memory leak unreferenced object 0xffff88810d81ac00 (size 240): [...] hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] __alloc_skb+0x1f9/0x270 net/core/skbuff.c:418 [] alloc_skb include/linux/skbuff.h:1257 [inline] [] bt_skb_alloc include/net/bluetooth/bluetooth.h:469 [inline] [] vhci_get_user drivers/bluetooth/hci_vhci.c:391 [inline] [] vhci_write+0x5f/0x230 drivers/bluetooth/hci_vhci.c:511 [] call_write_iter include/linux/fs.h:2192 [inline] [] new_sync_write fs/read_write.c:491 [inline] [] vfs_write+0x42d/0x540 fs/read_write.c:578 [] ksys_write+0x9d/0x160 fs/read_write.c:631 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd ==================================== HCI core will uses hci_rx_work() to process frame, which is queued to the hdev->rx_q tail in hci_recv_frame() by HCI driver. Yet the problem is that, HCI core may not free the skb after handling ACL data packets. To be more specific, when start fragment does not contain the L2CAP length, HCI core just copies skb into conn->rx_skb and finishes frame process in l2cap_recv_acldata(), without freeing the skb, which triggers the above memory leak. This patch solves it by releasing the relative skb, after processing the above case in l2cap_recv_acldata(). Fixes: 4d7ea8ee90e4 ("Bluetooth: L2CAP: Fix handling fragmented length") Link: https://lore.kernel.org/all/0000000000000d0b1905e6aaef64@google.com/ Reported-and-tested-by: syzbot+8f819e36e01022991cfa@syzkaller.appspotmail.com Signed-off-by: Hawkins Jiawei Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9a32ce634919..1fbe087d6ae4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -8461,9 +8461,8 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) * expected length. */ if (skb->len < L2CAP_LEN_SIZE) { - if (l2cap_recv_frag(conn, skb, conn->mtu) < 0) - goto drop; - return; + l2cap_recv_frag(conn, skb, conn->mtu); + break; } len = get_unaligned_le16(skb->data) + L2CAP_HDR_SIZE; @@ -8507,7 +8506,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) /* Header still could not be read just continue */ if (conn->rx_skb->len < L2CAP_LEN_SIZE) - return; + break; } if (skb->len > conn->rx_len) { From 5638d9ea9c01c77fc11693d48cf719bc7e88f224 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 17 Oct 2022 15:36:23 -0700 Subject: [PATCH 41/53] Bluetooth: hci_conn: Fix not restoring ISO buffer count on disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When disconnecting an ISO link the controller may not generate HCI_EV_NUM_COMP_PKTS for unacked packets which needs to be restored in hci_conn_del otherwise the host would assume they are still in use and would not be able to use all the buffers available. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Luiz Augusto von Dentz Tested-by: Frédéric Danis --- net/bluetooth/hci_conn.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1176bad5d833..a6c12863a253 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1067,10 +1067,21 @@ int hci_conn_del(struct hci_conn *conn) hdev->acl_cnt += conn->sent; } else { struct hci_conn *acl = conn->link; + if (acl) { acl->link = NULL; hci_conn_drop(acl); } + + /* Unacked ISO frames */ + if (conn->type == ISO_LINK) { + if (hdev->iso_pkts) + hdev->iso_cnt += conn->sent; + else if (hdev->le_pkts) + hdev->le_cnt += conn->sent; + else + hdev->acl_cnt += conn->sent; + } } if (conn->amp_mgr) From 711f8c3fb3db61897080468586b970c87c61d9e4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:32 -0700 Subject: [PATCH 42/53] Bluetooth: L2CAP: Fix accepting connection request for invalid SPSM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Bluetooth spec states that the valid range for SPSM is from 0x0001-0x00ff so it is invalid to accept values outside of this range: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A page 1059: Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges CVE: CVE-2022-42896 CC: stable@vger.kernel.org Reported-by: Tamás Koczka Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An --- net/bluetooth/l2cap_core.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1fbe087d6ae4..3eee915fb245 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5813,6 +5813,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), scid, mtu, mps); + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + chan = NULL; + goto response; + } + /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); @@ -6001,6 +6014,18 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, psm = req->psm; + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + goto response; + } + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); memset(&pdu, 0, sizeof(pdu)); From f937b758a188d6fd328a81367087eddbb2fce50f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:33 -0700 Subject: [PATCH 43/53] Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm l2cap_global_chan_by_psm shall not return fixed channels as they are not meant to be connected by (S)PSM. Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3eee915fb245..4d8a1d862f6b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1990,7 +1990,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) continue; - if (c->psm == psm) { + if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) { int src_match, dst_match; int src_any, dst_any; From b1a2cd50c0357f243b7435a732b4e62ba3157a2e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:52 -0700 Subject: [PATCH 44/53] Bluetooth: L2CAP: Fix attempting to access uninitialized memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On l2cap_parse_conf_req the variable efs is only initialized if remote_efs has been set. CVE: CVE-2022-42895 CC: stable@vger.kernel.org Reported-by: Tamás Koczka Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4d8a1d862f6b..9c24947aa41e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3764,7 +3764,8 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); - if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { + if (remote_efs && + test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; chan->remote_stype = efs.stype; chan->remote_msdu = le16_to_cpu(efs.msdu); From 40e4eb324c59e11fcb927aa46742d28aba6ecb8a Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 21:26:45 +0800 Subject: [PATCH 45/53] net: mdio: fix undefined behavior in bit shift for __mdiobus_register Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in drivers/net/phy/mdio_bus.c:586:27 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c __mdiobus_register+0x49d/0x4e0 fixed_mdio_bus_init+0xd8/0x12d do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 Fixes: 4fd5f812c23c ("phylib: allow incremental scanning of an mii bus") Signed-off-by: Gaosheng Cui Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221031132645.168421-1-cuigaosheng1@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index f82090bdf7ab..1cd604cd1fa1 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -583,7 +583,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) } for (i = 0; i < PHY_MAX_ADDR; i++) { - if ((bus->phy_mask & (1 << i)) == 0) { + if ((bus->phy_mask & BIT(i)) == 0) { struct phy_device *phydev; phydev = mdiobus_scan(bus, i); From d6dd2fe71153f0ff748bf188bd4af076fe09a0a6 Mon Sep 17 00:00:00 2001 From: Nick Child Date: Mon, 31 Oct 2022 10:06:42 -0500 Subject: [PATCH 46/53] ibmvnic: Free rwi on reset success Free the rwi structure in the event that the last rwi in the list processed successfully. The logic in commit 4f408e1fa6e1 ("ibmvnic: retry reset if there are no other resets") introduces an issue that results in a 32 byte memory leak whenever the last rwi in the list gets processed. Fixes: 4f408e1fa6e1 ("ibmvnic: retry reset if there are no other resets") Signed-off-by: Nick Child Link: https://lore.kernel.org/r/20221031150642.13356-1-nnac123@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 65dbfbec487a..9282381a438f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3007,19 +3007,19 @@ static void __ibmvnic_reset(struct work_struct *work) rwi = get_next_rwi(adapter); /* - * If there is another reset queued, free the previous rwi - * and process the new reset even if previous reset failed - * (the previous reset could have failed because of a fail - * over for instance, so process the fail over). - * * If there are no resets queued and the previous reset failed, * the adapter would be in an undefined state. So retry the * previous reset as a hard reset. + * + * Else, free the previous rwi and, if there is another reset + * queued, process the new reset even if previous reset failed + * (the previous reset could have failed because of a fail + * over for instance, so process the fail over). */ - if (rwi) - kfree(tmprwi); - else if (rc) + if (!rwi && rc) rwi = tmprwi; + else + kfree(tmprwi); if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER || rwi->reset_reason == VNIC_RESET_MOBILITY || rc)) From 2ae34111fe4eebb69986f6490015b57c88804373 Mon Sep 17 00:00:00 2001 From: Liu Peibao Date: Tue, 1 Nov 2022 14:02:18 +0800 Subject: [PATCH 47/53] stmmac: dwmac-loongson: fix invalid mdio_node In current code "plat->mdio_node" is always NULL, the mdio support is lost as there is no "mdio_bus_data". The original driver could work as the "mdio" variable is never set to false, which is described in commit ("stmmac: dwmac-loongson: fix uninitialized variable ......"). And after this commit merged, the "mdio" variable is always false, causing the mdio supoort logic lost. Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Liu Peibao Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221101060218.16453-1-liupeibao@loongson.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 017dbbda0c1c..79fa7870563b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -51,7 +51,6 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id struct stmmac_resources res; struct device_node *np; int ret, i, phy_mode; - bool mdio = false; np = dev_of_node(&pdev->dev); @@ -69,12 +68,10 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (!plat) return -ENOMEM; + plat->mdio_node = of_get_child_by_name(np, "mdio"); if (plat->mdio_node) { - dev_err(&pdev->dev, "Found MDIO subnode\n"); - mdio = true; - } + dev_info(&pdev->dev, "Found MDIO subnode\n"); - if (mdio) { plat->mdio_bus_data = devm_kzalloc(&pdev->dev, sizeof(*plat->mdio_bus_data), GFP_KERNEL); From 62ff373da2534534c55debe6c724c7fe14adb97f Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 1 Nov 2022 17:37:22 +0800 Subject: [PATCH 48/53] net/smc: Fix possible leaked pernet namespace in smc_init() In smc_init(), register_pernet_subsys(&smc_net_stat_ops) is called without any error handling. If it fails, registering of &smc_net_ops won't be reverted. And if smc_nl_init() fails, &smc_net_stat_ops itself won't be reverted. This leaves wild ops in subsystem linkedlist and when another module tries to call register_pernet_operations() it triggers page fault: BUG: unable to handle page fault for address: fffffbfff81b964c RIP: 0010:register_pernet_operations+0x1b9/0x5f0 Call Trace: register_pernet_subsys+0x29/0x40 ebtables_init+0x58/0x1000 [ebtables] ... Fixes: 194730a9beb5 ("net/smc: Make SMC statistics network namespace aware") Signed-off-by: Chen Zhongjin Reviewed-by: Tony Lu Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20221101093722.127223-1-chenzhongjin@huawei.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3ccbf3c201cd..e12d4fa5aece 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3380,14 +3380,14 @@ static int __init smc_init(void) rc = register_pernet_subsys(&smc_net_stat_ops); if (rc) - return rc; + goto out_pernet_subsys; smc_ism_init(); smc_clc_init(); rc = smc_nl_init(); if (rc) - goto out_pernet_subsys; + goto out_pernet_subsys_stat; rc = smc_pnet_init(); if (rc) @@ -3480,6 +3480,8 @@ static int __init smc_init(void) smc_pnet_exit(); out_nl: smc_nl_exit(); +out_pernet_subsys_stat: + unregister_pernet_subsys(&smc_net_stat_ops); out_pernet_subsys: unregister_pernet_subsys(&smc_net_ops); From f8017317cb0b279b8ab98b0f3901a2e0ac880dad Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 1 Nov 2022 20:15:52 +0800 Subject: [PATCH 49/53] net, neigh: Fix null-ptr-deref in neigh_table_clear() When IPv6 module gets initialized but hits an error in the middle, kenel panic with: KASAN: null-ptr-deref in range [0x0000000000000598-0x000000000000059f] CPU: 1 PID: 361 Comm: insmod Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:__neigh_ifdown.isra.0+0x24b/0x370 RSP: 0018:ffff888012677908 EFLAGS: 00000202 ... Call Trace: neigh_table_clear+0x94/0x2d0 ndisc_cleanup+0x27/0x40 [ipv6] inet6_init+0x21c/0x2cb [ipv6] do_one_initcall+0xd3/0x4d0 do_init_module+0x1ae/0x670 ... Kernel panic - not syncing: Fatal exception When ipv6 initialization fails, it will try to cleanup and calls: neigh_table_clear() neigh_ifdown(tbl, NULL) pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev == NULL)) # dev_net(NULL) triggers null-ptr-deref. Fix it by passing NULL to pneigh_queue_purge() in neigh_ifdown() if dev is NULL, to make kernel not panic immediately. Fixes: 66ba215cb513 ("neigh: fix possible DoS due to net iface start/stop loop") Signed-off-by: Chen Zhongjin Reviewed-by: Eric Dumazet Reviewed-by: Denis V. Lunev Link: https://lore.kernel.org/r/20221101121552.21890-1-chenzhongjin@huawei.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3c4786b99907..a77a85e357e0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -409,7 +409,7 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); + pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; From 628ac04a75ed5ff13647e725f40192da22ef2be8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 1 Nov 2022 20:57:53 +0200 Subject: [PATCH 50/53] bridge: Fix flushing of dynamic FDB entries The following commands should result in all the dynamic FDB entries being flushed, but instead all the non-local (non-permanent) entries are flushed: # bridge fdb add 00:aa:bb:cc:dd:ee dev dummy1 master static # bridge fdb add 00:11:22:33:44:55 dev dummy1 master dynamic # ip link set dev br0 type bridge fdb_flush # bridge fdb show brport dummy1 00:00:00:00:00:01 master br0 permanent 33:33:00:00:00:01 self permanent 01:00:5e:00:00:01 self permanent This is because br_fdb_flush() works with FDB flags and not the corresponding enumerator values. Fix by passing the FDB flag instead. After the fix: # bridge fdb add 00:aa:bb:cc:dd:ee dev dummy1 master static # bridge fdb add 00:11:22:33:44:55 dev dummy1 master dynamic # ip link set dev br0 type bridge fdb_flush # bridge fdb show brport dummy1 00:aa:bb:cc:dd:ee master br0 static 00:00:00:00:00:01 master br0 permanent 33:33:00:00:00:01 self permanent 01:00:5e:00:00:01 self permanent Fixes: 1f78ee14eeac ("net: bridge: fdb: add support for fine-grained flushing") Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20221101185753.2120691-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_netlink.c | 2 +- net/bridge/br_sysfs_br.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5aeb3646e74c..d087fd4c784a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1332,7 +1332,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_FDB_FLUSH]) { struct net_bridge_fdb_flush_desc desc = { - .flags_mask = BR_FDB_STATIC + .flags_mask = BIT(BR_FDB_STATIC) }; br_fdb_flush(br, &desc); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 612e367fff20..ea733542244c 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -345,7 +345,7 @@ static int set_flush(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { struct net_bridge_fdb_flush_desc desc = { - .flags_mask = BR_FDB_STATIC + .flags_mask = BIT(BR_FDB_STATIC) }; br_fdb_flush(br, &desc); From 768b3c745fe5789f2430bdab02f35a9ad1148d97 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 2 Nov 2022 10:06:10 +0800 Subject: [PATCH 51/53] ipv6: fix WARNING in ip6_route_net_exit_late() During the initialization of ip6_route_net_init_late(), if file ipv6_route or rt6_stats fails to be created, the initialization is successful by default. Therefore, the ipv6_route or rt6_stats file doesn't be found during the remove in ip6_route_net_exit_late(). It will cause WRNING. The following is the stack information: name 'rt6_stats' WARNING: CPU: 0 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 PKRU: 55555554 Call Trace: ops_exit_list+0xb0/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: cdb1876192db ("[NETNS][IPV6] route6 - create route6 proc files for the namespace") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221102020610.351330-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 69252eb462b2..2f355f0ec32a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6555,10 +6555,16 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops, - sizeof(struct ipv6_route_iter)); - proc_create_net_single("rt6_stats", 0444, net->proc_net, - rt6_stats_seq_show, NULL); + if (!proc_create_net("ipv6_route", 0, net->proc_net, + &ipv6_route_seq_ops, + sizeof(struct ipv6_route_iter))) + return -ENOMEM; + + if (!proc_create_net_single("rt6_stats", 0444, net->proc_net, + rt6_stats_seq_show, NULL)) { + remove_proc_entry("ipv6_route", net->proc_net); + return -ENOMEM; + } #endif return 0; } From cf6ff0df0fd123493e57278a1bd4414a97511a34 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 31 Oct 2022 19:17:05 -0700 Subject: [PATCH 52/53] vsock: remove the unused 'wait' in vsock_connectible_recvmsg() Remove the unused variable introduced by 19c1b90e1979. Fixes: 19c1b90e1979 ("af_vsock: separate receive data loop") Signed-off-by: Dexuan Cui Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/af_vsock.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ee418701cdee..d258fd43092e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2092,8 +2092,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, const struct vsock_transport *transport; int err; - DEFINE_WAIT(wait); - sk = sock->sk; vsk = vsock_sk(sk); err = 0; From 466a85336fee6e3b35eb97b8405a28302fd25809 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 31 Oct 2022 19:17:06 -0700 Subject: [PATCH 53/53] vsock: fix possible infinite sleep in vsock_connectible_wait_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently vsock_connectible_has_data() may miss a wakeup operation between vsock_connectible_has_data() == 0 and the prepare_to_wait(). Fix the race by adding the process to the wait queue before checking vsock_connectible_has_data(). Fixes: b3f7fd54881b ("af_vsock: separate wait data loop") Signed-off-by: Dexuan Cui Reviewed-by: Stefano Garzarella Reported-by: Frédéric Dalleau Tested-by: Frédéric Dalleau Signed-off-by: Paolo Abeni --- net/vmw_vsock/af_vsock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d258fd43092e..884eca7f6743 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1905,8 +1905,11 @@ static int vsock_connectible_wait_data(struct sock *sk, err = 0; transport = vsk->transport; - while ((data = vsock_connectible_has_data(vsk)) == 0) { + while (1) { prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE); + data = vsock_connectible_has_data(vsk); + if (data != 0) + break; if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) ||