From 244ae992e3e80e5c9c272c77324c831148457f95 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 20 Feb 2024 15:57:10 -0800 Subject: [PATCH 01/13] igc: Fix missing time sync events Fix "double" clearing of interrupts, which can cause external events or timestamps to be missed. The IGC_TSIRC Time Sync Interrupt Cause register can be cleared in two ways, by either reading it or by writing '1' into the specific cause bit. This is documented in section 8.16.1. The following flow was used: 1. read IGC_TSIRC into 'tsicr'; 2. handle the interrupts present in 'tsirc' and mark them in 'ack'; 3. write 'ack' into IGC_TSICR; As both (1) and (3) will clear the interrupt cause, if the same interrupt happens again between (1) and (3) it will be ignored, causing events to be missed. Remove the extra clear in (3). Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Reviewed-by: Kurt Kanzenbach Tested-by: Kurt Kanzenbach # Intel i225 Signed-off-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 81c21a893ede..e447ba037056 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5302,25 +5302,22 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, static void igc_tsync_interrupt(struct igc_adapter *adapter) { - u32 ack, tsauxc, sec, nsec, tsicr; struct igc_hw *hw = &adapter->hw; + u32 tsauxc, sec, nsec, tsicr; struct ptp_clock_event event; struct timespec64 ts; tsicr = rd32(IGC_TSICR); - ack = 0; if (tsicr & IGC_TSICR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) ptp_clock_event(adapter->ptp_clock, &event); - ack |= IGC_TSICR_SYS_WRAP; } if (tsicr & IGC_TSICR_TXTS) { /* retrieve hardware timestamp */ igc_ptp_tx_tstamp_event(adapter); - ack |= IGC_TSICR_TXTS; } if (tsicr & IGC_TSICR_TT0) { @@ -5334,7 +5331,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) wr32(IGC_TSAUXC, tsauxc); adapter->perout[0].start = ts; spin_unlock(&adapter->tmreg_lock); - ack |= IGC_TSICR_TT0; } if (tsicr & IGC_TSICR_TT1) { @@ -5348,7 +5344,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) wr32(IGC_TSAUXC, tsauxc); adapter->perout[1].start = ts; spin_unlock(&adapter->tmreg_lock); - ack |= IGC_TSICR_TT1; } if (tsicr & IGC_TSICR_AUTT0) { @@ -5358,7 +5353,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) event.index = 0; event.timestamp = sec * NSEC_PER_SEC + nsec; ptp_clock_event(adapter->ptp_clock, &event); - ack |= IGC_TSICR_AUTT0; } if (tsicr & IGC_TSICR_AUTT1) { @@ -5368,11 +5362,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) event.index = 1; event.timestamp = sec * NSEC_PER_SEC + nsec; ptp_clock_event(adapter->ptp_clock, &event); - ack |= IGC_TSICR_AUTT1; } - - /* acknowledge the interrupts */ - wr32(IGC_TSICR, ack); } /** From ee14cc9ea19ba9678177e2224a9c58cce5937c73 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 20 Feb 2024 15:57:11 -0800 Subject: [PATCH 02/13] igb: Fix missing time sync events Fix "double" clearing of interrupts, which can cause external events or timestamps to be missed. The E1000_TSIRC Time Sync Interrupt Cause register can be cleared in two ways, by either reading it or by writing '1' into the specific cause bit. This is documented in section 8.16.1. The following flow was used: 1. read E1000_TSIRC into 'tsicr'; 2. handle the interrupts present into 'tsirc' and mark them in 'ack'; 3. write 'ack' into E1000_TSICR; As both (1) and (3) will clear the interrupt cause, if the same interrupt happens again between (1) and (3) it will be ignored, causing events to be missed. Remove the extra clear in (3). Fixes: 00c65578b47b ("igb: enable internal PPS for the i210") Acked-by: Richard Cochran Signed-off-by: Vinicius Costa Gomes Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index cebb44f51d5f..7662c42e35c1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6985,44 +6985,31 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) static void igb_tsync_interrupt(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 ack = 0, tsicr = rd32(E1000_TSICR); + u32 tsicr = rd32(E1000_TSICR); struct ptp_clock_event event; if (tsicr & TSINTR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) ptp_clock_event(adapter->ptp_clock, &event); - ack |= TSINTR_SYS_WRAP; } if (tsicr & E1000_TSICR_TXTS) { /* retrieve hardware timestamp */ schedule_work(&adapter->ptp_tx_work); - ack |= E1000_TSICR_TXTS; } - if (tsicr & TSINTR_TT0) { + if (tsicr & TSINTR_TT0) igb_perout(adapter, 0); - ack |= TSINTR_TT0; - } - if (tsicr & TSINTR_TT1) { + if (tsicr & TSINTR_TT1) igb_perout(adapter, 1); - ack |= TSINTR_TT1; - } - if (tsicr & TSINTR_AUTT0) { + if (tsicr & TSINTR_AUTT0) igb_extts(adapter, 0); - ack |= TSINTR_AUTT0; - } - if (tsicr & TSINTR_AUTT1) { + if (tsicr & TSINTR_AUTT1) igb_extts(adapter, 1); - ack |= TSINTR_AUTT1; - } - - /* acknowledge the interrupts */ - wr32(E1000_TSICR, ack); } static irqreturn_t igb_msix_other(int irq, void *data) From 257310e998700e60382fbd3f4fd275fdbd9b2aaf Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Tue, 27 Feb 2024 15:31:06 +0100 Subject: [PATCH 03/13] ice: fix stats being updated by way too large values Simplify stats accumulation logic to fix the case where we don't take previous stat value into account, we should always respect it. Main netdev stats of our PF (Tx/Rx packets/bytes) were reported orders of magnitude too big during OpenStack reconfiguration events, possibly other reconfiguration cases too. The regression was reported to be between 6.1 and 6.2, so I was almost certain that on of the two "preserve stats over reset" commits were the culprit. While reading the code, it was found that in some cases we will increase the stats by arbitrarily large number (thanks to ignoring "-prev" part of condition, after zeroing it). Note that this fixes also the case where we were around limits of u64, but that was not the regression reported. Full disclosure: I remember suggesting this particular piece of code to Ben a few years ago, so blame on me. Fixes: 2fd5e433cd26 ("ice: Accumulate HW and Netdev statistics over reset") Reported-by: Nebojsa Stevanovic Link: https://lore.kernel.org/intel-wired-lan/VI1PR02MB439744DEDAA7B59B9A2833FE912EA@VI1PR02MB4397.eurprd02.prod.outlook.com Reported-by: Christian Rohmann Link: https://lore.kernel.org/intel-wired-lan/f38a6ca4-af05-48b1-a3e6-17ef2054e525@inovex.de Reviewed-by: Jacob Keller Signed-off-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 24 +++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index df6a68ab747e..6d256dbcb77d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6737,6 +6737,7 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { struct rtnl_link_stats64 *net_stats, *stats_prev; struct rtnl_link_stats64 *vsi_stats; + struct ice_pf *pf = vsi->back; u64 pkts, bytes; int i; @@ -6782,21 +6783,18 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) net_stats = &vsi->net_stats; stats_prev = &vsi->net_stats_prev; - /* clear prev counters after reset */ - if (vsi_stats->tx_packets < stats_prev->tx_packets || - vsi_stats->rx_packets < stats_prev->rx_packets) { - stats_prev->tx_packets = 0; - stats_prev->tx_bytes = 0; - stats_prev->rx_packets = 0; - stats_prev->rx_bytes = 0; + /* Update netdev counters, but keep in mind that values could start at + * random value after PF reset. And as we increase the reported stat by + * diff of Prev-Cur, we need to be sure that Prev is valid. If it's not, + * let's skip this round. + */ + if (likely(pf->stat_prev_loaded)) { + net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; + net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; + net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; + net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; } - /* update netdev counters */ - net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; - net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; - net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; - net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; - stats_prev->tx_packets = vsi_stats->tx_packets; stats_prev->tx_bytes = vsi_stats->tx_bytes; stats_prev->rx_packets = vsi_stats->rx_packets; From e3fb8e8ba72b053d05ca2602acdd6b869f9f296f Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 4 Mar 2024 14:56:12 +0100 Subject: [PATCH 04/13] net: dsa: microchip: make sure drive strength configuration is not lost by soft reset This driver has two separate reset sequence in different places: - gpio/HW reset on start of ksz_switch_register() - SW reset on start of ksz_setup() The second one will overwrite drive strength configuration made in the ksz_switch_register(). To fix it, move ksz_parse_drive_strength() from ksz_switch_register() to ksz_setup(). Fixes: d67d7247f641 ("net: dsa: microchip: Add drive strength configuration") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240304135612.814404-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 245dfb7a7a31..25a49708f484 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2185,6 +2185,8 @@ static int ksz_pirq_setup(struct ksz_device *dev, u8 p) return ksz_irq_common_setup(dev, pirq); } +static int ksz_parse_drive_strength(struct ksz_device *dev); + static int ksz_setup(struct dsa_switch *ds) { struct ksz_device *dev = ds->priv; @@ -2206,6 +2208,10 @@ static int ksz_setup(struct dsa_switch *ds) return ret; } + ret = ksz_parse_drive_strength(dev); + if (ret) + return ret; + /* set broadcast storm protection 10% rate */ regmap_update_bits(ksz_regmap_16(dev), regs[S_BROADCAST_CTRL], BROADCAST_STORM_RATE, @@ -4242,10 +4248,6 @@ int ksz_switch_register(struct ksz_device *dev) for (port_num = 0; port_num < dev->info->port_cnt; ++port_num) dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA; if (dev->dev->of_node) { - ret = ksz_parse_drive_strength(dev); - if (ret) - return ret; - ret = of_get_phy_mode(dev->dev->of_node, &interface); if (ret == 0) dev->compat_interface = interface; From c4386ab4f6c600f75fdfd21143f89bac3e625d0d Mon Sep 17 00:00:00 2001 From: Shiming Cheng Date: Thu, 7 Mar 2024 18:01:57 +0800 Subject: [PATCH 05/13] ipv6: fib6_rules: flush route cache when rule is changed When rule policy is changed, ipv6 socket cache is not refreshed. The sock's skb still uses a outdated route cache and was sent to a wrong interface. To avoid this error we should update fib node's version when rule is changed. Then skb's route will be reroute checked as route cache version is already different with fib node version. The route cache is refreshed to match the latest rule. Fixes: 101367c2f8c4 ("[IPV6]: Policy Routing Rules") Signed-off-by: Shiming Cheng Signed-off-by: Lena Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 7523c4baef35..52c04f0ac498 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -449,6 +449,11 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } +static void fib6_rule_flush_cache(struct fib_rules_ops *ops) +{ + rt_genid_bump_ipv6(ops->fro_net); +} + static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), @@ -461,6 +466,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, + .flush_cache = fib6_rule_flush_cache, .nlgroup = RTNLGRP_IPV6_RULE, .owner = THIS_MODULE, .fro_net = &init_net, From b0ec2abf98267f14d032102551581c833b0659d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 10:07:16 +0000 Subject: [PATCH 06/13] net: ip_tunnel: make sure to pull inner header in ip_tunnel_rcv() Apply the same fix than ones found in : 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") 1ca1ba465e55 ("geneve: make sure to pull inner header in geneve_rx()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. syzbot reported: BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in ip_tunnel_rcv+0xed9/0x2ed0 net/ipv4/ip_tunnel.c:409 __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] ip_tunnel_rcv+0xed9/0x2ed0 net/ipv4/ip_tunnel.c:409 __ipgre_rcv+0x9bc/0xbc0 net/ipv4/ip_gre.c:389 ipgre_rcv net/ipv4/ip_gre.c:411 [inline] gre_rcv+0x423/0x19f0 net/ipv4/ip_gre.c:447 gre_rcv+0x2a4/0x390 net/ipv4/gre_demux.c:163 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 netif_receive_skb_internal net/core/dev.c:5734 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5793 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1556 tun_get_user+0x53b9/0x66e0 drivers/net/tun.c:2009 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2055 call_write_iter include/linux/fs.h:2087 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb6b/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: __alloc_pages+0x9a6/0xe00 mm/page_alloc.c:4590 alloc_pages_mpol+0x62b/0x9d0 mm/mempolicy.c:2133 alloc_pages+0x1be/0x1e0 mm/mempolicy.c:2204 skb_page_frag_refill+0x2bf/0x7c0 net/core/sock.c:2909 tun_build_skb drivers/net/tun.c:1686 [inline] tun_get_user+0xe0a/0x66e0 drivers/net/tun.c:1826 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2055 call_write_iter include/linux/fs.h:2087 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb6b/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 1b6981de3f29..7af36e4f1647 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -378,7 +378,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, bool log_ecn_error) { const struct iphdr *iph = ip_hdr(skb); - int err; + int nh, err; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -404,8 +404,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + iph = (struct iphdr *)(skb->head + nh); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) From fc1b2901e0feed933aaee273d0b0ca961539f4d7 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Thu, 7 Mar 2024 16:26:23 +0530 Subject: [PATCH 07/13] octeontx2-af: Fix devlink params Devlink param for adjusting NPC MCAM high zone area is in wrong param list and is not getting activated on CN10KA silicon. That patch fixes this issue. Fixes: dd7842878633 ("octeontx2-af: Add new devlink param to configure maximum usable NIX block LFs") Signed-off-by: Sunil Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. Miller --- .../marvell/octeontx2/af/rvu_devlink.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 1e6fbd98423d..96c04f7d93f8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1235,8 +1235,8 @@ static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id, enum rvu_af_dl_param_id { RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, - RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE, RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT, + RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE, RVU_AF_DEVLINK_PARAM_ID_NIX_MAXLF, }; @@ -1434,15 +1434,6 @@ static const struct devlink_param rvu_af_dl_params[] = { BIT(DEVLINK_PARAM_CMODE_RUNTIME), rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set, rvu_af_dl_dwrr_mtu_validate), -}; - -static const struct devlink_param rvu_af_dl_param_exact_match[] = { - DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE, - "npc_exact_feature_disable", DEVLINK_PARAM_TYPE_STRING, - BIT(DEVLINK_PARAM_CMODE_RUNTIME), - rvu_af_npc_exact_feature_get, - rvu_af_npc_exact_feature_disable, - rvu_af_npc_exact_feature_validate), DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT, "npc_mcam_high_zone_percent", DEVLINK_PARAM_TYPE_U8, BIT(DEVLINK_PARAM_CMODE_RUNTIME), @@ -1457,6 +1448,15 @@ static const struct devlink_param rvu_af_dl_param_exact_match[] = { rvu_af_dl_nix_maxlf_validate), }; +static const struct devlink_param rvu_af_dl_param_exact_match[] = { + DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE, + "npc_exact_feature_disable", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + rvu_af_npc_exact_feature_get, + rvu_af_npc_exact_feature_disable, + rvu_af_npc_exact_feature_validate), +}; + /* Devlink switch mode */ static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { From 4469c0c5b14a0919f5965c7ceac96b523eb57b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20L=27h=C3=B4pital?= Date: Thu, 7 Mar 2024 12:19:06 +0100 Subject: [PATCH 08/13] net: phy: fix phy_get_internal_delay accessing an empty array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The phy_get_internal_delay function could try to access to an empty array in the case that the driver is calling phy_get_internal_delay without defining delay_values and rx-internal-delay-ps or tx-internal-delay-ps is defined to 0 in the device-tree. This will lead to "unable to handle kernel NULL pointer dereference at virtual address 0". To avoid this kernel oops, the test should be delay >= 0. As there is already delay < 0 test just before, the test could only be size == 0. Fixes: 92252eec913b ("net: phy: Add a helper to return the index for of the internal delay") Co-developed-by: Enguerrand de Ribaucourt Signed-off-by: Enguerrand de Ribaucourt Signed-off-by: Kévin L'hôpital Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 3611ea64875e..3ad9bbf65cbe 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2959,7 +2959,7 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, if (delay < 0) return delay; - if (delay && size == 0) + if (size == 0) return delay; if (delay < delay_values[0] || delay > delay_values[size - 1]) { From b446631f355ece73b13c311dd712c47381a23172 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Mar 2024 16:12:40 +0100 Subject: [PATCH 09/13] dpll: fix dpll_xa_ref_*_del() for multiple registrations Currently, if there are multiple registrations of the same pin on the same dpll device, following warnings are observed: WARNING: CPU: 5 PID: 2212 at drivers/dpll/dpll_core.c:143 dpll_xa_ref_pin_del.isra.0+0x21e/0x230 WARNING: CPU: 5 PID: 2212 at drivers/dpll/dpll_core.c:223 __dpll_pin_unregister+0x2b3/0x2c0 The problem is, that in both dpll_xa_ref_dpll_del() and dpll_xa_ref_pin_del() registration is only removed from list in case the reference count drops to zero. That is wrong, the registration has to be removed always. To fix this, remove the registration from the list and free it unconditionally, instead of doing it only when the ref reference counter reaches zero. Fixes: 9431063ad323 ("dpll: core: Add DPLL framework base functions") Signed-off-by: Jiri Pirko Reviewed-by: Rahul Rameshbabu Signed-off-by: David S. Miller --- drivers/dpll/dpll_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 7f686d179fc9..c751a87c7a8e 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -129,9 +129,9 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, reg = dpll_pin_registration_find(ref, ops, priv); if (WARN_ON(!reg)) return -EINVAL; + list_del(®->list); + kfree(reg); if (refcount_dec_and_test(&ref->refcount)) { - list_del(®->list); - kfree(reg); xa_erase(xa_pins, i); WARN_ON(!list_empty(&ref->registration_list)); kfree(ref); @@ -209,9 +209,9 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll, reg = dpll_pin_registration_find(ref, ops, priv); if (WARN_ON(!reg)) return; + list_del(®->list); + kfree(reg); if (refcount_dec_and_test(&ref->refcount)) { - list_del(®->list); - kfree(reg); xa_erase(xa_dplls, i); WARN_ON(!list_empty(&ref->registration_list)); kfree(ref); From c8a5c731fd1223090af57da33838c671a7fc6a78 Mon Sep 17 00:00:00 2001 From: Tim Pambor Date: Tue, 5 Mar 2024 12:06:08 +0100 Subject: [PATCH 10/13] net: phy: dp83822: Fix RGMII TX delay configuration The logic for enabling the TX clock shift is inverse of enabling the RX clock shift. The TX clock shift is disabled when DP83822_TX_CLK_SHIFT is set. Correct the current behavior and always write the delay configuration to ensure consistent delay settings regardless of bootloader configuration. Reference: https://www.ti.com/lit/ds/symlink/dp83822i.pdf p. 69 Fixes: 8095295292b5 ("net: phy: DP83822: Add setting the fixed internal delay") Signed-off-by: Tim Pambor Link: https://lore.kernel.org/r/20240305110608.104072-1-tp@osasysteme.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index b7cb71817780..29e1cbea6dc0 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -380,7 +380,7 @@ static int dp83822_config_init(struct phy_device *phydev) { struct dp83822_private *dp83822 = phydev->priv; struct device *dev = &phydev->mdio.dev; - int rgmii_delay; + int rgmii_delay = 0; s32 rx_int_delay; s32 tx_int_delay; int err = 0; @@ -390,30 +390,33 @@ static int dp83822_config_init(struct phy_device *phydev) rx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, true); - if (rx_int_delay <= 0) - rgmii_delay = 0; - else - rgmii_delay = DP83822_RX_CLK_SHIFT; + /* Set DP83822_RX_CLK_SHIFT to enable rx clk internal delay */ + if (rx_int_delay > 0) + rgmii_delay |= DP83822_RX_CLK_SHIFT; tx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, false); + + /* Set DP83822_TX_CLK_SHIFT to disable tx clk internal delay */ if (tx_int_delay <= 0) - rgmii_delay &= ~DP83822_TX_CLK_SHIFT; - else rgmii_delay |= DP83822_TX_CLK_SHIFT; - if (rgmii_delay) { - err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, rgmii_delay); - if (err) - return err; - } + err = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + DP83822_RX_CLK_SHIFT | DP83822_TX_CLK_SHIFT, rgmii_delay); + if (err) + return err; - phy_set_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + + if (err) + return err; } else { - phy_clear_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + err = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + + if (err) + return err; } if (dp83822->fx_enabled) { From 8f4cd89bf10607de08231d6d91a73dd63336808e Mon Sep 17 00:00:00 2001 From: William Tu Date: Sun, 10 Mar 2024 18:45:47 +0200 Subject: [PATCH 11/13] devlink: Fix length of eswitch inline-mode Set eswitch inline-mode to be u8, not u16. Otherwise, errors below $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev \ inline-mode network Error: Attribute failed policy validation. kernel answers: Numerical result out of rang netlink: 'devlink': attribute type 26 has an invalid length. Fixes: f2f9dd164db0 ("netlink: specs: devlink: add the remaining command to generate complete split_ops") Signed-off-by: William Tu Reviewed-by: Jiri Pirko Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240310164547.35219-1-witu@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 2 +- net/devlink/netlink_gen.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index cf6eaa0da821..09fbb4c03fc8 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -290,7 +290,7 @@ attribute-sets: enum: eswitch-mode - name: eswitch-inline-mode - type: u16 + type: u8 enum: eswitch-inline-mode - name: dpipe-tables diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index c81cf2dd154f..f9786d51f68f 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -198,7 +198,7 @@ static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITC [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1), - [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U16, 3), + [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U8, 3), [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1), }; From 46590b545df636aea0f9f7f2100d9963c0af5824 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Fri, 8 Mar 2024 15:52:06 +0800 Subject: [PATCH 12/13] r8152: fix unknown device for choose_configuration For the unknown device, rtl8152_cfgselector_choose_configuration() should return a negative value. Then, usb_choose_configuration() would set a configuration for CDC ECM or NCM mode. Otherwise, there is no usb interface driver for the device. Fixes: aa4f2b3e418e ("r8152: Choose our USB config with choose_configuration() rather than probe()") Signed-off-by: Hayes Wang Link: https://lore.kernel.org/r/20240308075206.33553-436-nic_swsd@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0d0672d2a654..51fe00c2b896 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10078,7 +10078,7 @@ static int rtl8152_cfgselector_choose_configuration(struct usb_device *udev) * driver supports it. */ if (__rtl_get_hw_ver(udev) == RTL_VER_UNKNOWN) - return 0; + return -ENODEV; /* The vendor mode is not always config #1, so to find it out. */ c = udev->config; From 84e95149bd341705f0eca6a7fcb955c548805002 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 8 Mar 2024 22:25:40 +0800 Subject: [PATCH 13/13] nfp: flower: handle acti_netdevs allocation failure The kmalloc_array() in nfp_fl_lag_do_work() will return null, if the physical memory has run out. As a result, if we dereference the acti_netdevs, the null pointer dereference bugs will happen. This patch adds a check to judge whether allocation failure occurs. If it happens, the delayed work will be rescheduled and try again. Fixes: bb9a8d031140 ("nfp: flower: monitor and offload LAG groups") Signed-off-by: Duoming Zhou Reviewed-by: Louis Peens Link: https://lore.kernel.org/r/20240308142540.9674-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/flower/lag_conf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index 361d7c495e2d..2c7bd6e80d99 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -337,6 +337,11 @@ static void nfp_fl_lag_do_work(struct work_struct *work) acti_netdevs = kmalloc_array(entry->slave_cnt, sizeof(*acti_netdevs), GFP_KERNEL); + if (!acti_netdevs) { + schedule_delayed_work(&lag->work, + NFP_FL_LAG_DELAY); + continue; + } /* Include sanity check in the loop. It may be that a bond has * changed between processing the last notification and the