mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-23 02:51:47 +00:00
Merge branch 'gro-fixes'
Antoine Tenart says: ==================== gro: various fixes related to UDP tunnels We found issues when a UDP tunnel endpoint is in a different netns than where UDP GRO happens. This kind of setup is actually quite diverse, from having one leg of the tunnel on a remove host, to having a tunnel between netns (eg. being bridged in another one or on the host). In our case that UDP tunnel was geneve. UDP tunnel packets should not be GROed at the UDP level. The fundamental issue here is such packet can't be detected in a foolproof way: we can't know by looking at a packet alone and the current logic of looking up UDP sockets is fragile (socket could be in another netns, packet could be modified in between, etc). Because there is no way to make the GRO code to correctly handle those packets in all cases, this series aims at two things: making the net stack to correctly behave (as in, no crash and no invalid packet) when such thing happens, and in some cases to prevent this "early GRO" from happening. First three patches fix issues when an "UDP tunneled" packet is being GROed too early by rx-udp-gro-forwarding or rx-gro-list. Last patch is preventing locally generated UDP tunnel packets from being GROed. This turns out to be more complex than this patch alone as it relies on skb->encapsulation which is currently untrusty in some cases (see iptunnel_handle_offloads); but that should fix things in practice and is acceptable for a fix. Future work is required to improve things (prevent all locally generated UDP tunnel packets from being GROed), such as fixing the misuse of skb->encapsulation in drivers; but that would be net-next material. Thanks! Antoine Since v3: - Fixed the udpgro_fwd selftest in patch 5 (Jakub Kicinski feedback). - Improved commit message on patch 3 (Willem de Bruijn feeback). Since v2: - Fixed a build issue with IPv6=m in patch 1 (Jakub Kicinski feedback). - Fixed typo in patch 1 (Nikolay Aleksandrov feedback). - Added Reviewed-by tag on patch 2 (Willem de Bruijn feeback). - Added back conversion to CHECKSUM_UNNECESSARY but only from non CHECKSUM_PARTIAL in patch 3 (Paolo Abeni & Willem de Bruijn feeback). - Reworded patch 3 commit msg. Since v1: - Fixed a build issue with IPv6 disabled in patch 1. - Reworked commit log in patch 2 (Willem de Bruijn feedback). - Added Reviewed-by tags on patches 1 & 4 (Willem de Bruijn feeback). ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
ad69a730c6
7 changed files with 54 additions and 27 deletions
|
@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
|
|||
}
|
||||
}
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
|
||||
#endif
|
||||
|
||||
static inline bool udp_encap_needed(void)
|
||||
{
|
||||
if (static_branch_unlikely(&udp_encap_needed_key))
|
||||
return true;
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
if (static_branch_unlikely(&udpv6_encap_needed_key))
|
||||
return true;
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
if (!skb_is_gso(skb))
|
||||
|
@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
|
|||
!udp_test_bit(ACCEPT_FRAGLIST, sk))
|
||||
return true;
|
||||
|
||||
/* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
|
||||
* land in a tunnel as the socket check in udp_gro_receive cannot be
|
||||
* foolproof.
|
||||
*/
|
||||
if (udp_encap_needed() &&
|
||||
READ_ONCE(udp_sk(sk)->encap_rcv) &&
|
||||
!(skb_shinfo(skb)->gso_type &
|
||||
(SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -192,8 +192,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
|
|||
}
|
||||
|
||||
merge:
|
||||
/* sk owenrship - if any - completely transferred to the aggregated packet */
|
||||
/* sk ownership - if any - completely transferred to the aggregated packet */
|
||||
skb->destructor = NULL;
|
||||
skb->sk = NULL;
|
||||
delta_truesize = skb->truesize;
|
||||
if (offset > headlen) {
|
||||
unsigned int eat = offset - headlen;
|
||||
|
|
|
@ -582,6 +582,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk,
|
|||
}
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
|
||||
EXPORT_SYMBOL(udp_encap_needed_key);
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
|
||||
EXPORT_SYMBOL(udpv6_encap_needed_key);
|
||||
#endif
|
||||
|
||||
void udp_encap_enable(void)
|
||||
{
|
||||
static_branch_inc(&udp_encap_needed_key);
|
||||
|
|
|
@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
|
|||
NAPI_GRO_CB(p)->count++;
|
||||
p->data_len += skb->len;
|
||||
|
||||
/* sk owenrship - if any - completely transferred to the aggregated packet */
|
||||
/* sk ownership - if any - completely transferred to the aggregated packet */
|
||||
skb->destructor = NULL;
|
||||
skb->sk = NULL;
|
||||
p->truesize += skb->truesize;
|
||||
p->len += skb->len;
|
||||
|
||||
|
@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
|
|||
unsigned int off = skb_gro_offset(skb);
|
||||
int flush = 1;
|
||||
|
||||
/* we can do L4 aggregation only if the packet can't land in a tunnel
|
||||
* otherwise we could corrupt the inner stream
|
||||
/* We can do L4 aggregation only if the packet can't land in a tunnel
|
||||
* otherwise we could corrupt the inner stream. Detecting such packets
|
||||
* cannot be foolproof and the aggregation might still happen in some
|
||||
* cases. Such packets should be caught in udp_unexpected_gso later.
|
||||
*/
|
||||
NAPI_GRO_CB(skb)->is_flist = 0;
|
||||
if (!sk || !udp_sk(sk)->gro_receive) {
|
||||
/* If the packet was locally encapsulated in a UDP tunnel that
|
||||
* wasn't detected above, do not GRO.
|
||||
*/
|
||||
if (skb->encapsulation)
|
||||
goto out;
|
||||
|
||||
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
|
||||
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
|
||||
|
||||
|
@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
|
|||
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
|
||||
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
|
||||
|
||||
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
|
||||
if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
|
||||
skb->csum_level++;
|
||||
} else {
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
skb->csum_level = 0;
|
||||
}
|
||||
__skb_incr_checksum_unnecessary(skb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -447,7 +447,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
|||
goto try_again;
|
||||
}
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
|
||||
DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
|
||||
void udpv6_encap_enable(void)
|
||||
{
|
||||
static_branch_inc(&udpv6_encap_needed_key);
|
||||
|
|
|
@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
|
|||
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
|
||||
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
|
||||
|
||||
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
|
||||
if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
|
||||
skb->csum_level++;
|
||||
} else {
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
skb->csum_level = 0;
|
||||
}
|
||||
__skb_incr_checksum_unnecessary(skb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -244,7 +244,7 @@ for family in 4 6; do
|
|||
create_vxlan_pair
|
||||
ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
|
||||
ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
|
||||
run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
|
||||
run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
|
||||
cleanup
|
||||
|
||||
# use NAT to circumvent GRO FWD check
|
||||
|
@ -258,13 +258,7 @@ for family in 4 6; do
|
|||
# load arp cache before running the test to reduce the amount of
|
||||
# stray traffic on top of the UDP tunnel
|
||||
ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
|
||||
run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
|
||||
cleanup
|
||||
|
||||
create_vxlan_pair
|
||||
run_bench "UDP tunnel fwd perf" $OL_NET$DST
|
||||
ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
|
||||
run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
|
||||
run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
|
||||
cleanup
|
||||
done
|
||||
|
||||
|
|
Loading…
Reference in a new issue