From afcaf61be9d1dbdee5ec186d1dcc67b6b692180f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 10 Apr 2020 17:06:01 +0800 Subject: [PATCH 01/15] xfrm: allow to accept packets with ipv6 NEXTHDR_HOP in xfrm_input For beet mode, when it's ipv6 inner address with nexthdrs set, the packet format might be: ---------------------------------------------------- | outer | | dest | | | ESP | ESP | | IP hdr | ESP | opts.| TCP | Data | Trailer | ICV | ---------------------------------------------------- The nexthdr from ESP could be NEXTHDR_HOP(0), so it should continue processing the packet when nexthdr returns 0 in xfrm_input(). Otherwise, when ipv6 nexthdr is set, the packet will be dropped. I don't see any error cases that nexthdr may return 0. So fix it by removing the check for nexthdr == 0. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index aa35f23c4912..8a202c44f89a 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -644,7 +644,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) dev_put(skb->dev); spin_lock(&x->lock); - if (nexthdr <= 0) { + if (nexthdr < 0) { if (nexthdr == -EBADMSG) { xfrm_audit_state_icvfail(x, skb, x->type->proto); From 06a0afcfe2f551ff755849ea2549b0d8409fd9a0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 10 Apr 2020 17:06:31 +0800 Subject: [PATCH 02/15] xfrm: do pskb_pull properly in __xfrm_transport_prep For transport mode, when ipv6 nexthdr is set, the packet format might be like: ---------------------------------------------------- | | dest | | | | ESP | ESP | | IP6 hdr| opts.| ESP | TCP | Data | Trailer | ICV | ---------------------------------------------------- and in __xfrm_transport_prep(): pskb_pull(skb, skb->mac_len + sizeof(ip6hdr) + x->props.header_len); it will pull the data pointer to the wrong position, as it missed the nexthdrs/dest opts. This patch is to fix it by using: pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len); as we can be sure transport_header points to ESP header at that moment. It also fixes a panic when packets with ipv6 nexthdr are sent over esp6 transport mode: [ 100.473845] kernel BUG at net/core/skbuff.c:4325! [ 100.478517] RIP: 0010:__skb_to_sgvec+0x252/0x260 [ 100.494355] Call Trace: [ 100.494829] skb_to_sgvec+0x11/0x40 [ 100.495492] esp6_output_tail+0x12e/0x550 [esp6] [ 100.496358] esp6_xmit+0x1d5/0x260 [esp6_offload] [ 100.498029] validate_xmit_xfrm+0x22f/0x2e0 [ 100.499604] __dev_queue_xmit+0x589/0x910 [ 100.502928] ip6_finish_output2+0x2a5/0x5a0 [ 100.503718] ip6_output+0x6c/0x120 [ 100.505198] xfrm_output_resume+0x4bf/0x530 [ 100.508683] xfrm6_output+0x3a/0xc0 [ 100.513446] inet6_csk_xmit+0xa1/0xf0 [ 100.517335] tcp_sendmsg+0x27/0x40 [ 100.517977] sock_sendmsg+0x3e/0x60 [ 100.518648] __sys_sendto+0xee/0x160 Fixes: c35fe4106b92 ("xfrm: Add mode handlers for IPsec on layer 2") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6cc7f7f1dd68..f50d1f97cf8e 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -25,12 +25,10 @@ static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, struct xfrm_offload *xo = xfrm_offload(skb); skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + hsize + x->props.header_len); - - if (xo->flags & XFRM_GSO_SEGMENT) { - skb_reset_transport_header(skb); + if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header -= x->props.header_len; - } + + pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len); } static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, From 3c96ec56828922e3fe5477f75eb3fc02f98f98b5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 10 Apr 2020 17:06:56 +0800 Subject: [PATCH 03/15] esp6: get the right proto for transport mode in esp6_gso_encap For transport mode, when ipv6 nexthdr is set, the packet format might be like: ---------------------------------------------------- | | dest | | | | ESP | ESP | | IP6 hdr| opts.| ESP | TCP | Data | Trailer | ICV | ---------------------------------------------------- What it wants to get for x-proto in esp6_gso_encap() is the proto that will be set in ESP nexthdr. So it should skip all ipv6 nexthdrs and get the real transport protocol. Othersize, the wrong proto number will be set into ESP nexthdr. This patch is to skip all ipv6 nexthdrs by calling ipv6_skip_exthdr() in esp6_gso_encap(). Fixes: 7862b4058b9f ("esp: Add gso handlers for esp4 and esp6") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/esp6_offload.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 8eab2c869d61..b82850886574 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -123,9 +123,16 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb) struct ip_esp_hdr *esph; struct ipv6hdr *iph = ipv6_hdr(skb); struct xfrm_offload *xo = xfrm_offload(skb); - int proto = iph->nexthdr; + u8 proto = iph->nexthdr; skb_push(skb, -skb_network_offset(skb)); + + if (x->outer_mode.encap == XFRM_MODE_TRANSPORT) { + __be16 frag; + + ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto, &frag); + } + esph = ip_esp_hdr(skb); *skb_mac_header(skb) = IPPROTO_ESP; From db87668ad1e4917cfe04e217307ba6ed9390716e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 10 Apr 2020 17:08:24 +0800 Subject: [PATCH 04/15] xfrm: remove the xfrm_state_put call becofe going to out_reset This xfrm_state_put call in esp4/6_gro_receive() will cause double put for state, as in out_reset path secpath_reset() will put all states set in skb sec_path. So fix it by simply remove the xfrm_state_put call. Fixes: 6ed69184ed9c ("xfrm: Reset secpath in xfrm failure") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 4 +--- net/ipv6/esp6_offload.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 731022cff600..231edcb84c08 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -63,10 +63,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, sp->olen++; xo = xfrm_offload(skb); - if (!xo) { - xfrm_state_put(x); + if (!xo) goto out_reset; - } } xo->flags |= XFRM_GRO; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index b82850886574..b92372b104ba 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -85,10 +85,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, sp->olen++; xo = xfrm_offload(skb); - if (!xo) { - xfrm_state_put(x); + if (!xo) goto out_reset; - } } xo->flags |= XFRM_GRO; From 29e4276667e24ee6b91d9f91064d8fda9a210ea1 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 15 Apr 2020 21:47:10 +0200 Subject: [PATCH 05/15] xfrm: fix error in comment s/xfrm_state_offload/xfrm_user_offload/ Fixes: d77e38e612a ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 5f3b9fec7b5f..ff7cfdc6cb44 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -304,7 +304,7 @@ enum xfrm_attr_type_t { XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, - XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ + XFRMA_OFFLOAD_DEV, /* struct xfrm_user_offload */ XFRMA_SET_MARK, /* __u32 */ XFRMA_SET_MARK_MASK, /* __u32 */ XFRMA_IF_ID, /* __u32 */ From 9f0cadc32d738f0f0c8e30be83be7087c7b85ee5 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 16 Apr 2020 17:45:44 +0200 Subject: [PATCH 06/15] xfrm: espintcp: save and call old ->sk_destruct When ESP encapsulation is enabled on a TCP socket, I'm replacing the existing ->sk_destruct callback with espintcp_destruct. We still need to call the old callback to perform the other cleanups when the socket is destroyed. Save the old callback, and call it from espintcp_destruct. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/espintcp.h | 1 + net/xfrm/espintcp.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/include/net/espintcp.h b/include/net/espintcp.h index dd7026a00066..0335bbd76552 100644 --- a/include/net/espintcp.h +++ b/include/net/espintcp.h @@ -25,6 +25,7 @@ struct espintcp_ctx { struct espintcp_msg partial; void (*saved_data_ready)(struct sock *sk); void (*saved_write_space)(struct sock *sk); + void (*saved_destruct)(struct sock *sk); struct work_struct work; bool tx_running; }; diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 037ea156d2f9..5a0ff665b71a 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -379,6 +379,7 @@ static void espintcp_destruct(struct sock *sk) { struct espintcp_ctx *ctx = espintcp_getctx(sk); + ctx->saved_destruct(sk); kfree(ctx); } @@ -419,6 +420,7 @@ static int espintcp_init_sk(struct sock *sk) sk->sk_socket->ops = &espintcp_ops; ctx->saved_data_ready = sk->sk_data_ready; ctx->saved_write_space = sk->sk_write_space; + ctx->saved_destruct = sk->sk_destruct; sk->sk_data_ready = espintcp_data_ready; sk->sk_write_space = espintcp_write_space; sk->sk_destruct = espintcp_destruct; From 25a44ae93d1a490f36d88a180f11aa2650bef074 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 19 Apr 2020 16:10:00 +0800 Subject: [PATCH 07/15] esp6: support ipv6 nexthdrs process for beet gso segment For beet mode, when it's ipv6 inner address with nexthdrs set, the packet format might be: ---------------------------------------------------- | outer | | dest | | | ESP | ESP | | IP6 hdr| ESP | opts.| TCP | Data | Trailer | ICV | ---------------------------------------------------- Before doing gso segment in xfrm6_beet_gso_segment(), it should skip all nexthdrs and get the real transport proto, and set transport_header properly. This patch is to fix it by simply calling ipv6_skip_exthdr() in xfrm6_beet_gso_segment(). v1->v2: - remove skb_transport_offset(), as it will always return 0 in xfrm6_beet_gso_segment(), thank Sabrina's check. Fixes: 7f9e40eb18a9 ("esp6: add gso_segment for esp6 beet mode") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/esp6_offload.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index b92372b104ba..9c03460b2760 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -171,7 +171,7 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x, struct xfrm_offload *xo = xfrm_offload(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; - int proto = xo->proto; + u8 proto = xo->proto; skb->transport_header += x->props.header_len; @@ -182,7 +182,12 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x, proto = ph->nexthdr; } - if (x->sel.family != AF_INET6) { + if (x->sel.family == AF_INET6) { + __be16 frag; + + skb->transport_header += + ipv6_skip_exthdr(skb, 0, &proto, &frag); + } else { skb->transport_header -= (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); From 6f297068a0696eafecef13852408b05a81edb560 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 19 Apr 2020 16:11:02 +0800 Subject: [PATCH 08/15] esp4: support ipv6 nexthdrs process for beet gso segment For beet mode, when it's ipv6 inner address with nexthdrs set, the packet format might be: ---------------------------------------------------- | outer | | dest | | | ESP | ESP | | IP hdr | ESP | opts.| TCP | Data | Trailer | ICV | ---------------------------------------------------- Before doing gso segment in xfrm4_beet_gso_segment(), the same thing is needed as it does in xfrm6_beet_gso_segment() in last patch 'esp6: support ipv6 nexthdrs process for beet gso segment'. v1->v2: - remove skb_transport_offset(), as it will always return 0 in xfrm6_beet_gso_segment(), thank Sabrina's check. Fixes: 384a46ea7bdc ("esp4: add gso_segment for esp4 beet mode") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 231edcb84c08..9b1d451edae0 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -137,7 +137,7 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, struct xfrm_offload *xo = xfrm_offload(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; - int proto = xo->proto; + u8 proto = xo->proto; skb->transport_header += x->props.header_len; @@ -146,10 +146,15 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, skb->transport_header += ph->hdrlen * 8; proto = ph->nexthdr; - } else if (x->sel.family != AF_INET6) { + } else if (x->sel.family == AF_INET6) { + __be16 frag; + + skb->transport_header += + ipv6_skip_exthdr(skb, 0, &proto, &frag); + if (proto == IPPROTO_TCP) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + } else { skb->transport_header -= IPV4_BEET_PHMAXLEN; - } else if (proto == IPPROTO_TCP) { - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; } __skb_pull(skb, skb_transport_offset(skb)); From a204aef9fd77dce1efd9066ca4e44eede99cd858 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 20 Apr 2020 21:51:09 +0800 Subject: [PATCH 09/15] xfrm: call xfrm_output_gso when inner_protocol is set in xfrm_output An use-after-free crash can be triggered when sending big packets over vxlan over esp with esp offload enabled: [] BUG: KASAN: use-after-free in ipv6_gso_pull_exthdrs.part.8+0x32c/0x4e0 [] Call Trace: [] dump_stack+0x75/0xa0 [] kasan_report+0x37/0x50 [] ipv6_gso_pull_exthdrs.part.8+0x32c/0x4e0 [] ipv6_gso_segment+0x2c8/0x13c0 [] skb_mac_gso_segment+0x1cb/0x420 [] skb_udp_tunnel_segment+0x6b5/0x1c90 [] inet_gso_segment+0x440/0x1380 [] skb_mac_gso_segment+0x1cb/0x420 [] esp4_gso_segment+0xae8/0x1709 [esp4_offload] [] inet_gso_segment+0x440/0x1380 [] skb_mac_gso_segment+0x1cb/0x420 [] __skb_gso_segment+0x2d7/0x5f0 [] validate_xmit_skb+0x527/0xb10 [] __dev_queue_xmit+0x10f8/0x2320 <--- [] ip_finish_output2+0xa2e/0x1b50 [] ip_output+0x1a8/0x2f0 [] xfrm_output_resume+0x110e/0x15f0 [] __xfrm4_output+0xe1/0x1b0 [] xfrm4_output+0xa0/0x200 [] iptunnel_xmit+0x5a7/0x920 [] vxlan_xmit_one+0x1658/0x37a0 [vxlan] [] vxlan_xmit+0x5e4/0x3ec8 [vxlan] [] dev_hard_start_xmit+0x125/0x540 [] __dev_queue_xmit+0x17bd/0x2320 <--- [] ip6_finish_output2+0xb20/0x1b80 [] ip6_output+0x1b3/0x390 [] ip6_xmit+0xb82/0x17e0 [] inet6_csk_xmit+0x225/0x3d0 [] __tcp_transmit_skb+0x1763/0x3520 [] tcp_write_xmit+0xd64/0x5fe0 [] __tcp_push_pending_frames+0x8c/0x320 [] tcp_sendmsg_locked+0x2245/0x3500 [] tcp_sendmsg+0x27/0x40 As on the tx path of vxlan over esp, skb->inner_network_header would be set on vxlan_xmit() and xfrm4_tunnel_encap_add(), and the later one can overwrite the former one. It causes skb_udp_tunnel_segment() to use a wrong skb->inner_network_header, then the issue occurs. This patch is to fix it by calling xfrm_output_gso() instead when the inner_protocol is set, in which gso_segment of inner_protocol will be done first. While at it, also improve some code around. Fixes: 7862b4058b9f ("esp: Add gso handlers for esp4 and esp6") Reported-by: Xiumei Mu Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 2fd3d990d992..69c33cab8f49 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -583,18 +583,20 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) xfrm_state_hold(x); if (skb_is_gso(skb)) { - skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + if (skb->inner_protocol) + return xfrm_output_gso(net, sk, skb); - return xfrm_output2(net, sk, skb); + skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + goto out; } if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM) goto out; + } else { + if (skb_is_gso(skb)) + return xfrm_output_gso(net, sk, skb); } - if (skb_is_gso(skb)) - return xfrm_output_gso(net, sk, skb); - if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); if (err) { From 976eba8ab596bab94b9714cd46d38d5c6a2c660d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 21 Apr 2020 20:46:11 +0800 Subject: [PATCH 10/15] ip_vti: receive ipip packet by calling ip_tunnel_rcv In Commit dd9ee3444014 ("vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel"), it tries to receive IPIP packets in vti by calling xfrm_input(). This case happens when a small packet or frag sent by peer is too small to get compressed. However, xfrm_input() will still get to the IPCOMP path where skb sec_path is set, but never dropped while it should have been done in vti_ipcomp4_protocol.cb_handler(vti_rcv_cb), as it's not an ipcomp4 packet. This will cause that the packet can never pass xfrm4_policy_check() in the upper protocol rcv functions. So this patch is to call ip_tunnel_rcv() to process IPIP packets instead. Fixes: dd9ee3444014 ("vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel") Reported-by: Xiumei Mu Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1b4e6f298648..1dda7c155c48 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -93,7 +93,28 @@ static int vti_rcv_proto(struct sk_buff *skb) static int vti_rcv_tunnel(struct sk_buff *skb) { - return vti_rcv(skb, ip_hdr(skb)->saddr, true); + struct ip_tunnel_net *itn = net_generic(dev_net(skb->dev), vti_net_id); + const struct iphdr *iph = ip_hdr(skb); + struct ip_tunnel *tunnel; + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + struct tnl_ptk_info tpi = { + .proto = htons(ETH_P_IP), + }; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + if (iptunnel_pull_header(skb, 0, tpi.proto, false)) + goto drop; + return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, false); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; } static int vti_rcv_cb(struct sk_buff *skb, int err) From c95c5f58b35ef995f66cb55547eee6093ab5fcb8 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 23 Apr 2020 00:06:45 +0200 Subject: [PATCH 11/15] xfrm interface: fix oops when deleting a x-netns interface Here is the steps to reproduce the problem: ip netns add foo ip netns add bar ip -n foo link add xfrmi0 type xfrm dev lo if_id 42 ip -n foo link set xfrmi0 netns bar ip netns del foo ip netns del bar Which results to: [ 186.686395] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6bd3: 0000 [#1] SMP PTI [ 186.687665] CPU: 7 PID: 232 Comm: kworker/u16:2 Not tainted 5.6.0+ #1 [ 186.688430] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 186.689420] Workqueue: netns cleanup_net [ 186.689903] RIP: 0010:xfrmi_dev_uninit+0x1b/0x4b [xfrm_interface] [ 186.690657] Code: 44 f6 ff ff 31 c0 5b 5d 41 5c 41 5d 41 5e c3 48 8d 8f c0 08 00 00 8b 05 ce 14 00 00 48 8b 97 d0 08 00 00 48 8b 92 c0 0e 00 00 <48> 8b 14 c2 48 8b 02 48 85 c0 74 19 48 39 c1 75 0c 48 8b 87 c0 08 [ 186.692838] RSP: 0018:ffffc900003b7d68 EFLAGS: 00010286 [ 186.693435] RAX: 000000000000000d RBX: ffff8881b0f31000 RCX: ffff8881b0f318c0 [ 186.694334] RDX: 6b6b6b6b6b6b6b6b RSI: 0000000000000246 RDI: ffff8881b0f31000 [ 186.695190] RBP: ffffc900003b7df0 R08: ffff888236c07740 R09: 0000000000000040 [ 186.696024] R10: ffffffff81fce1b8 R11: 0000000000000002 R12: ffffc900003b7d80 [ 186.696859] R13: ffff8881edcc6a40 R14: ffff8881a1b6e780 R15: ffffffff81ed47c8 [ 186.697738] FS: 0000000000000000(0000) GS:ffff888237dc0000(0000) knlGS:0000000000000000 [ 186.698705] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 186.699408] CR2: 00007f2129e93148 CR3: 0000000001e0a000 CR4: 00000000000006e0 [ 186.700221] Call Trace: [ 186.700508] rollback_registered_many+0x32b/0x3fd [ 186.701058] ? __rtnl_unlock+0x20/0x3d [ 186.701494] ? arch_local_irq_save+0x11/0x17 [ 186.702012] unregister_netdevice_many+0x12/0x55 [ 186.702594] default_device_exit_batch+0x12b/0x150 [ 186.703160] ? prepare_to_wait_exclusive+0x60/0x60 [ 186.703719] cleanup_net+0x17d/0x234 [ 186.704138] process_one_work+0x196/0x2e8 [ 186.704652] worker_thread+0x1a4/0x249 [ 186.705087] ? cancel_delayed_work+0x92/0x92 [ 186.705620] kthread+0x105/0x10f [ 186.706000] ? __kthread_bind_mask+0x57/0x57 [ 186.706501] ret_from_fork+0x35/0x40 [ 186.706978] Modules linked in: xfrm_interface nfsv3 nfs_acl auth_rpcgss nfsv4 nfs lockd grace fscache sunrpc button parport_pc parport serio_raw evdev pcspkr loop ext4 crc16 mbcache jbd2 crc32c_generic 8139too ide_cd_mod cdrom ide_gd_mod ata_generic ata_piix libata scsi_mod piix psmouse i2c_piix4 ide_core 8139cp i2c_core mii floppy [ 186.710423] ---[ end trace 463bba18105537e5 ]--- The problem is that x-netns xfrm interface are not removed when the link netns is removed. This causes later this oops when thoses interfaces are removed. Let's add a handler to remove all interfaces related to a netns when this netns is removed. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: Christophe Gouault Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 3361e3ac5714..1e115cbf21d3 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -750,7 +750,28 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { .get_link_net = xfrmi_get_link_net, }; +static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_exit_list, exit_list) { + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + struct xfrm_if __rcu **xip; + struct xfrm_if *xi; + + for (xip = &xfrmn->xfrmi[0]; + (xi = rtnl_dereference(*xip)) != NULL; + xip = &xi->next) + unregister_netdevice_queue(xi->dev, &list); + } + unregister_netdevice_many(&list); + rtnl_unlock(); +} + static struct pernet_operations xfrmi_net_ops = { + .exit_batch = xfrmi_exit_batch_net, .id = &xfrmi_net_id, .size = sizeof(struct xfrmi_net), }; From 56b1b7c667fbb9ee395f7506dfef3c04571e024a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 13 May 2020 18:38:54 +0800 Subject: [PATCH 12/15] esp6: calculate transport_header correctly when sel.family != AF_INET6 In esp6_init_state() for beet mode when x->sel.family != AF_INET6: x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead) + IPV4_BEET_PHMAXLEN + (sizeof(struct ipv6hdr) - sizeof(struct iphdr)) In xfrm6_beet_gso_segment() skb->transport_header is supposed to move to the end of the ph header for IPPROTO_BEETPH, so if x->sel.family != AF_INET6 and it's IPPROTO_BEETPH, it should do: skb->transport_header -= (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); skb->transport_header += ph->hdrlen * 8; And IPV4_BEET_PHMAXLEN is only reserved for PH header, so if x->sel.family != AF_INET6 and it's not IPPROTO_BEETPH, it should do: skb->transport_header -= (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); skb->transport_header -= IPV4_BEET_PHMAXLEN; Thanks Sabrina for looking deep into this issue. Fixes: 7f9e40eb18a9 ("esp6: add gso_segment for esp6 beet mode") Reported-by: Sabrina Dubroca Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/esp6_offload.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 9c03460b2760..ab0eea336c70 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -175,24 +175,27 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x, skb->transport_header += x->props.header_len; - if (proto == IPPROTO_BEETPH) { - struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data; + if (x->sel.family != AF_INET6) { + skb->transport_header -= + (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); - skb->transport_header += ph->hdrlen * 8; - proto = ph->nexthdr; - } + if (proto == IPPROTO_BEETPH) { + struct ip_beet_phdr *ph = + (struct ip_beet_phdr *)skb->data; - if (x->sel.family == AF_INET6) { + skb->transport_header += ph->hdrlen * 8; + proto = ph->nexthdr; + } else { + skb->transport_header -= IPV4_BEET_PHMAXLEN; + } + + if (proto == IPPROTO_TCP) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + } else { __be16 frag; skb->transport_header += ipv6_skip_exthdr(skb, 0, &proto, &frag); - } else { - skb->transport_header -= - (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); - - if (proto == IPPROTO_TCP) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; } __skb_pull(skb, skb_transport_offset(skb)); From 3ffb93ba326f40b47b17a4e8b3399c0fa2e8cee6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 May 2020 13:35:19 +0800 Subject: [PATCH 13/15] esp4: improve xfrm4_beet_gso_segment() to be more readable This patch is to improve the code to make xfrm4_beet_gso_segment() more readable, and keep consistent with xfrm6_beet_gso_segment(). Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 9b1d451edae0..d14133eac476 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -141,20 +141,23 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, skb->transport_header += x->props.header_len; - if (proto == IPPROTO_BEETPH) { - struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data; + if (x->sel.family != AF_INET6) { + if (proto == IPPROTO_BEETPH) { + struct ip_beet_phdr *ph = + (struct ip_beet_phdr *)skb->data; - skb->transport_header += ph->hdrlen * 8; - proto = ph->nexthdr; - } else if (x->sel.family == AF_INET6) { + skb->transport_header += ph->hdrlen * 8; + proto = ph->nexthdr; + } else { + skb->transport_header -= IPV4_BEET_PHMAXLEN; + } + } else { __be16 frag; skb->transport_header += ipv6_skip_exthdr(skb, 0, &proto, &frag); if (proto == IPPROTO_TCP) skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; - } else { - skb->transport_header -= IPV4_BEET_PHMAXLEN; } __skb_pull(skb, skb_transport_offset(skb)); From ed17b8d377eaf6b4a01d46942b4c647378a79bdd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 May 2020 13:53:37 +0800 Subject: [PATCH 14/15] xfrm: fix a warning in xfrm_policy_insert_list This waring can be triggered simply by: # ip xfrm policy update src 192.168.1.1/24 dst 192.168.1.2/24 dir in \ priority 1 mark 0 mask 0x10 #[1] # ip xfrm policy update src 192.168.1.1/24 dst 192.168.1.2/24 dir in \ priority 2 mark 0 mask 0x1 #[2] # ip xfrm policy update src 192.168.1.1/24 dst 192.168.1.2/24 dir in \ priority 2 mark 0 mask 0x10 #[3] Then dmesg shows: [ ] WARNING: CPU: 1 PID: 7265 at net/xfrm/xfrm_policy.c:1548 [ ] RIP: 0010:xfrm_policy_insert_list+0x2f2/0x1030 [ ] Call Trace: [ ] xfrm_policy_inexact_insert+0x85/0xe50 [ ] xfrm_policy_insert+0x4ba/0x680 [ ] xfrm_add_policy+0x246/0x4d0 [ ] xfrm_user_rcv_msg+0x331/0x5c0 [ ] netlink_rcv_skb+0x121/0x350 [ ] xfrm_netlink_rcv+0x66/0x80 [ ] netlink_unicast+0x439/0x630 [ ] netlink_sendmsg+0x714/0xbf0 [ ] sock_sendmsg+0xe2/0x110 The issue was introduced by Commit 7cb8a93968e3 ("xfrm: Allow inserting policies with matching mark and different priorities"). After that, the policies [1] and [2] would be able to be added with different priorities. However, policy [3] will actually match both [1] and [2]. Policy [1] was matched due to the 1st 'return true' in xfrm_policy_mark_match(), and policy [2] was matched due to the 2nd 'return true' in there. It caused WARN_ON() in xfrm_policy_insert_list(). This patch is to fix it by only (the same value and priority) as the same policy in xfrm_policy_mark_match(). Thanks to Yuehaibing, we could make this fix better. v1->v2: - check policy->mark.v == pol->mark.v only without mask. Fixes: 7cb8a93968e3 ("xfrm: Allow inserting policies with matching mark and different priorities") Reported-by: Xiumei Mu Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 297b2fdb3c29..564aa6492e7c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1436,12 +1436,7 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, static bool xfrm_policy_mark_match(struct xfrm_policy *policy, struct xfrm_policy *pol) { - u32 mark = policy->mark.v & policy->mark.m; - - if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) - return true; - - if ((mark & pol->mark.m) == pol->mark.v && + if (policy->mark.v == pol->mark.v && policy->priority == pol->priority) return true; From f6a23d85d078c2ffde79c66ca81d0a1dde451649 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 26 May 2020 17:41:46 +0800 Subject: [PATCH 15/15] xfrm: fix a NULL-ptr deref in xfrm_local_error This patch is to fix a crash: [ ] kasan: GPF could be caused by NULL-ptr deref or user memory access [ ] general protection fault: 0000 [#1] SMP KASAN PTI [ ] RIP: 0010:ipv6_local_error+0xac/0x7a0 [ ] Call Trace: [ ] xfrm6_local_error+0x1eb/0x300 [ ] xfrm_local_error+0x95/0x130 [ ] __xfrm6_output+0x65f/0xb50 [ ] xfrm6_output+0x106/0x46f [ ] udp_tunnel6_xmit_skb+0x618/0xbf0 [ip6_udp_tunnel] [ ] vxlan_xmit_one+0xbc6/0x2c60 [vxlan] [ ] vxlan_xmit+0x6a0/0x4276 [vxlan] [ ] dev_hard_start_xmit+0x165/0x820 [ ] __dev_queue_xmit+0x1ff0/0x2b90 [ ] ip_finish_output2+0xd3e/0x1480 [ ] ip_do_fragment+0x182d/0x2210 [ ] ip_output+0x1d0/0x510 [ ] ip_send_skb+0x37/0xa0 [ ] raw_sendmsg+0x1b4c/0x2b80 [ ] sock_sendmsg+0xc0/0x110 This occurred when sending a v4 skb over vxlan6 over ipsec, in which case skb->protocol == htons(ETH_P_IPV6) while skb->sk->sk_family == AF_INET in xfrm_local_error(). Then it will go to xfrm6_local_error() where it tries to get ipv6 info from a ipv4 sk. This issue was actually fixed by Commit 628e341f319f ("xfrm: make local error reporting more robust"), but brought back by Commit 844d48746e4b ("xfrm: choose protocol family by skb protocol"). So to fix it, we should call xfrm6_local_error() only when skb->protocol is htons(ETH_P_IPV6) and skb->sk->sk_family is AF_INET6. Fixes: 844d48746e4b ("xfrm: choose protocol family by skb protocol") Reported-by: Xiumei Mu Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 69c33cab8f49..69c4900db817 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -642,7 +642,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu) if (skb->protocol == htons(ETH_P_IP)) proto = AF_INET; - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (skb->protocol == htons(ETH_P_IPV6) && + skb->sk->sk_family == AF_INET6) proto = AF_INET6; else return;