netfilter: br_netfilter: skip conntrack input hook for promisc packets

For historical reasons, when bridge device is in promisc mode, packets
that are directed to the taps follow bridge input hook path. This patch
adds a workaround to reset conntrack for these packets.

Jianbo Liu reports warning splats in their test infrastructure where
cloned packets reach the br_netfilter input hook to confirm the
conntrack object.

Scratch one bit from BR_INPUT_SKB_CB to annotate that this packet has
reached the input hook because it is passed up to the bridge device to
reach the taps.

[   57.571874] WARNING: CPU: 1 PID: 0 at net/bridge/br_netfilter_hooks.c:616 br_nf_local_in+0x157/0x180 [br_netfilter]
[   57.572749] Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_isc si ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5ctl mlx5_core
[   57.575158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0+ #19
[   57.575700] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[   57.576662] RIP: 0010:br_nf_local_in+0x157/0x180 [br_netfilter]
[   57.577195] Code: fe ff ff 41 bd 04 00 00 00 be 04 00 00 00 e9 4a ff ff ff be 04 00 00 00 48 89 ef e8 f3 a9 3c e1 66 83 ad b4 00 00 00 04 eb 91 <0f> 0b e9 f1 fe ff ff 0f 0b e9 df fe ff ff 48 89 df e8 b3 53 47 e1
[   57.578722] RSP: 0018:ffff88885f845a08 EFLAGS: 00010202
[   57.579207] RAX: 0000000000000002 RBX: ffff88812dfe8000 RCX: 0000000000000000
[   57.579830] RDX: ffff88885f845a60 RSI: ffff8881022dc300 RDI: 0000000000000000
[   57.580454] RBP: ffff88885f845a60 R08: 0000000000000001 R09: 0000000000000003
[   57.581076] R10: 00000000ffff1300 R11: 0000000000000002 R12: 0000000000000000
[   57.581695] R13: ffff8881047ffe00 R14: ffff888108dbee00 R15: ffff88814519b800
[   57.582313] FS:  0000000000000000(0000) GS:ffff88885f840000(0000) knlGS:0000000000000000
[   57.583040] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   57.583564] CR2: 000000c4206aa000 CR3: 0000000103847001 CR4: 0000000000370eb0
[   57.584194] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[   57.584820] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[   57.585440] Call Trace:
[   57.585721]  <IRQ>
[   57.585976]  ? __warn+0x7d/0x130
[   57.586323]  ? br_nf_local_in+0x157/0x180 [br_netfilter]
[   57.586811]  ? report_bug+0xf1/0x1c0
[   57.587177]  ? handle_bug+0x3f/0x70
[   57.587539]  ? exc_invalid_op+0x13/0x60
[   57.587929]  ? asm_exc_invalid_op+0x16/0x20
[   57.588336]  ? br_nf_local_in+0x157/0x180 [br_netfilter]
[   57.588825]  nf_hook_slow+0x3d/0xd0
[   57.589188]  ? br_handle_vlan+0x4b/0x110
[   57.589579]  br_pass_frame_up+0xfc/0x150
[   57.589970]  ? br_port_flags_change+0x40/0x40
[   57.590396]  br_handle_frame_finish+0x346/0x5e0
[   57.590837]  ? ipt_do_table+0x32e/0x430
[   57.591221]  ? br_handle_local_finish+0x20/0x20
[   57.591656]  br_nf_hook_thresh+0x4b/0xf0 [br_netfilter]
[   57.592286]  ? br_handle_local_finish+0x20/0x20
[   57.592802]  br_nf_pre_routing_finish+0x178/0x480 [br_netfilter]
[   57.593348]  ? br_handle_local_finish+0x20/0x20
[   57.593782]  ? nf_nat_ipv4_pre_routing+0x25/0x60 [nf_nat]
[   57.594279]  br_nf_pre_routing+0x24c/0x550 [br_netfilter]
[   57.594780]  ? br_nf_hook_thresh+0xf0/0xf0 [br_netfilter]
[   57.595280]  br_handle_frame+0x1f3/0x3d0
[   57.595676]  ? br_handle_local_finish+0x20/0x20
[   57.596118]  ? br_handle_frame_finish+0x5e0/0x5e0
[   57.596566]  __netif_receive_skb_core+0x25b/0xfc0
[   57.597017]  ? __napi_build_skb+0x37/0x40
[   57.597418]  __netif_receive_skb_list_core+0xfb/0x220

Fixes: 62e7151ae3 ("netfilter: bridge: confirm multicast packets before passing them up the stack")
Reported-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
Pablo Neira Ayuso 2024-04-09 11:24:59 +02:00
parent d78d867dce
commit 751de2012e
4 changed files with 28 additions and 8 deletions

View File

@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
return netif_receive_skb(skb);
}
static int br_pass_frame_up(struct sk_buff *skb)
static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
struct net_bridge *br = netdev_priv(brdev);
@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb)
br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
BR_MCAST_DIR_TX);
BR_INPUT_SKB_CB(skb)->promisc = promisc;
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
dev_net(indev), NULL, skb, indev, NULL,
br_netif_receive_skb);
@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
struct net_bridge_mcast *brmctx;
struct net_bridge_vlan *vlan;
struct net_bridge *br;
bool promisc;
u16 vid = 0;
u8 state;
@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
local_rcv = !!(br->dev->flags & IFF_PROMISC);
promisc = !!(br->dev->flags & IFF_PROMISC);
local_rcv = promisc;
if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
/* by definition the broadcast is also a multicast address */
if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
unsigned long now = jiffies;
if (test_bit(BR_FDB_LOCAL, &dst->flags))
return br_pass_frame_up(skb);
return br_pass_frame_up(skb, false);
if (now != dst->used)
dst->used = now;
@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
}
if (local_rcv)
return br_pass_frame_up(skb);
return br_pass_frame_up(skb, promisc);
out:
return 0;
@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
goto forward;
}
BR_INPUT_SKB_CB(skb)->promisc = false;
/* The else clause should be hit when nf_hook():
* - returns < 0 (drop/error)
* - returns = 0 (stolen/nf_queue)

View File

@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
struct nf_conntrack *nfct = skb_nfct(skb);
const struct nf_ct_hook *ct_hook;
struct nf_conn *ct;
int ret;
if (promisc) {
nf_reset_ct(skb);
return NF_ACCEPT;
}
if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;

View File

@ -589,6 +589,7 @@ struct br_input_skb_cb {
#endif
u8 proxyarp_replied:1;
u8 src_port_isolated:1;
u8 promisc:1;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_filtered:1;
#endif

View File

@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
enum ip_conntrack_info ctinfo;
bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
struct nf_conntrack *nfct = skb_nfct(skb);
struct nf_conn *ct;
if (skb->pkt_type == PACKET_HOST)
if (promisc) {
nf_reset_ct(skb);
return NF_ACCEPT;
}
if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
/* nf_conntrack_confirm() cannot handle concurrent clones,
* this happens for broad/multicast frames with e.g. macvlan on top
* of the bridge device.
*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
ct = container_of(nfct, struct nf_conn, ct_general);
if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
return NF_ACCEPT;
/* let inet prerouting call conntrack again */