netfilter: nfnetlink: re-enable conntrack expectation events

To avoid allocation of the conntrack extension area when possible,
the default behaviour was changed to only allocate the event extension
if a userspace program is subscribed to a notification group.

Problem is that while 'conntrack -E' does enable the event allocation
behind the scenes, 'conntrack -E expect' does not: no expectation events
are delivered unless user sets
"net.netfilter.nf_conntrack_events" back to 1 (always on).

Fix the autodetection to also consider EXP type group.

We need to track the 6 event groups (3+3, new/update/destroy for events and
for expectations each) independently, else we'd disable events again
if an expectation group becomes empty while there is still an active
event group.

Fixes: 2794cdb0b9 ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist")
Reported-by: Yi Chen <yiche@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
This commit is contained in:
Florian Westphal 2022-08-05 10:59:57 +02:00
parent 2024439bd5
commit 0b2f3212b5
2 changed files with 72 additions and 13 deletions

View file

@ -95,7 +95,7 @@ struct nf_ip_net {
struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
bool ctnetlink_has_listener;
u8 ctnetlink_has_listener;
bool ecache_dwork_pending;
#endif
u8 sysctl_log_invalid; /* Log invalid packets */

View file

@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");
static unsigned int nfnetlink_pernet_id __read_mostly;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static DEFINE_SPINLOCK(nfnl_grp_active_lock);
#endif
struct nfnl_net {
struct sock *nfnl;
};
@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb)
netlink_rcv_skb(skb, nfnetlink_rcv_msg);
}
static void nfnetlink_bind_event(struct net *net, unsigned int group)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
int type, group_bit;
u8 v;
/* All NFNLGRP_CONNTRACK_* group bits fit into u8.
* The other groups are not relevant and can be ignored.
*/
if (group >= 8)
return;
type = nfnl_group2type[group];
switch (type) {
case NFNL_SUBSYS_CTNETLINK:
break;
case NFNL_SUBSYS_CTNETLINK_EXP:
break;
default:
return;
}
group_bit = (1 << group);
spin_lock(&nfnl_grp_active_lock);
v = READ_ONCE(net->ct.ctnetlink_has_listener);
if ((v & group_bit) == 0) {
v |= group_bit;
/* read concurrently without nfnl_grp_active_lock held. */
WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
}
spin_unlock(&nfnl_grp_active_lock);
#endif
}
static int nfnetlink_bind(struct net *net, int group)
{
const struct nfnetlink_subsystem *ss;
@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group)
if (!ss)
request_module_nowait("nfnetlink-subsys-%d", type);
#ifdef CONFIG_NF_CONNTRACK_EVENTS
if (type == NFNL_SUBSYS_CTNETLINK) {
nfnl_lock(NFNL_SUBSYS_CTNETLINK);
WRITE_ONCE(net->ct.ctnetlink_has_listener, true);
nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
}
#endif
nfnetlink_bind_event(net, group);
return 0;
}
static void nfnetlink_unbind(struct net *net, int group)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
int type, group_bit;
if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
return;
if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) {
nfnl_lock(NFNL_SUBSYS_CTNETLINK);
if (!nfnetlink_has_listeners(net, group))
WRITE_ONCE(net->ct.ctnetlink_has_listener, false);
nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
type = nfnl_group2type[group];
switch (type) {
case NFNL_SUBSYS_CTNETLINK:
break;
case NFNL_SUBSYS_CTNETLINK_EXP:
break;
default:
return;
}
/* ctnetlink_has_listener is u8 */
if (group >= 8)
return;
group_bit = (1 << group);
spin_lock(&nfnl_grp_active_lock);
if (!nfnetlink_has_listeners(net, group)) {
u8 v = READ_ONCE(net->ct.ctnetlink_has_listener);
v &= ~group_bit;
/* read concurrently without nfnl_grp_active_lock held. */
WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
}
spin_unlock(&nfnl_grp_active_lock);
#endif
}