netfilter: refuse insertion if chain has grown too large

Also add a stat counter for this that gets exported both via old /proc
interface and ctnetlink.

Assuming the old default size of 16536 buckets and max hash occupancy of
64k, this results in 128k insertions (origin+reply), so ~8 entries per
chain on average.

The revised settings in this series will result in about two entries per
bucket on average.

This allows a hard-limit ceiling of 64.

This is not tunable at the moment, but its possible to either increase
nf_conntrack_buckets or decrease nf_conntrack_max to reduce average
lengths.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
Florian Westphal 2021-08-26 15:54:22 +02:00 committed by Pablo Neira Ayuso
parent dd6d2910c5
commit d7e7747ac5
5 changed files with 42 additions and 10 deletions

View file

@ -18,6 +18,7 @@ struct ip_conntrack_stat {
unsigned int expect_create;
unsigned int expect_delete;
unsigned int search_restart;
unsigned int chaintoolong;
};
#define NFCT_INFOMASK 7UL

View file

@ -257,6 +257,7 @@ enum ctattr_stats_cpu {
CTA_STATS_ERROR,
CTA_STATS_SEARCH_RESTART,
CTA_STATS_CLASH_RESOLVE,
CTA_STATS_CHAIN_TOOLONG,
__CTA_STATS_MAX,
};
#define CTA_STATS_MAX (__CTA_STATS_MAX - 1)

View file

@ -77,6 +77,8 @@ static __read_mostly bool nf_conntrack_locks_all;
#define GC_SCAN_INTERVAL (120u * HZ)
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
#define MAX_CHAINLEN 64u
static struct conntrack_gc_work conntrack_gc_work;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@ -840,7 +842,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
unsigned int chainlen = 0;
unsigned int sequence;
int err = -EEXIST;
zone = nf_ct_zone(ct);
@ -854,15 +858,24 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
if (chainlen++ > MAX_CHAINLEN)
goto chaintoolong;
}
chainlen = 0;
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
if (chainlen++ > MAX_CHAINLEN)
goto chaintoolong;
}
smp_wmb();
/* The caller holds a reference to this object */
@ -872,11 +885,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
NF_CT_STAT_INC(net, insert);
local_bh_enable();
return 0;
chaintoolong:
NF_CT_STAT_INC(net, chaintoolong);
err = -ENOSPC;
out:
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
return -EEXIST;
return err;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
@ -1089,6 +1104,7 @@ int
__nf_conntrack_confirm(struct sk_buff *skb)
{
const struct nf_conntrack_zone *zone;
unsigned int chainlen = 0, sequence;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@ -1096,7 +1112,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
unsigned int sequence;
int ret = NF_DROP;
ct = nf_ct_get(skb, &ctinfo);
@ -1156,15 +1171,28 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
if (chainlen++ > MAX_CHAINLEN)
goto chaintoolong;
}
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
chainlen = 0;
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
if (chainlen++ > MAX_CHAINLEN) {
chaintoolong:
nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, chaintoolong);
NF_CT_STAT_INC(net, insert_failed);
ret = NF_DROP;
goto dying;
}
}
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in

View file

@ -2484,7 +2484,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
htonl(st->search_restart)) ||
nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE,
htonl(st->clash_resolve)))
htonl(st->clash_resolve)) ||
nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG,
htonl(st->chaintoolong)))
goto nla_put_failure;
nlmsg_end(skb, nlh);

View file

@ -429,7 +429,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
unsigned int nr_conntracks;
if (v == SEQ_START_TOKEN) {
seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
seq_puts(seq, "entries clashres found new invalid ignore delete chainlength insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
return 0;
}
@ -444,7 +444,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
st->invalid,
0,
0,
0,
st->chaintoolong,
st->insert,
st->insert_failed,
st->drop,