netfilter pull request 24-02-08

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEN9lkrMBJgcdVAPub1V2XiooUIOQFAmXEuicACgkQ1V2XiooU
 IOSbvA/9F2BC9TYKAh23/0EFbD4jOl4e26YE4E+Eu8AteoQ/nD+oI+mtWgw2hVXg
 zXvm1vfIc02jGuGfcPZ+EIv/dkznnDqqUpUGa4ixtgvRw2bKkb2kKMlrFsjzsihj
 yabXydwhxYE9b4Ch2AmRyApTLRMocte1IJ3ci4YUXwf68wZlOe2bIG5wyzGkFpjF
 QZN/Rr14UKjC57EYNdUG9UdybWSqSKD23LPZSaLvi6wxoZd8cIcIkng5K4N0WVKF
 lNskuNFY+j+bJz2Yn3mWIlCoM3R1N2B04t7wRkYnKWkSuwymG3O7JC3RUQaZDBZw
 8AogEbvXaIY3nxyN4lHZ/jzM/QzNB1WHlPx6RjWKHoNhnas+xuBYrjCdJZwtEu8g
 xs27Tjk3QtCIuaMuhN0RFqiq93MqZD/qx++kwMwJA0Wrg76MLPpf8yEWwVGYcAEG
 0EWa61UfPezbcVkW8XveW6lgDfcOIOpBevxDQ3Nf7JB0AcbVBks7oDpGwDc5Pdz5
 6y7WQIilxUtu9bHODUxrshxgTBwsocVkXUTIogCihUC+SgSZF+/G796c9Iy5/kPq
 BtmSNJOJyCbnivkqKTLF0Pv0BplOv7W1sx2/fo+IfRXYTHoXVjHe1BYP0Ck3WEtS
 9EPsFlI5f4AOtnPF3JrTPec9PvuHyVN+8aOPi82wlKiayJcXy1I=
 =Rh2n
 -----END PGP SIGNATURE-----

Merge tag 'nf-24-02-08' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Narrow down target/match revision to u8 in nft_compat.

2) Bail out with unused flags in nft_compat.

3) Restrict layer 4 protocol to u16 in nft_compat.

4) Remove static in pipapo get command that slipped through when
   reducing set memory footprint.

5) Follow up incremental fix for the ipset performance regression,
   this includes the missing gc cancellation, from Jozsef Kadlecsik.

6) Allow to filter by zone 0 in ctnetlink, do not interpret zone 0
   as no filtering, from Felix Huettner.

7) Reject direction for NFT_CT_ID.

8) Use timestamp to check for set element expiration while transaction
   is handled to prevent garbage collection from removing set elements
   that were just added by this transaction. Packet path and netlink
   dump/get path still use current time to check for expiration.

9) Restore NF_REPEAT in nfnetlink_queue, from Florian Westphal.

10) map_index needs to be percpu and per-set, not just percpu.
    At this time its possible for a pipapo set to fill the all-zero part
    with ones and take the 'might have bits set' as 'start-from-zero' area.
    From Florian Westphal. This includes three patches:

    - Change scratchpad area to a structure that provides space for a
      per-set-and-cpu toggle and uses it of the percpu one.

    - Add a new free helper to prepare for the next patch.

    - Remove the scratch_aligned pointer and makes AVX2 implementation
      use the exact same memory addresses for read/store of the matching
      state.

netfilter pull request 24-02-08

* tag 'nf-24-02-08' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nft_set_pipapo: remove scratch_aligned pointer
  netfilter: nft_set_pipapo: add helper to release pcpu scratch area
  netfilter: nft_set_pipapo: store index in scratch maps
  netfilter: nft_set_rbtree: skip end interval element from gc
  netfilter: nfnetlink_queue: un-break NF_REPEAT
  netfilter: nf_tables: use timestamp to check for set element timeout
  netfilter: nft_ct: reject direction for ct id
  netfilter: ctnetlink: fix filtering for zone 0
  netfilter: ipset: Missing gc cancellations fixed
  netfilter: nft_set_pipapo: remove static in nft_pipapo_get()
  netfilter: nft_compat: restrict match/target protocol to u16
  netfilter: nft_compat: reject unused compat flag
  netfilter: nft_compat: narrow down revision to unsigned 8-bits
====================

Link: https://lore.kernel.org/r/20240208112834.1433-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2024-02-08 12:56:39 +01:00
commit 63e4b9d693
15 changed files with 202 additions and 102 deletions

View file

@ -808,10 +808,16 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex
return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS);
}
static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext,
u64 tstamp)
{
return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
time_after_eq64(tstamp, *nft_set_ext_expiration(ext));
}
static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
{
return __nft_set_elem_expired(ext, get_jiffies_64());
}
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
@ -1779,6 +1785,7 @@ struct nftables_pernet {
struct list_head notify_list;
struct mutex commit_mutex;
u64 table_handle;
u64 tstamp;
unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
@ -1791,6 +1798,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net)
return net_generic(net, nf_tables_net_id);
}
static inline u64 nft_net_tstamp(const struct net *net)
{
return nft_pernet(net)->tstamp;
}
#define __NFT_REDUCE_READONLY 1UL
#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY

View file

@ -285,9 +285,11 @@ enum nft_rule_attributes {
/**
* enum nft_rule_compat_flags - nf_tables rule compat flags
*
* @NFT_RULE_COMPAT_F_UNUSED: unused
* @NFT_RULE_COMPAT_F_INV: invert the check result
*/
enum nft_rule_compat_flags {
NFT_RULE_COMPAT_F_UNUSED = (1 << 0),
NFT_RULE_COMPAT_F_INV = (1 << 1),
NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV,
};

View file

@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
return ret;
cleanup:
set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@ -2378,6 +2379,7 @@ ip_set_net_exit(struct net *net)
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}

View file

@ -432,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
n = __ipset_dereference(hbucket(t, i));
n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@ -452,7 +452,7 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
kfree(l);

View file

@ -876,6 +876,7 @@ struct ctnetlink_filter_u32 {
struct ctnetlink_filter {
u8 family;
bool zone_filter;
u_int32_t orig_flags;
u_int32_t reply_flags;
@ -992,9 +993,12 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
if (err)
goto err_filter;
err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
if (err < 0)
goto err_filter;
if (cda[CTA_ZONE]) {
err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
if (err < 0)
goto err_filter;
filter->zone_filter = true;
}
if (!cda[CTA_FILTER])
return filter;
@ -1148,7 +1152,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
if (filter->family && nf_ct_l3num(ct) != filter->family)
goto ignore_entry;
if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID &&
if (filter->zone_filter &&
!nf_ct_zone_equal_any(ct, &filter->zone))
goto ignore_entry;

View file

@ -9827,6 +9827,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
{
struct nft_set_elem_catchall *catchall, *next;
u64 tstamp = nft_net_tstamp(gc->net);
const struct nft_set *set = gc->set;
struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
@ -9836,7 +9837,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_expired(ext))
if (!__nft_set_elem_expired(ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@ -10622,6 +10623,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
bool genid_ok;
mutex_lock(&nft_net->commit_mutex);
nft_net->tstamp = get_jiffies_64();
genid_ok = genid == 0 || nft_net->base_seq == genid;
if (!genid_ok)

View file

@ -232,18 +232,25 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
unsigned int ct_verdict = verdict;
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook)
verdict = ct_hook->update(entry->state.net, entry->skb);
ct_verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
switch (verdict & NF_VERDICT_MASK) {
switch (ct_verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
/* follow userspace verdict, could be REPEAT */
break;
case NF_STOLEN:
nf_queue_entry_free(entry);
return;
default:
verdict = ct_verdict & NF_VERDICT_MASK;
break;
}
}
nf_reinject(entry, verdict);
}

View file

@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
[NFTA_TARGET_REV] = { .type = NLA_U32 },
[NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
{
struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
u32 l4proto;
u32 flags;
int err;
@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return -EINVAL;
flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
if (flags & ~NFT_RULE_COMPAT_F_MASK)
if (flags & NFT_RULE_COMPAT_F_UNUSED ||
flags & ~NFT_RULE_COMPAT_F_MASK)
return -EINVAL;
if (flags & NFT_RULE_COMPAT_F_INV)
*inv = true;
*proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
if (l4proto > U16_MAX)
return -EINVAL;
*proto = l4proto;
return 0;
}
@ -419,7 +426,7 @@ static void nft_match_eval(const struct nft_expr *expr,
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
[NFTA_MATCH_REV] = { .type = NLA_U32 },
[NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@ -724,7 +731,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
[NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
.len = NFT_COMPAT_NAME_MAX-1 },
[NFTA_COMPAT_REV] = { .type = NLA_U32 },
[NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
};

View file

@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
break;
#endif
case NFT_CT_ID:
if (tb[NFTA_CT_DIRECTION])
return -EINVAL;
len = sizeof(u32);
break;
default:

View file

@ -36,6 +36,7 @@ struct nft_rhash_cmp_arg {
const struct nft_set *set;
const u32 *key;
u8 genmask;
u64 tstamp;
};
static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
@ -62,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
return 1;
if (nft_set_elem_is_dead(&he->ext))
return 1;
if (nft_set_elem_expired(&he->ext))
if (__nft_set_elem_expired(&he->ext, x->tstamp))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
@ -87,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
.tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@ -106,6 +108,7 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = elem->key.val.data,
.tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@ -131,6 +134,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
.genmask = NFT_GENMASK_ANY,
.set = set,
.key = key,
.tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@ -175,6 +179,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
.tstamp = nft_net_tstamp(net),
};
struct nft_rhash_elem *prev;
@ -216,6 +221,7 @@ nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
.tstamp = nft_net_tstamp(net),
};
rcu_read_lock();

View file

@ -342,9 +342,6 @@
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
/* Current working bitmap index, toggled between field matches */
static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
local_bh_disable();
map_index = raw_cpu_read(nft_pipapo_scratch_index);
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
goto out;
res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
scratch = *raw_cpu_ptr(m->scratch);
map_index = scratch->map_index;
res_map = scratch->map + (map_index ? m->bsize_max : 0);
fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
@ -460,7 +460,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0) {
raw_cpu_write(nft_pipapo_scratch_index, map_index);
scratch->map_index = map_index;
local_bh_enable();
return false;
@ -477,7 +477,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
* current inactive bitmap is clean and can be reused as
* *next* bitmap (not initial) for the next packet.
*/
raw_cpu_write(nft_pipapo_scratch_index, map_index);
scratch->map_index = map_index;
local_bh_enable();
return true;
@ -504,6 +504,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
* @set: nftables API set representation
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
* @tstamp: timestamp to check for expired elements
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@ -513,7 +514,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
const u8 *data, u8 genmask)
const u8 *data, u8 genmask,
u64 tstamp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
struct nft_pipapo *priv = nft_set_priv(set);
@ -566,7 +568,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
goto out;
if (last) {
if (nft_set_elem_expired(&f->mt[b].e->ext))
if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@ -603,10 +605,10 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
static struct nft_pipapo_elem *e;
struct nft_pipapo_elem *e;
e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
nft_genmask_cur(net));
nft_genmask_cur(net), get_jiffies_64());
if (IS_ERR(e))
return ERR_CAST(e);
@ -1108,6 +1110,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
f->mt[map[i].to + j].e = e;
}
/**
* pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
* @m: Matching data
* @cpu: CPU number
*/
static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
{
struct nft_pipapo_scratch *s;
void *mem;
s = *per_cpu_ptr(m->scratch, cpu);
if (!s)
return;
mem = s;
mem -= s->align_off;
kfree(mem);
}
/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
@ -1121,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
int i;
for_each_possible_cpu(i) {
unsigned long *scratch;
struct nft_pipapo_scratch *scratch;
#ifdef NFT_PIPAPO_ALIGN
unsigned long *scratch_aligned;
void *scratch_aligned;
u32 align_off;
#endif
scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
scratch = kzalloc_node(struct_size(scratch, map,
bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
@ -1140,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return -ENOMEM;
}
kfree(*per_cpu_ptr(clone->scratch, i));
*per_cpu_ptr(clone->scratch, i) = scratch;
pipapo_free_scratch(clone, i);
#ifdef NFT_PIPAPO_ALIGN
scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
*per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
/* Align &scratch->map (not the struct itself): the extra
* %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
* above guarantee we can waste up to those bytes in order
* to align the map field regardless of its offset within
* the struct.
*/
BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
align_off = scratch_aligned - (void *)scratch;
scratch = scratch_aligned;
scratch->align_off = align_off;
#endif
*per_cpu_ptr(clone->scratch, i) = scratch;
}
return 0;
@ -1173,6 +1206,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@ -1182,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
dup = pipapo_get(net, set, start, genmask);
dup = pipapo_get(net, set, start, genmask, tstamp);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@ -1204,7 +1238,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
dup = pipapo_get(net, set, end, nft_genmask_next(net));
dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
}
if (PTR_ERR(dup) != -ENOENT) {
@ -1301,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
#ifdef NFT_PIPAPO_ALIGN
new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
if (!new->scratch_aligned)
goto out_scratch;
#endif
for_each_possible_cpu(i)
*per_cpu_ptr(new->scratch, i) = NULL;
@ -1357,10 +1386,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
}
out_scratch_realloc:
for_each_possible_cpu(i)
kfree(*per_cpu_ptr(new->scratch, i));
#ifdef NFT_PIPAPO_ALIGN
free_percpu(new->scratch_aligned);
#endif
pipapo_free_scratch(new, i);
out_scratch:
free_percpu(new->scratch);
kfree(new);
@ -1560,6 +1586,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
u64 tstamp = nft_net_tstamp(net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@ -1594,7 +1621,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
if (nft_set_elem_expired(&e->ext)) {
if (__nft_set_elem_expired(&e->ext, tstamp)) {
priv->dirty = true;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@ -1640,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
int i;
for_each_possible_cpu(i)
kfree(*per_cpu_ptr(m->scratch, i));
pipapo_free_scratch(m, i);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
#endif
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@ -1769,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo_elem *e;
e = pipapo_get(net, set, data, nft_genmask_next(net));
e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
if (IS_ERR(e))
return NULL;
@ -2132,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set,
m->field_count = field_count;
m->bsize_max = 0;
m->scratch = alloc_percpu(unsigned long *);
m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
if (!m->scratch) {
err = -ENOMEM;
goto out_scratch;
@ -2140,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set,
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
#ifdef NFT_PIPAPO_ALIGN
m->scratch_aligned = alloc_percpu(unsigned long *);
if (!m->scratch_aligned) {
err = -ENOMEM;
goto out_free;
}
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch_aligned, i) = NULL;
#endif
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
@ -2180,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
@ -2236,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
#endif
for_each_possible_cpu(cpu)
kfree(*per_cpu_ptr(m->scratch, cpu));
pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@ -2253,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
#endif
for_each_possible_cpu(cpu)
kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
pipapo_free_scratch(priv->clone, cpu);
free_percpu(priv->clone->scratch);
pipapo_free_fields(priv->clone);

View file

@ -130,21 +130,29 @@ struct nft_pipapo_field {
union nft_pipapo_map_bucket *mt;
};
/**
* struct nft_pipapo_scratch - percpu data used for lookup and matching
* @map_index: Current working bitmap index, toggled between field matches
* @align_off: Offset to get the originally allocated address
* @map: store partial matching results during lookup
*/
struct nft_pipapo_scratch {
u8 map_index;
u32 align_off;
unsigned long map[];
};
/**
* struct nft_pipapo_match - Data used for lookup and matching
* @field_count Amount of fields in set
* @scratch: Preallocated per-CPU maps for partial matching results
* @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
* @rcu Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
int field_count;
#ifdef NFT_PIPAPO_ALIGN
unsigned long * __percpu *scratch_aligned;
#endif
unsigned long * __percpu *scratch;
struct nft_pipapo_scratch * __percpu *scratch;
size_t bsize_max;
struct rcu_head rcu;
struct nft_pipapo_field f[] __counted_by(field_count);

View file

@ -71,9 +71,6 @@
#define NFT_PIPAPO_AVX2_ZERO(reg) \
asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
/* Current working bitmap index, toggled between field matches */
static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
/**
* nft_pipapo_avx2_prepare() - Prepare before main algorithm body
*
@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
unsigned long *res, *fill, *scratch;
struct nft_pipapo_scratch *scratch;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
*/
kernel_fpu_begin_mask(0);
scratch = *raw_cpu_ptr(m->scratch_aligned);
scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
return false;
}
map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
res = scratch + (map_index ? m->bsize_max : 0);
fill = scratch + (map_index ? 0 : m->bsize_max);
map_index = scratch->map_index;
res = scratch->map + (map_index ? m->bsize_max : 0);
fill = scratch->map + (map_index ? 0 : m->bsize_max);
/* Starting map doesn't need to be set for this implementation */
@ -1221,7 +1220,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
out:
if (i % 2)
raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
scratch->map_index = !map_index;
kernel_fpu_end();
return ret >= 0;

View file

@ -234,7 +234,7 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
static const struct nft_rbtree_elem *
nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe, u8 genmask)
struct nft_rbtree_elem *rbe)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@ -253,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev) &&
nft_set_elem_active(&rbe_prev->ext, genmask))
nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
break;
prev = rb_prev(prev);
@ -313,6 +313,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
u64 tstamp = nft_net_tstamp(net);
int d;
/* Descend the tree to search for an existing element greater than the
@ -360,11 +361,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports
* but skip new elements in this transaction.
*/
if (nft_set_elem_expired(&rbe->ext) &&
if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
const struct nft_rbtree_elem *removed_end;
removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
removed_end = nft_rbtree_gc_elem(set, priv, rbe);
if (IS_ERR(removed_end))
return PTR_ERR(removed_end);
@ -551,6 +552,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
u8 genmask = nft_genmask_next(net);
u64 tstamp = nft_net_tstamp(net);
int d;
while (parent != NULL) {
@ -571,7 +573,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
} else if (nft_set_elem_expired(&rbe->ext)) {
} else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
break;
} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
@ -624,9 +626,10 @@ static void nft_rbtree_gc(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
struct net *net = read_pnet(&set->net);
u64 tstamp = nft_net_tstamp(net);
struct rb_node *node, *next;
struct nft_trans_gc *gc;
struct net *net;
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
@ -648,7 +651,7 @@ static void nft_rbtree_gc(struct nft_set *set)
rbe_end = rbe;
continue;
}
if (!nft_set_elem_expired(&rbe->ext))
if (!__nft_set_elem_expired(&rbe->ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);

View file

@ -13,7 +13,7 @@
#include "../kselftest_harness.h"
#define TEST_ZONE_ID 123
#define CTA_FILTER_F_CTA_TUPLE_ZONE (1 << 2)
#define NF_CT_DEFAULT_ZONE_ID 0
static int reply_counter;
@ -336,6 +336,9 @@ FIXTURE_SETUP(conntrack_dump_flush)
ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
TEST_ZONE_ID + 2);
EXPECT_EQ(ret, 0);
ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
NF_CT_DEFAULT_ZONE_ID);
EXPECT_EQ(ret, 0);
src = (struct in6_addr) {{
.__u6_addr32 = {
@ -395,6 +398,26 @@ FIXTURE_SETUP(conntrack_dump_flush)
TEST_ZONE_ID + 2);
EXPECT_EQ(ret, 0);
src = (struct in6_addr) {{
.__u6_addr32 = {
0xb80d0120,
0x00000000,
0x00000000,
0x07000000
}
}};
dst = (struct in6_addr) {{
.__u6_addr32 = {
0xb80d0120,
0x00000000,
0x00000000,
0x08000000
}
}};
ret = conntrack_data_generate_v6(self->sock, src, dst,
NF_CT_DEFAULT_ZONE_ID);
EXPECT_EQ(ret, 0);
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
EXPECT_GE(ret, 2);
if (ret > 2)
@ -425,6 +448,24 @@ TEST_F(conntrack_dump_flush, test_flush_by_zone)
EXPECT_EQ(ret, 2);
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
EXPECT_EQ(ret, 2);
ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
EXPECT_EQ(ret, 2);
}
TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
{
int ret;
ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
EXPECT_EQ(ret, 0);
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
EXPECT_EQ(ret, 2);
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
EXPECT_EQ(ret, 2);
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
EXPECT_EQ(ret, 2);
ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
EXPECT_EQ(ret, 0);
}
TEST_HARNESS_MAIN