netfilter pull request 23-10-25

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEN9lkrMBJgcdVAPub1V2XiooUIOQFAmU5gvgACgkQ1V2XiooU
 IOS9eA//QyIqcGRzr+tX1ZPwikkicmSb7w8vkrY7jXMNWNiye54tA5fSJsxqpIMy
 9J9k8eB+fI6AJV44tOq4K8XsYCcI4ZEst2mftumvuq8igX27ulz46uLvoVwnqhwc
 NlPO06RwSHQHR3S5tKRRwwYfUwpPjDRCW15c14pHw4EkHAaL+dItLwATrrJhPv93
 PUZNGbB+i+55QrLJdMMshvpoPAhLmo57cDvDcerhOWygaoxiaKIaR0bRQ40eM3Zl
 j9veG2oiuehv/RHqFJ5MBCiqrIYRHU8kTflqaVA+ODfgUSbijCcv/RQxaILnwnZd
 57vdLSBTgVFh2uiPTYGAxfUwv3BpA7g9uzmeBgMbl/t71HhYoPe/QhjvoVhlzNl/
 1JrCFbrQVaCdhtZbDt7f359mUtXv9yh1+9pBytEkdbxRfsDmzRVsFrUeLo0P9Ho8
 4jKpnaqPTdnzhfoQocZjL7+M22/zk6jZCu1Pcs318yqpTkhJwiTtByo4iaQ2f0F7
 xgA/auZ/33mmirFXnLzMoA4b0TbJNm+Jjye3tdbu0FlY2sKb943RW2kXg9rdfyOL
 OUvSSux7Fezyq5y55+KV4FFrKgYhrZ5tWR8mVBg1KHrYs9r9p7SmXL3KzbSPxBvn
 QrPdCQsQ187QNIkwli/ChYQcrahwSiEqFSJbdDmzIA7AFNLHVaY=
 =ZG+P
 -----END PGP SIGNATURE-----

Merge tag 'nf-next-23-10-25' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next. Mostly
nf_tables updates with two patches for connlabel and br_netfilter.

1) Rename the function that performs on-demand GC for rbtree elements,
   and replace the async GC in rbtree with sync GC. Patches from Florian Westphal.

2) Use commit_mutex for NFT_MSG_GETRULE_RESET to ensure that two
   concurrent threads invoking this command do not underrun stateful
   objects. Patches from Phil Sutter.

3) Use single hook to deal with IP and ARP packets in br_netfilter.
   Patch from Florian Westphal.

4) Use atomic_t for the netns connlabel use counter instead of a
   spinlock, also a patch from Florian.

5) Cleanups for stateful objects infrastructure in nf_tables.
   Patches from Phil Sutter.

6) The flush path uses the opaque set element offered by the iterator,
   instead of calling pipapo_deactivate(), which looks it up again.

7) The set backend .flush interface always succeeds, so make it return
   void instead.

8) Add a struct nft_elem_priv placeholder structure and use it to replace
   the void * used to pass the opaque set element representation from
   backend to frontend, which defeats compiler type checks.

9) Shrink memory consumption of set element transactions by reducing
   struct nft_trans_elem object size and stack memory usage.

10) Use struct nft_elem_priv for the set backend .insert operation too.

11) Carry reset flag in nft_set_dump_ctx structure, instead of passing it
    as a function argument, from Phil Sutter.

netfilter pull request 23-10-25

* tag 'nf-next-23-10-25' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_tables: Carry reset boolean in nft_set_dump_ctx
  netfilter: nf_tables: set->ops->insert returns opaque set element in case of EEXIST
  netfilter: nf_tables: shrink memory consumption of set elements
  netfilter: nf_tables: expose opaque set element as struct nft_elem_priv
  netfilter: nf_tables: set backend .flush always succeeds
  netfilter: nft_set_pipapo: no need to call pipapo_deactivate() from flush
  netfilter: nf_tables: Carry reset boolean in nft_obj_dump_ctx
  netfilter: nf_tables: nft_obj_filter fits into cb->ctx
  netfilter: nf_tables: Carry s_idx in nft_obj_dump_ctx
  netfilter: nf_tables: A better name for nft_obj_filter
  netfilter: nf_tables: Unconditionally allocate nft_obj_filter
  netfilter: nf_tables: Drop pointless memset in nf_tables_dump_obj
  netfilter: conntrack: switch connlabels to atomic_t
  br_netfilter: use single forward hook for ip and arp
  netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  netfilter: nf_tables: Introduce nf_tables_getrule_single()
  netfilter: nf_tables: Open-code audit log call in nf_tables_getrule()
  netfilter: nft_set_rbtree: prefer sync gc to async worker
  netfilter: nft_set_rbtree: rename gc deactivate+erase function
====================

Link: https://lore.kernel.org/r/20231025212555.132775-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
commit 3967336126 by Paolo Abeni, 2023-10-26 12:20:35 +02:00
12 changed files with 565 additions and 509 deletions

diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h

@@ -39,7 +39,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_LABELS
struct net *net = nf_ct_net(ct);
if (net->ct.labels_used == 0)
if (atomic_read(&net->ct.labels_used) == 0)
return NULL;
return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC);

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h

@@ -274,6 +274,9 @@ struct nft_userdata {
unsigned char data[];
};
/* placeholder structure for opaque set element backend representation. */
struct nft_elem_priv { };
/**
* struct nft_set_elem - generic representation of set elements
*
@@ -294,9 +297,14 @@ struct nft_set_elem {
u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
struct nft_data val;
} data;
void *priv;
struct nft_elem_priv *priv;
};
static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
{
return (void *)priv;
}
struct nft_set;
struct nft_set_iter {
u8 genmask;
@@ -306,7 +314,7 @@ struct nft_set_iter {
int (*fn)(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_set_elem *elem);
struct nft_elem_priv *elem_priv);
};
/**
@@ -430,7 +438,8 @@ struct nft_set_ops {
const struct nft_set_ext **ext);
bool (*update)(struct nft_set *set,
const u32 *key,
void *(*new)(struct nft_set *,
struct nft_elem_priv *
(*new)(struct nft_set *,
const struct nft_expr *,
struct nft_regs *),
const struct nft_expr *expr,
@@ -442,23 +451,23 @@ struct nft_set_ops {
int (*insert)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext);
struct nft_elem_priv **priv);
void (*activate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
void * (*deactivate)(const struct net *net,
struct nft_elem_priv *elem_priv);
struct nft_elem_priv * (*deactivate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
bool (*flush)(const struct net *net,
void (*flush)(const struct net *net,
const struct nft_set *set,
void *priv);
struct nft_elem_priv *priv);
void (*remove)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
struct nft_elem_priv *elem_priv);
void (*walk)(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter);
void * (*get)(const struct net *net,
struct nft_elem_priv * (*get)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem,
unsigned int flags);
@@ -796,9 +805,9 @@ static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
}
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
void *elem)
const struct nft_elem_priv *elem_priv)
{
return elem + set->ops->elemsize;
return (void *)elem_priv + set->ops->elemsize;
}
static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext)
@@ -810,16 +819,19 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nlattr *attr);
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *key_end, const u32 *data,
u64 timeout, u64 expiration, gfp_t gfp);
struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *key_end,
const u32 *data,
u64 timeout, u64 expiration, gfp_t gfp);
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[]);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
void nft_set_elem_destroy(const struct nft_set *set,
const struct nft_elem_priv *elem_priv,
bool destroy_expr);
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem);
const struct nft_set *set,
const struct nft_elem_priv *elem_priv);
struct nft_expr_ops;
/**
@@ -1061,7 +1073,7 @@ struct nft_chain {
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_set_elem *elem);
struct nft_elem_priv *elem_priv);
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
@@ -1638,14 +1650,14 @@ struct nft_trans_table {
struct nft_trans_elem {
struct nft_set *set;
struct nft_set_elem elem;
struct nft_elem_priv *elem_priv;
bool bound;
};
#define nft_trans_elem_set(trans) \
(((struct nft_trans_elem *)trans->data)->set)
#define nft_trans_elem(trans) \
(((struct nft_trans_elem *)trans->data)->elem)
#define nft_trans_elem_priv(trans) \
(((struct nft_trans_elem *)trans->data)->elem_priv)
#define nft_trans_elem_set_bound(trans) \
(((struct nft_trans_elem *)trans->data)->bound)
@@ -1686,7 +1698,7 @@ struct nft_trans_gc {
struct nft_set *set;
u32 seq;
u16 count;
void *priv[NFT_TRANS_GC_BATCHCOUNT];
struct nft_elem_priv *priv[NFT_TRANS_GC_BATCHCOUNT];
struct rcu_head rcu;
};
@@ -1709,7 +1721,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc);
void nft_setelem_data_deactivate(const struct net *net,
const struct nft_set *set,
struct nft_set_elem *elem);
struct nft_elem_priv *elem_priv);
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
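To make the new convention concrete: every backend below embeds struct nft_elem_priv as the first member of its element type, so the frontend's opaque pointer and the backend element share the same address. This is also what lets struct nft_trans_elem above carry one elem_priv pointer instead of a whole struct nft_set_elem with its NFT_DATA_VALUE_MAXLEN-sized key buffers. A minimal sketch of the pattern with a hypothetical element type (my_elem and its helpers are illustrative only; nft_elem_priv_cast() and the offset-zero check come from this series):

struct my_elem {
	struct nft_elem_priv	priv;	/* must stay at offset 0 */
	struct nft_set_ext	ext;
};

static struct my_elem *to_my_elem(const struct nft_elem_priv *elem_priv)
{
	/* nft_elem_priv_cast() merely strips the type; unlike the old
	 * void * interface, handing in an unrelated pointer type now
	 * fails to compile.
	 */
	return nft_elem_priv_cast(elem_priv);
}

static int my_backend_init(void)
{
	/* backends assert the layout invariant at build time */
	BUILD_BUG_ON(offsetof(struct my_elem, priv) != 0);
	return 0;
}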

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h

@@ -107,7 +107,7 @@ struct netns_ct {
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;
atomic_t labels_used;
#endif
};
#endif

diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c

@@ -570,18 +570,12 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
}
/* This is the 'purely bridged' case. For IP, we pass the packet to
* netfilter with indev and outdev set to the bridge device,
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports. */
static unsigned int br_nf_forward_ip(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
static unsigned int br_nf_forward_ip(struct sk_buff *skb,
const struct nf_hook_state *state,
u8 pf)
{
struct nf_bridge_info *nf_bridge;
struct net_device *parent;
u_int8_t pf;
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
@@ -600,15 +594,6 @@ static unsigned int br_nf_forward_ip(void *priv,
if (!parent)
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))
pf = NFPROTO_IPV4;
else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
is_pppoe_ipv6(skb, state->net))
pf = NFPROTO_IPV6;
else
return NF_ACCEPT;
nf_bridge_pull_encap_header(skb);
if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -620,19 +605,18 @@ static unsigned int br_nf_forward_ip(void *priv,
if (br_validate_ipv4(state->net, skb))
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
if (pf == NFPROTO_IPV6) {
skb->protocol = htons(ETH_P_IP);
} else if (pf == NFPROTO_IPV6) {
if (br_validate_ipv6(state->net, skb))
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
skb->protocol = htons(ETH_P_IPV6);
} else {
WARN_ON_ONCE(1);
return NF_DROP;
}
nf_bridge->physoutdev = skb->dev;
if (pf == NFPROTO_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);
NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
brnf_get_logical_dev(skb, state->in, state->net),
@@ -641,8 +625,7 @@ static unsigned int br_nf_forward_ip(void *priv,
return NF_STOLEN;
}
static unsigned int br_nf_forward_arp(void *priv,
struct sk_buff *skb,
static unsigned int br_nf_forward_arp(struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct net_bridge_port *p;
@@ -659,11 +642,8 @@ static unsigned int br_nf_forward_arp(void *priv,
if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES))
return NF_ACCEPT;
if (!IS_ARP(skb)) {
if (!is_vlan_arp(skb, state->net))
return NF_ACCEPT;
if (is_vlan_arp(skb, state->net))
nf_bridge_pull_encap_header(skb);
}
if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
@@ -680,6 +660,28 @@ static unsigned int br_nf_forward_arp(void *priv,
return NF_STOLEN;
}
/* This is the 'purely bridged' case. For IP, we pass the packet to
* netfilter with indev and outdev set to the bridge device,
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports.
*/
static unsigned int br_nf_forward(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))
return br_nf_forward_ip(skb, state, NFPROTO_IPV4);
if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
is_pppoe_ipv6(skb, state->net))
return br_nf_forward_ip(skb, state, NFPROTO_IPV6);
if (IS_ARP(skb) || is_vlan_arp(skb, state->net))
return br_nf_forward_arp(skb, state);
return NF_ACCEPT;
}
static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct brnf_frag_data *data;
@@ -937,13 +939,7 @@ static const struct nf_hook_ops br_nf_ops[] = {
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_forward_ip,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF - 1,
},
{
.hook = br_nf_forward_arp,
.hook = br_nf_forward,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF,

diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c

@@ -11,8 +11,6 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
static DEFINE_SPINLOCK(nf_connlabels_lock);
static int replace_u32(u32 *address, u32 mask, u32 new)
{
u32 old, tmp;
@@ -60,23 +58,24 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace);
int nf_connlabels_get(struct net *net, unsigned int bits)
{
int v;
if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long))
return -ERANGE;
spin_lock(&nf_connlabels_lock);
net->ct.labels_used++;
spin_unlock(&nf_connlabels_lock);
BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
v = atomic_inc_return_relaxed(&net->ct.labels_used);
WARN_ON_ONCE(v <= 0);
return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabels_get);
void nf_connlabels_put(struct net *net)
{
spin_lock(&nf_connlabels_lock);
net->ct.labels_used--;
spin_unlock(&nf_connlabels_lock);
int v = atomic_dec_return_relaxed(&net->ct.labels_used);
WARN_ON_ONCE(v < 0);
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
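For context, the callers of this API are unchanged: a module reserves label bits per netns at init time and drops the reference on exit, and with the spinlock gone both calls are cheap atomic bookkeeping. A sketch of the usual pattern (nf_connlabels_get()/nf_connlabels_put() are the real exported symbols; the pernet module around them is hypothetical):

static int __net_init my_labels_net_init(struct net *net)
{
	/* reserve 32 label bits; fails with -ERANGE if the request
	 * exceeds NF_CT_LABELS_MAX_SIZE
	 */
	return nf_connlabels_get(net, 32);
}

static void __net_exit my_labels_net_exit(struct net *net)
{
	nf_connlabels_put(net);
}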

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
(diff suppressed because it is too large)
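This suppressed file carries the bulk of the series, including the locking from point 2 of the changelog. Since the hunks are not visible here, a rough sketch of the described behaviour, not the actual patch (nft_pernet() and commit_mutex are real nf_tables infrastructure; the function below is hypothetical):

/* Resetting counters/quotas while dumping must be serialized:
 * without the mutex, two concurrent GETRULE_RESET requests can
 * each consume the same values and underrun the stateful objects.
 */
static int getrule_reset_serialized(struct net *net)
{
	struct nftables_pernet *nft_net = nft_pernet(net);
	int err;

	mutex_lock(&nft_net->commit_mutex);
	err = 0;	/* look up the rule and dump it with reset here */
	mutex_unlock(&nft_net->commit_mutex);

	return err;
}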

diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c

@@ -44,33 +44,34 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
return 0;
}
static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
struct nft_regs *regs)
static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
const struct nft_expr *expr,
struct nft_regs *regs)
{
const struct nft_dynset *priv = nft_expr_priv(expr);
struct nft_set_ext *ext;
void *elem_priv;
u64 timeout;
void *elem;
if (!atomic_add_unless(&set->nelems, 1, set->size))
return NULL;
timeout = priv->timeout ? : set->timeout;
elem = nft_set_elem_init(set, &priv->tmpl,
&regs->data[priv->sreg_key], NULL,
&regs->data[priv->sreg_data],
timeout, 0, GFP_ATOMIC);
if (IS_ERR(elem))
elem_priv = nft_set_elem_init(set, &priv->tmpl,
&regs->data[priv->sreg_key], NULL,
&regs->data[priv->sreg_data],
timeout, 0, GFP_ATOMIC);
if (IS_ERR(elem_priv))
goto err1;
ext = nft_set_elem_ext(set, elem);
ext = nft_set_elem_ext(set, elem_priv);
if (priv->num_exprs && nft_dynset_expr_setup(priv, ext) < 0)
goto err2;
return elem;
return elem_priv;
err2:
nft_set_elem_destroy(set, elem, false);
nft_set_elem_destroy(set, elem_priv, false);
err1:
if (set->size)
atomic_dec(&set->nelems);
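The unchanged context above is worth noting: atomic_add_unless() acts as the capacity gate for packet-path insertions; the element count is reserved before allocating and rolled back on every failure path, so concurrent updates cannot push the set past set->size. The idiom in isolation (a standalone sketch with hypothetical names):

static bool reserve_slot(atomic_t *nelems, unsigned int size)
{
	/* increment nelems unless it already equals size;
	 * returns false when the set is full
	 */
	return atomic_add_unless(nelems, 1, size);
}

/* and on a later failure path, mirroring err1 above: */
/* if (size) atomic_dec(nelems); */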

diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c

@@ -13,6 +13,7 @@
#include <net/netfilter/nf_tables_core.h>
struct nft_bitmap_elem {
struct nft_elem_priv priv;
struct list_head head;
struct nft_set_ext ext;
};
@@ -104,8 +105,9 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
return NULL;
}
static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
static struct nft_elem_priv *
nft_bitmap_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
const struct nft_bitmap *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -116,23 +118,23 @@ static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
!nft_set_elem_active(&be->ext, genmask))
continue;
return be;
return &be->priv;
}
return ERR_PTR(-ENOENT);
}
static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
struct nft_elem_priv **elem_priv)
{
struct nft_bitmap_elem *new = nft_elem_priv_cast(elem->priv), *be;
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *new = elem->priv, *be;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
be = nft_bitmap_elem_find(set, new, genmask);
if (be) {
*ext = &be->ext;
*elem_priv = &be->priv;
return -EEXIST;
}
@@ -144,12 +146,11 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
return 0;
}
static void nft_bitmap_remove(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
static void nft_bitmap_remove(const struct net *net, const struct nft_set *set,
struct nft_elem_priv *elem_priv)
{
struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be = elem->priv;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -161,10 +162,10 @@ static void nft_bitmap_remove(const struct net *net,
static void nft_bitmap_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
struct nft_elem_priv *elem_priv)
{
struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be = elem->priv;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -174,28 +175,27 @@ static void nft_bitmap_activate(const struct net *net,
nft_set_elem_change_active(net, set, &be->ext);
}
static bool nft_bitmap_flush(const struct net *net,
const struct nft_set *set, void *_be)
static void nft_bitmap_flush(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
{
struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
struct nft_bitmap_elem *be = _be;
u32 idx, off;
nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 10 state, similar to deactivation. */
priv->bitmap[idx] &= ~(genmask << off);
nft_set_elem_change_active(net, set, &be->ext);
return true;
}
static void *nft_bitmap_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
static struct nft_elem_priv *
nft_bitmap_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_bitmap_elem *this = nft_elem_priv_cast(elem->priv), *be;
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *this = elem->priv, *be;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -209,7 +209,7 @@ static void *nft_bitmap_deactivate(const struct net *net,
priv->bitmap[idx] &= ~(genmask << off);
nft_set_elem_change_active(net, set, &be->ext);
return be;
return &be->priv;
}
static void nft_bitmap_walk(const struct nft_ctx *ctx,
@@ -218,7 +218,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
{
const struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be;
struct nft_set_elem elem;
list_for_each_entry_rcu(be, &priv->list, head) {
if (iter->count < iter->skip)
@@ -226,9 +225,7 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
if (!nft_set_elem_active(&be->ext, iter->genmask))
goto cont;
elem.priv = be;
iter->err = iter->fn(ctx, set, iter, &elem);
iter->err = iter->fn(ctx, set, iter, &be->priv);
if (iter->err < 0)
return;
@@ -265,6 +262,8 @@ static int nft_bitmap_init(const struct nft_set *set,
{
struct nft_bitmap *priv = nft_set_priv(set);
BUILD_BUG_ON(offsetof(struct nft_bitmap_elem, priv) != 0);
INIT_LIST_HEAD(&priv->list);
priv->bitmap_size = nft_bitmap_size(set->klen);
@@ -278,7 +277,7 @@ static void nft_bitmap_destroy(const struct nft_ctx *ctx,
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
nf_tables_set_elem_destroy(ctx, set, be);
nf_tables_set_elem_destroy(ctx, set, &be->priv);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,

diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c

@@ -27,6 +27,7 @@ struct nft_rhash {
};
struct nft_rhash_elem {
struct nft_elem_priv priv;
struct rhash_head node;
struct nft_set_ext ext;
};
@@ -95,8 +96,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
return !!he;
}
static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
static struct nft_elem_priv *
nft_rhash_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
@@ -108,13 +110,14 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
return he;
return &he->priv;
return ERR_PTR(-ENOENT);
}
static bool nft_rhash_update(struct nft_set *set, const u32 *key,
void *(*new)(struct nft_set *,
struct nft_elem_priv *
(*new)(struct nft_set *,
const struct nft_expr *,
struct nft_regs *regs),
const struct nft_expr *expr,
@@ -123,6 +126,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he, *prev;
struct nft_elem_priv *elem_priv;
struct nft_rhash_cmp_arg arg = {
.genmask = NFT_GENMASK_ANY,
.set = set,
@@ -133,10 +137,11 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
if (he != NULL)
goto out;
he = new(set, expr, regs);
if (he == NULL)
elem_priv = new(set, expr, regs);
if (!elem_priv)
goto err1;
he = nft_elem_priv_cast(elem_priv);
prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
nft_rhash_params);
if (IS_ERR(prev))
@@ -144,7 +149,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
/* Another cpu may race to insert the element with the same key */
if (prev) {
nft_set_elem_destroy(set, he, true);
nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
he = prev;
}
@@ -154,7 +159,7 @@ out:
return true;
err2:
nft_set_elem_destroy(set, he, true);
nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
err1:
return false;
@@ -162,10 +167,10 @@ err1:
static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
struct nft_elem_priv **elem_priv)
{
struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv);
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he = elem->priv;
struct nft_rhash_cmp_arg arg = {
.genmask = nft_genmask_next(net),
.set = set,
@@ -178,33 +183,32 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
if (IS_ERR(prev))
return PTR_ERR(prev);
if (prev) {
*ext = &prev->ext;
*elem_priv = &prev->priv;
return -EEXIST;
}
return 0;
}
static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
struct nft_elem_priv *elem_priv)
{
struct nft_rhash_elem *he = elem->priv;
struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
}
static bool nft_rhash_flush(const struct net *net,
const struct nft_set *set, void *priv)
static void nft_rhash_flush(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
{
struct nft_rhash_elem *he = priv;
struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
return true;
}
static void *nft_rhash_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
static struct nft_elem_priv *
nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
@@ -221,15 +225,15 @@ static void *nft_rhash_deactivate(const struct net *net,
rcu_read_unlock();
return he;
return &he->priv;
}
static void nft_rhash_remove(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
struct nft_elem_priv *elem_priv)
{
struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he = elem->priv;
rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
}
@@ -260,7 +264,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
struct rhashtable_iter hti;
struct nft_set_elem elem;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
@@ -280,9 +283,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
elem.priv = he;
iter->err = iter->fn(ctx, set, iter, &elem);
iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
break;
@@ -406,6 +407,8 @@ static int nft_rhash_init(const struct nft_set *set,
struct rhashtable_params params = nft_rhash_params;
int err;
BUILD_BUG_ON(offsetof(struct nft_rhash_elem, priv) != 0);
params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
params.key_len = set->klen;
@@ -428,8 +431,9 @@ struct nft_rhash_ctx {
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
struct nft_rhash_ctx *rhash_ctx = arg;
struct nft_rhash_elem *he = ptr;
nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, &he->priv);
}
static void nft_rhash_destroy(const struct nft_ctx *ctx,
@@ -476,6 +480,7 @@ struct nft_hash {
};
struct nft_hash_elem {
struct nft_elem_priv priv;
struct hlist_node node;
struct nft_set_ext ext;
};
@@ -501,8 +506,9 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
return false;
}
static void *nft_hash_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
static struct nft_elem_priv *
nft_hash_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -514,7 +520,7 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set,
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
nft_set_elem_active(&he->ext, genmask))
return he;
return &he->priv;
}
return ERR_PTR(-ENOENT);
}
@@ -562,9 +568,9 @@ static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
static int nft_hash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
struct nft_elem_priv **elem_priv)
{
struct nft_hash_elem *this = elem->priv, *he;
struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he;
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
u32 hash;
@@ -574,7 +580,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
if (!memcmp(nft_set_ext_key(&this->ext),
nft_set_ext_key(&he->ext), set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
*ext = &he->ext;
*elem_priv = &he->priv;
return -EEXIST;
}
}
@@ -583,28 +589,28 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
}
static void nft_hash_activate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
struct nft_elem_priv *elem_priv)
{
struct nft_hash_elem *he = elem->priv;
struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
}
static bool nft_hash_flush(const struct net *net,
const struct nft_set *set, void *priv)
static void nft_hash_flush(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
{
struct nft_hash_elem *he = priv;
struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
return true;
}
static void *nft_hash_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
static struct nft_elem_priv *
nft_hash_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he;
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *this = elem->priv, *he;
u8 genmask = nft_genmask_next(net);
u32 hash;
@@ -614,7 +620,7 @@ static void *nft_hash_deactivate(const struct net *net,
set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
nft_set_elem_change_active(net, set, &he->ext);
return he;
return &he->priv;
}
}
return NULL;
@@ -622,9 +628,9 @@ static void *nft_hash_deactivate(const struct net *net,
static void nft_hash_remove(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
struct nft_elem_priv *elem_priv)
{
struct nft_hash_elem *he = elem->priv;
struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
hlist_del_rcu(&he->node);
}
@@ -634,7 +640,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
struct nft_set_elem elem;
int i;
for (i = 0; i < priv->buckets; i++) {
@@ -644,9 +649,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
elem.priv = he;
iter->err = iter->fn(ctx, set, iter, &elem);
iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
return;
cont:
@@ -685,7 +688,7 @@ static void nft_hash_destroy(const struct nft_ctx *ctx,
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
nf_tables_set_elem_destroy(ctx, set, he);
nf_tables_set_elem_destroy(ctx, set, &he->priv);
}
}
}

diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c

@@ -599,11 +599,18 @@ out:
* @elem: nftables API element representation containing key data
* @flags: Unused
*/
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
nft_genmask_cur(net));
static struct nft_pipapo_elem *e;
e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
nft_genmask_cur(net));
if (IS_ERR(e))
return ERR_CAST(e);
return &e->priv;
}
/**
@@ -1151,21 +1158,21 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
* @net: Network namespace
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
* @ext2: Filled with pointer to &struct nft_set_ext in inserted element
* @elem_priv: Filled with pointer to &struct nft_set_ext in inserted element
*
* Return: 0 on success, error pointer on failure.
*/
static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext2)
struct nft_elem_priv **elem_priv)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *start = (const u8 *)elem->key.val.data, *end;
struct nft_pipapo_elem *e = elem->priv, *dup;
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@@ -1188,7 +1195,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
!memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
*ext2 = &dup->ext;
*elem_priv = &dup->priv;
return -EEXIST;
}
@@ -1203,7 +1210,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
*ext2 = &dup->ext;
*elem_priv = &dup->priv;
return -ENOTEMPTY;
}
@@ -1263,7 +1270,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
put_cpu_ptr(m->scratch);
}
*ext2 = &e->ext;
e = nft_elem_priv_cast(elem->priv);
*elem_priv = &e->priv;
pipapo_map(m, rulemap, e);
@@ -1540,11 +1548,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
struct nft_pipapo_elem *e)
{
struct nft_set_elem elem = {
.priv = e,
};
nft_setelem_data_deactivate(net, set, &elem);
nft_setelem_data_deactivate(net, set, &e->priv);
}
/**
@@ -1731,7 +1735,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
* nft_pipapo_activate() - Mark element reference as active given key, commit
* @net: Network namespace
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
* @elem_priv: nftables API element representation containing key data
*
* On insertion, elements are added to a copy of the matching data currently
* in use for lookups, and not directly inserted into current lookup data. Both
@@ -1740,9 +1744,9 @@ static void nft_pipapo_abort(const struct nft_set *set)
*/
static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
struct nft_elem_priv *elem_priv)
{
struct nft_pipapo_elem *e = elem->priv;
struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &e->ext);
}
@@ -1782,9 +1786,9 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
*
* Return: deactivated element if found, NULL otherwise.
*/
static void *nft_pipapo_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
static struct nft_elem_priv *
nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -1795,7 +1799,7 @@ static void *nft_pipapo_deactivate(const struct net *net,
* nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
* @net: Network namespace
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
* @elem_priv: nftables API element representation containing key data
*
* This is functionally the same as nft_pipapo_deactivate(), with a slightly
* different interface, and it's also called once for each element in a set
@@ -1809,13 +1813,12 @@ static void *nft_pipapo_deactivate(const struct net *net,
*
* Return: true if element was found and deactivated.
*/
static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set,
void *elem)
static void nft_pipapo_flush(const struct net *net, const struct nft_set *set,
struct nft_elem_priv *elem_priv)
{
struct nft_pipapo_elem *e = elem;
struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext),
&e->ext);
nft_set_elem_change_active(net, set, &e->ext);
}
/**
@@ -1938,7 +1941,7 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
* nft_pipapo_remove() - Remove element given key, commit
* @net: Network namespace
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
* @elem_priv: nftables API element representation containing key data
*
* Similarly to nft_pipapo_activate(), this is used as commit operation by the
* API, but it's called once per element in the pending transaction, so we can't
@@ -1946,14 +1949,15 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
* the matched element here, if any, and commit the updated matching data.
*/
static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
struct nft_elem_priv *elem_priv)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
struct nft_pipapo_elem *e = elem->priv;
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
const u8 *data;
e = nft_elem_priv_cast(elem_priv);
data = (const u8 *)nft_set_ext_key(&e->ext);
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
@@ -2030,7 +2034,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
for (r = 0; r < f->rules; r++) {
struct nft_pipapo_elem *e;
struct nft_set_elem elem;
if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
continue;
@@ -2040,9 +2043,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
e = f->mt[r].e;
elem.priv = e;
iter->err = iter->fn(ctx, set, iter, &elem);
iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
goto out;
@@ -2114,6 +2115,8 @@ static int nft_pipapo_init(const struct nft_set *set,
struct nft_pipapo_field *f;
int err, i, field_count;
BUILD_BUG_ON(offsetof(struct nft_pipapo_elem, priv) != 0);
field_count = desc->field_count ? : 1;
if (field_count > NFT_PIPAPO_MAX_FIELDS)
@@ -2208,7 +2211,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
e = f->mt[r].e;
nf_tables_set_elem_destroy(ctx, set, e);
nf_tables_set_elem_destroy(ctx, set, &e->priv);
}
}

diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h

@@ -170,10 +170,12 @@ struct nft_pipapo_elem;
/**
* struct nft_pipapo_elem - API-facing representation of single set element
* @priv: element placeholder
* @ext: nftables API extensions
*/
struct nft_pipapo_elem {
struct nft_set_ext ext;
struct nft_elem_priv priv;
struct nft_set_ext ext;
};
int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,

diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c

@@ -19,10 +19,11 @@ struct nft_rbtree {
struct rb_root root;
rwlock_t lock;
seqcount_rwlock_t count;
struct delayed_work gc_work;
unsigned long last_gc;
};
struct nft_rbtree_elem {
struct nft_elem_priv priv;
struct rb_node node;
struct nft_set_ext ext;
};
@@ -48,8 +49,7 @@ static int nft_rbtree_cmp(const struct nft_set *set,
static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
{
return nft_set_elem_expired(&rbe->ext) ||
nft_set_elem_is_dead(&rbe->ext);
return nft_set_elem_expired(&rbe->ext);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -197,8 +197,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
return false;
}
static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
static struct nft_elem_priv *
nft_rbtree_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_rbtree *priv = nft_set_priv(set);
unsigned int seq = read_seqcount_begin(&priv->count);
@@ -209,27 +210,25 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
if (ret || !read_seqcount_retry(&priv->count, seq))
return rbe;
return &rbe->priv;
read_lock_bh(&priv->lock);
seq = read_seqcount_begin(&priv->count);
ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
if (!ret)
rbe = ERR_PTR(-ENOENT);
read_unlock_bh(&priv->lock);
return rbe;
if (!ret)
return ERR_PTR(-ENOENT);
return &rbe->priv;
}
static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe)
static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe)
{
struct nft_set_elem elem = {
.priv = rbe,
};
nft_setelem_data_deactivate(net, set, &elem);
lockdep_assert_held_write(&priv->lock);
nft_setelem_data_deactivate(net, set, &rbe->priv);
rb_erase(&rbe->node, &priv->root);
}
@@ -263,7 +262,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
rbe_prev = NULL;
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
nft_rbtree_gc_remove(net, set, priv, rbe_prev);
nft_rbtree_gc_elem_remove(net, set, priv, rbe_prev);
/* There is always room in this trans gc for this element,
* memory allocation never actually happens, hence, the warning
@@ -277,7 +276,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
nft_trans_gc_elem_add(gc, rbe_prev);
}
nft_rbtree_gc_remove(net, set, priv, rbe);
nft_rbtree_gc_elem_remove(net, set, priv, rbe);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
return ERR_PTR(-ENOMEM);
@@ -307,7 +306,7 @@ static bool nft_rbtree_update_first(const struct nft_set *set,
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
struct nft_elem_priv **elem_priv)
{
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
struct rb_node *node, *next, *parent, **p, *first = NULL;
@@ -424,7 +423,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
*ext = &rbe_ge->ext;
*elem_priv = &rbe_ge->priv;
return -EEXIST;
}
@@ -433,7 +432,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
*ext = &rbe_le->ext;
*elem_priv = &rbe_le->priv;
return -EEXIST;
}
@@ -485,10 +484,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
struct nft_elem_priv **elem_priv)
{
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv);
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
int err;
do {
@@ -499,7 +498,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
err = __nft_rbtree_insert(net, set, rbe, elem_priv);
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
} while (err == -EAGAIN);
@@ -507,13 +506,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
return err;
}
static void nft_rbtree_remove(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
rb_erase(&rbe->node, &priv->root);
@@ -521,32 +515,41 @@ static void nft_rbtree_remove(const struct net *net,
write_unlock_bh(&priv->lock);
}
static void nft_rbtree_remove(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
{
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
struct nft_rbtree *priv = nft_set_priv(set);
nft_rbtree_erase(priv, rbe);
}
static void nft_rbtree_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
struct nft_elem_priv *elem_priv)
{
struct nft_rbtree_elem *rbe = elem->priv;
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &rbe->ext);
}
static bool nft_rbtree_flush(const struct net *net,
const struct nft_set *set, void *priv)
static void nft_rbtree_flush(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
{
struct nft_rbtree_elem *rbe = priv;
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &rbe->ext);
return true;
}
static void *nft_rbtree_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
static struct nft_elem_priv *
nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv);
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
struct nft_rbtree_elem *rbe, *this = elem->priv;
u8 genmask = nft_genmask_next(net);
int d;
@@ -574,8 +577,8 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_left;
continue;
}
nft_rbtree_flush(net, set, rbe);
return rbe;
nft_rbtree_flush(net, set, &rbe->priv);
return &rbe->priv;
}
}
return NULL;
@@ -587,7 +590,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct nft_set_elem elem;
struct rb_node *node;
read_lock_bh(&priv->lock);
@@ -599,9 +601,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
elem.priv = rbe;
iter->err = iter->fn(ctx, set, iter, &elem);
iter->err = iter->fn(ctx, set, iter, &rbe->priv);
if (iter->err < 0) {
read_unlock_bh(&priv->lock);
return;
@@ -612,45 +612,36 @@ cont:
read_unlock_bh(&priv->lock);
}
static void nft_rbtree_gc(struct work_struct *work)
static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe)
{
nft_setelem_data_deactivate(net, set, &rbe->priv);
nft_rbtree_erase(priv, rbe);
}
static void nft_rbtree_gc(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
struct nftables_pernet *nft_net;
struct nft_rbtree *priv;
struct rb_node *node, *next;
struct nft_trans_gc *gc;
struct rb_node *node;
struct nft_set *set;
unsigned int gc_seq;
struct net *net;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
nft_net = nft_pernet(net);
gc_seq = READ_ONCE(nft_net->gc_seq);
if (nft_set_gc_is_pending(set))
goto done;
gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (!gc)
goto done;
return;
read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
/* Ruleset has been updated, try later. */
if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
nft_trans_gc_destroy(gc);
gc = NULL;
goto try_later;
}
for (node = rb_first(&priv->root); node ; node = next) {
next = rb_next(node);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
if (nft_set_elem_is_dead(&rbe->ext))
goto dead_elem;
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
* always visited before the start element.
@@ -662,37 +653,34 @@ static void nft_rbtree_gc(struct work_struct *work)
if (!nft_set_elem_expired(&rbe->ext))
continue;
nft_set_elem_dead(&rbe->ext);
if (!rbe_end)
continue;
nft_set_elem_dead(&rbe_end->ext);
gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
goto try_later;
nft_trans_gc_elem_add(gc, rbe_end);
rbe_end = NULL;
dead_elem:
gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
/* end element needs to be removed first, it has
* no timeout extension.
*/
if (rbe_end) {
nft_rbtree_gc_remove(net, set, priv, rbe_end);
nft_trans_gc_elem_add(gc, rbe_end);
rbe_end = NULL;
}
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
goto try_later;
nft_rbtree_gc_remove(net, set, priv, rbe);
nft_trans_gc_elem_add(gc, rbe);
}
gc = nft_trans_gc_catchall_async(gc, gc_seq);
try_later:
read_unlock_bh(&priv->lock);
if (gc)
nft_trans_gc_queue_async_done(gc);
done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
if (gc) {
gc = nft_trans_gc_catchall_sync(gc);
nft_trans_gc_queue_sync_done(gc);
priv->last_gc = jiffies;
}
}
static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
@@ -707,15 +695,12 @@ static int nft_rbtree_init(const struct nft_set *set,
{
struct nft_rbtree *priv = nft_set_priv(set);
BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0);
rwlock_init(&priv->lock);
seqcount_rwlock_init(&priv->count, &priv->lock);
priv->root = RB_ROOT;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
if (set->flags & NFT_SET_TIMEOUT)
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
return 0;
}
@@ -726,12 +711,10 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
struct nft_rbtree_elem *rbe;
struct rb_node *node;
cancel_delayed_work_sync(&priv->gc_work);
rcu_barrier();
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nf_tables_set_elem_destroy(ctx, set, rbe);
nf_tables_set_elem_destroy(ctx, set, &rbe->priv);
}
}
@@ -753,6 +736,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
static void nft_rbtree_commit(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
nft_rbtree_gc(set);
}
static void nft_rbtree_gc_init(const struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
priv->last_gc = jiffies;
}
const struct nft_set_type nft_set_rbtree_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
@@ -766,6 +764,8 @@ const struct nft_set_type nft_set_rbtree_type = {
.deactivate = nft_rbtree_deactivate,
.flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
.commit = nft_rbtree_commit,
.gc_init = nft_rbtree_gc_init,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.get = nft_rbtree_get,
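The two new ops wire the synchronous GC into the commit path: .gc_init stamps last_gc when a timeout set is created, and .commit re-runs nft_rbtree_gc() at most once per GC interval, with the transaction mutex already held. That is why the gc_seq snapshots and dead-bit marking could be deleted above. The throttle itself reduces to a common jiffies idiom; generalized (hypothetical helper, assuming <linux/jiffies.h>):

static inline bool interval_elapsed(unsigned long *last, unsigned long interval)
{
	/* run periodic maintenance from a hot path at most once per
	 * interval, without a timer or worker
	 */
	if (!time_after_eq(jiffies, *last + interval))
		return false;
	*last = jiffies;
	return true;
}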