From 74f99482eae03195ced512b440b31d62bdb6e943 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 21 Apr 2020 10:04:16 -0500 Subject: [PATCH 1/6] netfilter: nf_conntrack: add IPS_HW_OFFLOAD status bit This bit indicates that the conntrack entry is offloaded to hardware flow table. nf_conntrack entry will be tagged with [HW_OFFLOAD] if it's offload to hardware. cat /proc/net/nf_conntrack ipv4 2 tcp 6 \ src=1.1.1.17 dst=1.1.1.16 sport=56394 dport=5001 \ src=1.1.1.16 dst=1.1.1.17 sport=5001 dport=56394 [HW_OFFLOAD] \ mark=0 zone=0 use=3 Note that HW_OFFLOAD/OFFLOAD/ASSURED are mutually exclusive. Changelog: * V1->V2: - Remove check of lastused from stats. It was meant for cases such as removing driver module while traffic still running. Better to handle such cases from garbage collector. Signed-off-by: Bodong Wang Reviewed-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 8 ++++++-- net/netfilter/nf_conntrack_standalone.c | 4 +++- net/netfilter/nf_flow_table_offload.c | 3 +++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index b6f0bb1dc799..4b3395082d15 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -114,15 +114,19 @@ enum ip_conntrack_status { IPS_OFFLOAD_BIT = 14, IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), + /* Conntrack has been offloaded to hardware. */ + IPS_HW_OFFLOAD_BIT = 15, + IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT), + /* Be careful here, modifying these bits can make things messy, * so don't let users modify them directly. */ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED | - IPS_OFFLOAD), + IPS_OFFLOAD | IPS_HW_OFFLOAD), - __IPS_MAX_BIT = 15, + __IPS_MAX_BIT = 16, }; /* Connection tracking event types */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9b57330c81f8..5a3e6c43ee68 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -348,7 +348,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status)) + seq_puts(s, "[HW_OFFLOAD] "); + else if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) seq_puts(s, "[OFFLOAD] "); else if (test_bit(IPS_ASSURED_BIT, &ct->status)) seq_puts(s, "[ASSURED] "); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index e3b099c14eff..a2abb0feab7f 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -754,12 +754,15 @@ static void flow_offload_work_add(struct flow_offload_work *offload) err = flow_offload_rule_add(offload, flow_rule); if (err < 0) set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags); + else + set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); nf_flow_offload_destroy(flow_rule); } static void flow_offload_work_del(struct flow_offload_work *offload) { + clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY); set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags); From fdb9c405e35bdc6e305b9b4e20ebc141ed14fc81 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2020 21:55:33 +0200 Subject: [PATCH 2/6] netfilter: nf_tables: allow up to 64 bytes in the set element data area So far, the set elements could store up to 128-bits in the data area. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++++ net/netfilter/nf_tables_api.c | 38 +++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4ff7c81e6717..d4e29c952c40 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -243,6 +243,10 @@ struct nft_set_elem { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } key_end; + union { + u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; + struct nft_data val; + } data; void *priv; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9780bd93b7e4..3558e76e2733 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4669,6 +4669,25 @@ static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set, return 0; } +static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, + struct nft_data_desc *desc, + struct nft_data *data, + struct nlattr *attr) +{ + int err; + + err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); + if (err < 0) + return err; + + if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) { + nft_data_release(data, desc->type); + return -EINVAL; + } + + return 0; +} + static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -4946,7 +4965,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr = NULL; struct nft_userdata *udata; struct nft_data_desc desc; - struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; u32 flags = 0; @@ -5072,15 +5090,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &data, sizeof(data), &desc, - nla[NFTA_SET_ELEM_DATA]); + err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val, + nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err_parse_key_end; - err = -EINVAL; - if (set->dtype != NFT_DATA_VERDICT && desc.len != set->dlen) - goto err_parse_data; - dreg = nft_type_to_reg(set->dtype); list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { @@ -5094,14 +5108,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, continue; err = nft_validate_register_store(&bind_ctx, dreg, - &data, + &elem.data.val, desc.type, desc.len); if (err < 0) goto err_parse_data; if (desc.type == NFT_DATA_VERDICT && - (data.verdict.code == NFT_GOTO || - data.verdict.code == NFT_JUMP)) + (elem.data.val.verdict.code == NFT_GOTO || + elem.data.val.verdict.code == NFT_JUMP)) nft_validate_state_update(ctx->net, NFT_VALIDATE_NEED); } @@ -5123,7 +5137,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -ENOMEM; elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, - elem.key_end.val.data, data.data, + elem.key_end.val.data, elem.data.val.data, timeout, expiration, GFP_KERNEL); if (elem.priv == NULL) goto err_parse_data; @@ -5201,7 +5215,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nf_tables_set_elem_destroy(ctx, set, elem.priv); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_release(&data, desc.type); + nft_data_release(&elem.data.val, desc.type); err_parse_key_end: nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); err_parse_key: From 0d7c83463fdf7841350f37960a7abadd3e650b41 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2020 21:55:34 +0200 Subject: [PATCH 3/6] netfilter: nft_nat: return EOPNOTSUPP if type or flags are not supported Instead of EINVAL which should be used for malformed netlink messages. Fixes: eb31628e37a0 ("netfilter: nf_tables: Add support for IPv6 NAT") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 8b44a4de5329..bb49a217635e 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -129,7 +129,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->type = NF_NAT_MANIP_DST; break; default: - return -EINVAL; + return -EOPNOTSUPP; } if (tb[NFTA_NAT_FAMILY] == NULL) @@ -196,7 +196,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (tb[NFTA_NAT_FLAGS]) { priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS])); if (priv->flags & ~NF_NAT_RANGE_MASK) - return -EINVAL; + return -EOPNOTSUPP; } return nf_ct_netns_get(ctx->net, family); From 4566aa440008103c9bd364f38c03ca5309acc8f4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2020 21:55:35 +0200 Subject: [PATCH 4/6] netfilter: nft_nat: set flags from initialization path This patch sets the NAT flags from the control plane path. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index bb49a217635e..5c7ff213c030 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -55,7 +55,6 @@ static void nft_nat_eval(const struct nft_expr *expr, ®s->data[priv->sreg_addr_max], sizeof(range.max_addr.ip6)); } - range.flags |= NF_NAT_RANGE_MAP_IPS; } if (priv->sreg_proto_min) { @@ -63,10 +62,9 @@ static void nft_nat_eval(const struct nft_expr *expr, ®s->data[priv->sreg_proto_min]); range.max_proto.all = (__force __be16)nft_reg_load16( ®s->data[priv->sreg_proto_max]); - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } - range.flags |= priv->flags; + range.flags = priv->flags; regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type); } @@ -169,6 +167,8 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, } else { priv->sreg_addr_max = priv->sreg_addr_min; } + + priv->flags |= NF_NAT_RANGE_MAP_IPS; } plen = sizeof_field(struct nf_nat_range, min_addr.all); @@ -191,10 +191,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, } else { priv->sreg_proto_max = priv->sreg_proto_min; } + + priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[NFTA_NAT_FLAGS]) { - priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS])); + priv->flags |= ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS])); if (priv->flags & ~NF_NAT_RANGE_MASK) return -EOPNOTSUPP; } From acd766e31bb96b90c2dc4954f86e573c9ac16c66 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2020 21:55:36 +0200 Subject: [PATCH 5/6] netfilter: nft_nat: add helper function to set up NAT address and protocol This patch add nft_nat_setup_addr() and nft_nat_setup_proto() to set up the NAT mangling. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 56 +++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 5c7ff213c030..7442aa8b1555 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -30,6 +30,36 @@ struct nft_nat { u16 flags; }; +static void nft_nat_setup_addr(struct nf_nat_range2 *range, + const struct nft_regs *regs, + const struct nft_nat *priv) +{ + switch (priv->family) { + case AF_INET: + range->min_addr.ip = (__force __be32) + regs->data[priv->sreg_addr_min]; + range->max_addr.ip = (__force __be32) + regs->data[priv->sreg_addr_max]; + break; + case AF_INET6: + memcpy(range->min_addr.ip6, ®s->data[priv->sreg_addr_min], + sizeof(range->min_addr.ip6)); + memcpy(range->max_addr.ip6, ®s->data[priv->sreg_addr_max], + sizeof(range->max_addr.ip6)); + break; + } +} + +static void nft_nat_setup_proto(struct nf_nat_range2 *range, + const struct nft_regs *regs, + const struct nft_nat *priv) +{ + range->min_proto.all = (__force __be16) + nft_reg_load16(®s->data[priv->sreg_proto_min]); + range->max_proto.all = (__force __be16) + nft_reg_load16(®s->data[priv->sreg_proto_max]); +} + static void nft_nat_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -40,29 +70,11 @@ static void nft_nat_eval(const struct nft_expr *expr, struct nf_nat_range2 range; memset(&range, 0, sizeof(range)); - if (priv->sreg_addr_min) { - if (priv->family == AF_INET) { - range.min_addr.ip = (__force __be32) - regs->data[priv->sreg_addr_min]; - range.max_addr.ip = (__force __be32) - regs->data[priv->sreg_addr_max]; + if (priv->sreg_addr_min) + nft_nat_setup_addr(&range, regs, priv); - } else { - memcpy(range.min_addr.ip6, - ®s->data[priv->sreg_addr_min], - sizeof(range.min_addr.ip6)); - memcpy(range.max_addr.ip6, - ®s->data[priv->sreg_addr_max], - sizeof(range.max_addr.ip6)); - } - } - - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - } + if (priv->sreg_proto_min) + nft_nat_setup_proto(&range, regs, priv); range.flags = priv->flags; From 3ff7ddb1353da9b535e65702704cbadea1da9a00 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2020 21:55:37 +0200 Subject: [PATCH 6/6] netfilter: nft_nat: add netmap support This patch allows you to NAT the network address prefix onto another network address prefix, a.k.a. netmapping. Userspace must specify the NF_NAT_RANGE_NETMAP flag and the prefix address through the NFTA_NAT_REG_ADDR_MIN and NFTA_NAT_REG_ADDR_MAX netlink attributes. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_nat.h | 4 ++- net/netfilter/nft_nat.c | 46 ++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h index 4a95c0db14d4..a64586e77b24 100644 --- a/include/uapi/linux/netfilter/nf_nat.h +++ b/include/uapi/linux/netfilter/nf_nat.h @@ -11,6 +11,7 @@ #define NF_NAT_RANGE_PERSISTENT (1 << 3) #define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4) #define NF_NAT_RANGE_PROTO_OFFSET (1 << 5) +#define NF_NAT_RANGE_NETMAP (1 << 6) #define NF_NAT_RANGE_PROTO_RANDOM_ALL \ (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) @@ -18,7 +19,8 @@ #define NF_NAT_RANGE_MASK \ (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \ NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \ - NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET) + NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \ + NF_NAT_RANGE_NETMAP) struct nf_nat_ipv4_range { unsigned int flags; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 7442aa8b1555..23a7bfd10521 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -60,6 +60,46 @@ static void nft_nat_setup_proto(struct nf_nat_range2 *range, nft_reg_load16(®s->data[priv->sreg_proto_max]); } +static void nft_nat_setup_netmap(struct nf_nat_range2 *range, + const struct nft_pktinfo *pkt, + const struct nft_nat *priv) +{ + struct sk_buff *skb = pkt->skb; + union nf_inet_addr new_addr; + __be32 netmask; + int i, len = 0; + + switch (priv->type) { + case NFT_NAT_SNAT: + if (nft_pf(pkt) == NFPROTO_IPV4) { + new_addr.ip = ip_hdr(skb)->saddr; + len = sizeof(struct in_addr); + } else { + new_addr.in6 = ipv6_hdr(skb)->saddr; + len = sizeof(struct in6_addr); + } + break; + case NFT_NAT_DNAT: + if (nft_pf(pkt) == NFPROTO_IPV4) { + new_addr.ip = ip_hdr(skb)->daddr; + len = sizeof(struct in_addr); + } else { + new_addr.in6 = ipv6_hdr(skb)->daddr; + len = sizeof(struct in6_addr); + } + break; + } + + for (i = 0; i < len / sizeof(__be32); i++) { + netmask = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]); + new_addr.ip6[i] &= ~netmask; + new_addr.ip6[i] |= range->min_addr.ip6[i] & netmask; + } + + range->min_addr = new_addr; + range->max_addr = new_addr; +} + static void nft_nat_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -70,8 +110,12 @@ static void nft_nat_eval(const struct nft_expr *expr, struct nf_nat_range2 range; memset(&range, 0, sizeof(range)); - if (priv->sreg_addr_min) + + if (priv->sreg_addr_min) { nft_nat_setup_addr(&range, regs, priv); + if (priv->flags & NF_NAT_RANGE_NETMAP) + nft_nat_setup_netmap(&range, pkt, priv); + } if (priv->sreg_proto_min) nft_nat_setup_proto(&range, regs, priv);