From dd2602d00f80dd8bf30f083fde20b4fbbcf82266 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 26 Sep 2016 21:23:37 +0800 Subject: [PATCH 01/12] netfilter: xt_multiport: Use switch case instead of multiple condition checks There are multiple equality condition checks in the original codes, so it is better to use switch case instead of them. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_multiport.c | 40 +++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index ac1d3c3d09e7..ec06fb1cb16f 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -42,29 +42,31 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, e = minfo->ports[++i]; pr_debug("src or dst matches with %d-%d?\n", s, e); - if (minfo->flags == XT_MULTIPORT_SOURCE - && src >= s && src <= e) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_DESTINATION - && dst >= s && dst <= e) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_EITHER - && ((dst >= s && dst <= e) - || (src >= s && src <= e))) - return true ^ minfo->invert; + switch (minfo->flags) { + case XT_MULTIPORT_SOURCE: + return (src >= s && src <= e) ^ minfo->invert; + case XT_MULTIPORT_DESTINATION: + return (dst >= s && dst <= e) ^ minfo->invert; + case XT_MULTIPORT_EITHER: + return ((dst >= s && dst <= e) || + (src >= s && src <= e)) ^ minfo->invert; + default: + break; + } } else { /* exact port matching */ pr_debug("src or dst matches with %d?\n", s); - if (minfo->flags == XT_MULTIPORT_SOURCE - && src == s) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_DESTINATION - && dst == s) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_EITHER - && (src == s || dst == s)) - return true ^ minfo->invert; + switch (minfo->flags) { + case XT_MULTIPORT_SOURCE: + return (src == s) ^ minfo->invert; + case XT_MULTIPORT_DESTINATION: + return (dst == s) ^ minfo->invert; + case XT_MULTIPORT_EITHER: + return (src == s || dst == s) ^ minfo->invert; + default: + break; + } } } From 0813fbc913582feef688265f2a5d7c077d6a3d1f Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Fri, 7 Oct 2016 14:02:16 -0700 Subject: [PATCH 02/12] netfilter: nfnetlink_log: Use GFP_NOWARN for skb allocation Since the code explicilty falls back to a smaller allocation when the large one fails, we shouldn't complain when that happens. Signed-off-by: Calvin Owens Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index eb086a192c5a..7435505037b7 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -330,7 +330,7 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = alloc_skb(n, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current From 5efa0fc6d7f7930b18801f07cefae8eeacd6ac02 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Oct 2016 17:35:18 +0200 Subject: [PATCH 03/12] netfilter: nf_tables: allow expressions to return STOLEN Currently not supported, we'd oops as skb was (or is) free'd elsewhere. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 0dd5c695482f..70de32a6d5c0 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -178,6 +178,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) case NF_ACCEPT: case NF_DROP: case NF_QUEUE: + case NF_STOLEN: nft_trace_packet(&info, chain, rule, rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; From 0ecba4d9d1db51f670af7ba9049461116a3a2bea Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 19 Oct 2016 23:29:53 +0800 Subject: [PATCH 04/12] netfilter: nft_numgen: start round robin from zero Currently we start round robin from 1, but it's better to start round robin from 0. This is to keep consistent with xt_statistic in iptables. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_numgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 55bc5ab78d4a..a66b36097b8f 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -65,7 +65,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, return -EOVERFLOW; priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); - atomic_set(&priv->counter, 0); + atomic_set(&priv->counter, priv->modulus - 1); return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); From 96d9f2a72c719d985fdbae4e3d63ddc874947a7e Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 19 Oct 2016 23:31:29 +0800 Subject: [PATCH 05/12] netfilter: nft_meta: permit pkttype mangling in ip/ip6 prerouting After supporting this, we can combine it with hash expression to emulate the 'cluster match'. Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6c1e0246706e..64994023bf81 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -310,6 +310,11 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, case NFPROTO_NETDEV: hooks = 1 << NF_NETDEV_INGRESS; break; + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + hooks = 1 << NF_INET_PRE_ROUTING; + break; default: return -EOPNOTSUPP; } From 254432613c588640f8b8b5c3641a3c27bbe14688 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Oct 2016 18:07:14 +0200 Subject: [PATCH 06/12] netfilter: nft_ct: add notrack support This patch adds notrack support. I decided to add a new expression, given that this doesn't fit into the existing set operation. Notrack doesn't need a source register, and an hypothetical NFT_CT_NOTRACK key makes no sense since matching the untracked state is done through NFT_CT_STATE. I'm placing this new notrack expression into nft_ct.c, I think a single module is too much. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 50 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d7b0d171172a..6837348c8993 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2016 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -518,15 +519,61 @@ static struct nft_expr_type nft_ct_type __read_mostly = { .owner = THIS_MODULE, }; +static void nft_notrack_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct sk_buff *skb = pkt->skb; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(pkt->skb, &ctinfo); + /* Previously seen (loopback or untracked)? Ignore. */ + if (ct) + return; + + ct = nf_ct_untracked_get(); + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; +} + +static struct nft_expr_type nft_notrack_type; +static const struct nft_expr_ops nft_notrack_ops = { + .type = &nft_notrack_type, + .size = NFT_EXPR_SIZE(0), + .eval = nft_notrack_eval, +}; + +static struct nft_expr_type nft_notrack_type __read_mostly = { + .name = "notrack", + .ops = &nft_notrack_ops, + .owner = THIS_MODULE, +}; + static int __init nft_ct_module_init(void) { + int err; + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE); - return nft_register_expr(&nft_ct_type); + err = nft_register_expr(&nft_ct_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_notrack_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_expr(&nft_ct_type); + return err; } static void __exit nft_ct_module_exit(void) { + nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); } @@ -536,3 +583,4 @@ module_exit(nft_ct_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_EXPR("ct"); +MODULE_ALIAS_NFT_EXPR("notrack"); From f6d0cbcf09c506b9b022df8f9d7693a7cec3c732 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Oct 2016 16:56:40 +0200 Subject: [PATCH 07/12] netfilter: nf_tables: add fib expression Add FIB expression, supported for ipv4, ipv6 and inet family (the latter just dispatches to ipv4 or ipv6 one based on nfproto). Currently supports fetching output interface index/name and the rtm_type associated with an address. This can be used for adding path filtering. rtm_type is useful to e.g. enforce a strong-end host model where packets are only accepted if daddr is configured on the interface the packet arrived on. The fib expression is a native nftables alternative to the xtables addrtype and rp_filter matches. FIB result order for oif/oifname retrieval is as follows: - if packet is local (skb has rtable, RTF_LOCAL set, this will also catch looped-back multicast packets), set oif to the loopback interface. - if fib lookup returns an error, or result points to local, store zero result. This means '--local' option of -m rpfilter is not supported. It is possible to use 'fib type local' or add explicit saddr/daddr matching rules to create exceptions if this is really needed. - store result in the destination register. In case of multiple routes, search set for desired oif in case strict matching is requested. ipv4 and ipv6 behave fib expressions are supposed to behave the same. [ I have collapsed Arnd Bergmann's ("netfilter: nf_tables: fib warnings") http://patchwork.ozlabs.org/patch/688615/ to address fallout from this patch after rebasing nf-next, that was posted to address compilation warnings. --pablo ] Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_fib.h | 31 +++ include/uapi/linux/netfilter/nf_tables.h | 36 +++ net/ipv4/netfilter/Kconfig | 8 + net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/nft_fib_ipv4.c | 238 ++++++++++++++++++++ net/ipv6/netfilter/Kconfig | 8 + net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nft_fib_ipv6.c | 275 +++++++++++++++++++++++ net/netfilter/Kconfig | 13 ++ net/netfilter/Makefile | 2 + net/netfilter/nft_fib.c | 159 +++++++++++++ net/netfilter/nft_fib_inet.c | 82 +++++++ 12 files changed, 854 insertions(+) create mode 100644 include/net/netfilter/nft_fib.h create mode 100644 net/ipv4/netfilter/nft_fib_ipv4.c create mode 100644 net/ipv6/netfilter/nft_fib_ipv6.c create mode 100644 net/netfilter/nft_fib.c create mode 100644 net/netfilter/nft_fib_inet.c diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h new file mode 100644 index 000000000000..cbedda077db2 --- /dev/null +++ b/include/net/netfilter/nft_fib.h @@ -0,0 +1,31 @@ +#ifndef _NFT_FIB_H_ +#define _NFT_FIB_H_ + +struct nft_fib { + enum nft_registers dreg:8; + u8 result; + u32 flags; +}; + +extern const struct nla_policy nft_fib_policy[]; + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]); +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index); +#endif diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c6c4477c136b..a054ad2c8853 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1109,6 +1109,42 @@ enum nft_gen_attributes { }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) +/* + * enum nft_fib_attributes - nf_tables fib expression netlink attributes + * + * @NFTA_FIB_DREG: destination register (NLA_U32) + * @NFTA_FIB_RESULT: desired result (NLA_U32) + * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32) + * + * The FIB expression performs a route lookup according + * to the packet data. + */ +enum nft_fib_attributes { + NFTA_FIB_UNSPEC, + NFTA_FIB_DREG, + NFTA_FIB_RESULT, + NFTA_FIB_FLAGS, + __NFTA_FIB_MAX +}; +#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1) + +enum nft_fib_result { + NFT_FIB_RESULT_UNSPEC, + NFT_FIB_RESULT_OIF, + NFT_FIB_RESULT_OIFNAME, + NFT_FIB_RESULT_ADDRTYPE, + __NFT_FIB_RESULT_MAX +}; +#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1) + +enum nft_fib_flags { + NFTA_FIB_F_SADDR = 1 << 0, /* look up src */ + NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */ + NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ + NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ + NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ +}; + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d613309e3e5d..f80bb8f457c8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -54,6 +54,14 @@ config NFT_DUP_IPV4 help This module enables IPv4 packet duplication support for nf_tables. +config NFT_FIB_IPV4 + select NFT_FIB + tristate "nf_tables fib / ip route lookup support" + help + This module enables IPv4 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + endif # NF_TABLES_IPV4 config NF_TABLES_ARP diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 853328f8fd05..59d7786ddce1 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c new file mode 100644 index 000000000000..db91fd42db67 --- /dev/null +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -0,0 +1,238 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool fib4_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + + return rt && (rt->rt_flags & RTCF_LOCAL); +} + +#define DSCP_BITS 0xfc + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dst = ®s->data[priv->dreg]; + const struct net_device *dev = NULL; + const struct iphdr *iph; + __be32 addr; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = pkt->in; + else if (priv->flags & NFTA_FIB_F_OIF) + dev = pkt->out; + + iph = ip_hdr(pkt->skb); + if (priv->flags & NFTA_FIB_F_DADDR) + addr = iph->daddr; + else + addr = iph->saddr; + + *dst = inet_dev_addr_type(pkt->net, dev, addr); +} +EXPORT_SYMBOL_GPL(nft_fib4_eval_type); + +static int get_ifindex(const struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dest = ®s->data[priv->dreg]; + const struct iphdr *iph; + struct fib_result res; + struct flowi4 fl4 = { + .flowi4_scope = RT_SCOPE_UNIVERSE, + .flowi4_iif = LOOPBACK_IFINDEX, + }; + const struct net_device *oif; + struct net_device *found; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int i; +#endif + + /* + * Do not set flowi4_oif, it restricts results (for example, asking + * for oif 3 will get RTN_UNICAST result even if the daddr exits + * on another interface. + * + * Search results for the desired outinterface instead. + */ + if (priv->flags & NFTA_FIB_F_OIF) + oif = pkt->out; + else if (priv->flags & NFTA_FIB_F_IIF) + oif = pkt->in; + else + oif = NULL; + + if (pkt->hook == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) { + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + return; + } + + iph = ip_hdr(pkt->skb); + if (ipv4_is_multicast(iph->daddr) && + ipv4_is_zeronet(iph->saddr) && + ipv4_is_local_multicast(iph->daddr)) { + nft_fib_store_result(dest, priv->result, pkt, + get_ifindex(pkt->skb->dev)); + return; + } + + if (priv->flags & NFTA_FIB_F_MARK) + fl4.flowi4_mark = pkt->skb->mark; + + fl4.flowi4_tos = iph->tos & DSCP_BITS; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl4.daddr = iph->daddr; + fl4.saddr = get_saddr(iph->saddr); + } else { + fl4.daddr = iph->saddr; + fl4.saddr = get_saddr(iph->daddr); + } + + if (fib_lookup(pkt->net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) + return; + + switch (res.type) { + case RTN_UNICAST: + break; + case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */ + return; + default: + break; + } + + if (!oif) { + found = FIB_RES_DEV(res); + goto ok; + } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + for (i = 0; i < res.fi->fib_nhs; i++) { + struct fib_nh *nh = &res.fi->fib_nh[i]; + + if (nh->nh_dev == oif) { + found = nh->nh_dev; + goto ok; + } + } + return; +#else + found = FIB_RES_DEV(res); + if (found != oif) + return; +#endif +ok: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + *dest = found->ifindex; + break; + case NFT_FIB_RESULT_OIFNAME: + strncpy((char *)dest, found->name, IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib4_eval); + +static struct nft_expr_type nft_fib4_type; + +static const struct nft_expr_ops nft_fib4_type_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops nft_fib4_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops * +nft_fib4_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib4_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib4_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib4_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib4_type __read_mostly = { + .name = "fib", + .select_ops = &nft_fib4_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, +}; + +static int __init nft_fib4_module_init(void) +{ + return nft_register_expr(&nft_fib4_type); +} + +static void __exit nft_fib4_module_exit(void) +{ + nft_unregister_expr(&nft_fib4_type); +} + +module_init(nft_fib4_module_init); +module_exit(nft_fib4_module_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index e10a04c9cdc7..144f1de33836 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -54,6 +54,14 @@ config NFT_DUP_IPV6 help This module enables IPv6 packet duplication support for nf_tables. +config NFT_FIB_IPV6 + tristate "nf_tables fib / ipv6 route lookup support" + select NFT_FIB + help + This module enables IPv6 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + endif # NF_TABLES_IPV6 endif # NF_TABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b4f7d0b4e2af..7984f3071f05 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o +obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c new file mode 100644 index 000000000000..ff1f1b6b4a4a --- /dev/null +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -0,0 +1,275 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static bool fib6_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *)skb_dst(skb); + + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + +static int get_ifindex(const struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, + const struct nft_pktinfo *pkt, + const struct net_device *dev) +{ + const struct ipv6hdr *iph = ipv6_hdr(pkt->skb); + int lookup_flags = 0; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl6->daddr = iph->daddr; + fl6->saddr = iph->saddr; + } else { + fl6->daddr = iph->saddr; + fl6->saddr = iph->daddr; + } + + if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); + } + + if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) + lookup_flags |= RT6_LOOKUP_F_HAS_SADDR; + + if (priv->flags & NFTA_FIB_F_MARK) + fl6->flowi6_mark = pkt->skb->mark; + + fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; + + return lookup_flags; +} + +static u32 __nft_fib6_eval_type(const struct nft_fib *priv, + const struct nft_pktinfo *pkt) +{ + const struct net_device *dev = NULL; + const struct nf_ipv6_ops *v6ops; + const struct nf_afinfo *afinfo; + int route_err, addrtype; + struct rt6_info *rt; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_proto = pkt->tprot, + }; + u32 ret = 0; + + afinfo = nf_get_afinfo(NFPROTO_IPV6); + if (!afinfo) + return RTN_UNREACHABLE; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = pkt->in; + else if (priv->flags & NFTA_FIB_F_OIF) + dev = pkt->out; + + nft_fib6_flowi_init(&fl6, priv, pkt, dev); + + v6ops = nf_get_ipv6_ops(); + if (dev && v6ops && v6ops->chk_addr(pkt->net, &fl6.daddr, dev, true)) + ret = RTN_LOCAL; + + route_err = afinfo->route(pkt->net, (struct dst_entry **)&rt, + flowi6_to_flowi(&fl6), false); + if (route_err) + goto err; + + if (rt->rt6i_flags & RTF_REJECT) { + route_err = rt->dst.error; + dst_release(&rt->dst); + goto err; + } + + if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr)) + ret = RTN_ANYCAST; + else if (!dev && rt->rt6i_flags & RTF_LOCAL) + ret = RTN_LOCAL; + + dst_release(&rt->dst); + + if (ret) + return ret; + + addrtype = ipv6_addr_type(&fl6.daddr); + + if (addrtype & IPV6_ADDR_MULTICAST) + return RTN_MULTICAST; + if (addrtype & IPV6_ADDR_UNICAST) + return RTN_UNICAST; + + return RTN_UNSPEC; + err: + switch (route_err) { + case -EINVAL: + return RTN_BLACKHOLE; + case -EACCES: + return RTN_PROHIBIT; + case -EAGAIN: + return RTN_THROW; + default: + break; + } + + return RTN_UNREACHABLE; +} + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dest = ®s->data[priv->dreg]; + + *dest = __nft_fib6_eval_type(priv, pkt); +} +EXPORT_SYMBOL_GPL(nft_fib6_eval_type); + +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + const struct net_device *oif = NULL; + u32 *dest = ®s->data[priv->dreg]; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_proto = pkt->tprot, + }; + struct rt6_info *rt; + int lookup_flags; + + if (priv->flags & NFTA_FIB_F_IIF) + oif = pkt->in; + else if (priv->flags & NFTA_FIB_F_OIF) + oif = pkt->out; + + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif); + + if (pkt->hook == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) { + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + return; + } + + *dest = 0; + again: + rt = (void *)ip6_route_lookup(pkt->net, &fl6, lookup_flags); + if (rt->dst.error) + goto put_rt_err; + + /* Should not see RTF_LOCAL here */ + if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) + goto put_rt_err; + + if (oif && oif != rt->rt6i_idev->dev) { + /* multipath route? Try again with F_IFACE */ + if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6.flowi6_oif = oif->ifindex; + ip6_rt_put(rt); + goto again; + } + } + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + *dest = rt->rt6i_idev->dev->ifindex; + break; + case NFT_FIB_RESULT_OIFNAME: + strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + break; + } + + put_rt_err: + ip6_rt_put(rt); +} +EXPORT_SYMBOL_GPL(nft_fib6_eval); + +static struct nft_expr_type nft_fib6_type; + +static const struct nft_expr_ops nft_fib6_type_ops = { + .type = &nft_fib6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib6_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops nft_fib6_ops = { + .type = &nft_fib6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib6_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops * +nft_fib6_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib6_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib6_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib6_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib6_type __read_mostly = { + .name = "fib", + .select_ops = &nft_fib6_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, +}; + +static int __init nft_fib6_module_init(void) +{ + return nft_register_expr(&nft_fib6_type); +} + +static void __exit nft_fib6_module_exit(void) +{ + nft_unregister_expr(&nft_fib6_type); +} +module_init(nft_fib6_module_init); +module_exit(nft_fib6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e8d56d9a4df2..9bcf899ce16e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -581,6 +581,19 @@ config NFT_HASH This option adds the "hash" expression that you can use to perform a hash operation on registers. +config NFT_FIB + tristate + +config NFT_FIB_INET + depends on NF_TABLES_INET + depends on NFT_FIB_IPV4 + depends on NFT_FIB_IPV6 + tristate "Netfilter nf_tables fib inet support" + help + This option allows using the FIB expression from the inet table. + The lookup will be delegated to the IPv4 or IPv6 FIB depending + on the protocol of the packet. + if NF_TABLES_NETDEV config NF_DUP_NETDEV diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c23c3c84416f..8faa36c0686d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -96,6 +96,8 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_FIB) += nft_fib.o +obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c new file mode 100644 index 000000000000..4944a8b7f7a7 --- /dev/null +++ b/net/netfilter/nft_fib.c @@ -0,0 +1,159 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Generic part shared by ipv4 and ipv6 backends. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { + [NFTA_FIB_DREG] = { .type = NLA_U32 }, + [NFTA_FIB_RESULT] = { .type = NLA_U32 }, + [NFTA_FIB_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL(nft_fib_policy); + +#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF) + +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: /* fallthrough */ + case NFT_FIB_RESULT_OIFNAME: + hooks = (1 << NF_INET_PRE_ROUTING); + break; + case NFT_FIB_RESULT_ADDRTYPE: + if (priv->flags & NFTA_FIB_F_IIF) + hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + else if (priv->flags & NFTA_FIB_F_OIF) + hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_FORWARD); + else + hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING); + + break; + default: + return -EINVAL; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} +EXPORT_SYMBOL_GPL(nft_fib_validate); + +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_fib *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS]) + return -EINVAL; + + priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS])); + + if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL)) + return -EINVAL; + + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == + (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) == + (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0) + return -EINVAL; + + priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = sizeof(int); + break; + case NFT_FIB_RESULT_OIFNAME: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = IFNAMSIZ; + break; + case NFT_FIB_RESULT_ADDRTYPE: + len = sizeof(u32); + break; + default: + return -EINVAL; + } + + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); + if (err < 0) + return err; + + return nft_fib_validate(ctx, expr, NULL); +} +EXPORT_SYMBOL_GPL(nft_fib_init); + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg)) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result))) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags))) + return -1; + + return 0; +} +EXPORT_SYMBOL_GPL(nft_fib_dump); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index) +{ + struct net_device *dev; + u32 *dreg = reg; + + switch (r) { + case NFT_FIB_RESULT_OIF: + *dreg = index; + break; + case NFT_FIB_RESULT_OIFNAME: + dev = dev_get_by_index_rcu(pkt->net, index); + strncpy(reg, dev ? dev->name : "", IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + *dreg = 0; + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib_store_result); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c new file mode 100644 index 000000000000..fe8943b572b7 --- /dev/null +++ b/net/netfilter/nft_fib_inet.c @@ -0,0 +1,82 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static void nft_fib_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + switch (pkt->pf) { + case NFPROTO_IPV4: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib4_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib4_eval_type(expr, regs, pkt); + } + break; + case NFPROTO_IPV6: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib6_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib6_eval_type(expr, regs, pkt); + } + break; + } + + regs->verdict.code = NF_DROP; +} + +static struct nft_expr_type nft_fib_inet_type; +static const struct nft_expr_ops nft_fib_inet_ops = { + .type = &nft_fib_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib_inet_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static struct nft_expr_type nft_fib_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "fib", + .ops = &nft_fib_inet_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fib_inet_module_init(void) +{ + return nft_register_expr(&nft_fib_inet_type); +} + +static void __exit nft_fib_inet_module_exit(void) +{ + nft_unregister_expr(&nft_fib_inet_type); +} + +module_init(nft_fib_inet_module_init); +module_exit(nft_fib_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_ALIAS_NFT_AF_EXPR(1, "fib"); From 1fddf4bad0ac9f4d32c74af286fc1eec2a03c82c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Oct 2016 19:49:42 +0100 Subject: [PATCH 08/12] netfilter: nf_log: add packet logging for netdev family Move layer 2 packet logging into nf_log_l2packet() that resides in nf_log_common.c, so this can be shared by both bridge and netdev families. This patch adds the boiler plate code to register the netdev logging family. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 5 ++ net/bridge/netfilter/Kconfig | 1 + net/bridge/netfilter/nf_log_bridge.c | 16 +----- net/netfilter/Kconfig | 4 ++ net/netfilter/Makefile | 3 ++ net/netfilter/nf_log_common.c | 27 ++++++++++ net/netfilter/nf_log_netdev.c | 80 ++++++++++++++++++++++++++++ 7 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 net/netfilter/nf_log_netdev.c diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 309cd267be4f..a559aa41253c 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -109,5 +109,10 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix); +void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *prefix); #endif /* _NF_LOG_H */ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 9cebf47ac840..e7ef1a1ef3a6 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT config NF_LOG_BRIDGE tristate "Bridge packet logging" + select NF_LOG_COMMON endif # NF_TABLES_BRIDGE diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 1663df598545..c197b1f844ee 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -24,21 +24,7 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_IPV6): - nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_ARP): - case htons(ETH_P_RARP): - nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - } + nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); } static struct nf_logger nf_bridge_logger __read_mostly = { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9bcf899ce16e..854dadb196a1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -57,6 +57,10 @@ config NF_CONNTRACK config NF_LOG_COMMON tristate +config NF_LOG_NETDEV + tristate "Netdev packet logging" + select NF_LOG_COMMON + if NF_CONNTRACK config NF_CONNTRACK_MARK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 8faa36c0686d..8edd791743fe 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -48,6 +48,9 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o +# packet logging for netdev family +obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o + obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index 119fe1cb1ea9..ed9b80815fa0 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -175,6 +175,33 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, } EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); +/* bridge and netdev logging families share this code. */ +void nf_log_l2packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_IPV6): + nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + } +} +EXPORT_SYMBOL_GPL(nf_log_l2packet); + static int __init nf_log_common_init(void) { return 0; diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c new file mode 100644 index 000000000000..1f645949f3d8 --- /dev/null +++ b/net/netfilter/nf_log_netdev.c @@ -0,0 +1,80 @@ +/* + * (C) 2016 by Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include + +static void nf_log_netdev_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); +} + +static struct nf_logger nf_netdev_logger __read_mostly = { + .name = "nf_log_netdev", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_netdev_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_netdev_net_init(struct net *net) +{ + return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); +} + +static void __net_exit nf_log_netdev_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_netdev_logger); +} + +static struct pernet_operations nf_log_netdev_net_ops = { + .init = nf_log_netdev_net_init, + .exit = nf_log_netdev_net_exit, +}; + +static int __init nf_log_netdev_init(void) +{ + int ret; + + /* Request to load the real packet loggers. */ + nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); + + ret = register_pernet_subsys(&nf_log_netdev_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); + return 0; +} + +static void __exit nf_log_netdev_exit(void) +{ + unregister_pernet_subsys(&nf_log_netdev_net_ops); + nf_log_unregister(&nf_netdev_logger); +} + +module_init(nf_log_netdev_init); +module_exit(nf_log_netdev_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("Netfilter netdev packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ From 8db4c5be88f62ffd7a552f70687a10c614dc697b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Oct 2016 19:49:48 +0100 Subject: [PATCH 09/12] netfilter: move socket lookup infrastructure to nf_socket_ipv{4,6}.c We need this split to reuse existing codebase for the upcoming nf_tables socket expression. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_socket.h | 27 +++ net/ipv4/netfilter/Kconfig | 6 + net/ipv4/netfilter/Makefile | 2 + net/ipv4/netfilter/nf_socket_ipv4.c | 163 +++++++++++++++ net/ipv6/netfilter/Kconfig | 6 + net/ipv6/netfilter/Makefile | 2 + net/ipv6/netfilter/nf_socket_ipv6.c | 151 ++++++++++++++ net/netfilter/Kconfig | 3 +- net/netfilter/xt_socket.c | 305 +--------------------------- 9 files changed, 369 insertions(+), 296 deletions(-) create mode 100644 include/net/netfilter/nf_socket.h create mode 100644 net/ipv4/netfilter/nf_socket_ipv4.c create mode 100644 net/ipv6/netfilter/nf_socket_ipv6.c diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h new file mode 100644 index 000000000000..f2fc39c97d43 --- /dev/null +++ b/include/net/netfilter/nf_socket.h @@ -0,0 +1,27 @@ +#ifndef _NF_SOCK_H_ +#define _NF_SOCK_H_ + +struct net_device; +struct sk_buff; +struct sock; +struct net; + +static inline bool nf_sk_is_transparent(struct sock *sk) +{ + switch (sk->sk_state) { + case TCP_TIME_WAIT: + return inet_twsk(sk)->tw_transparent; + case TCP_NEW_SYN_RECV: + return inet_rsk(inet_reqsk(sk))->no_srccheck; + default: + return inet_sk(sk)->transparent; + } +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +#endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f80bb8f457c8..c11eb1744ab1 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. +config NF_SOCKET_IPV4 + tristate "IPv4 socket lookup support" + help + This option enables the IPv4 socket lookup infrastructure. This is + is required by the iptables socket match. + if NF_TABLES config NF_TABLES_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 59d7786ddce1..f462fee66ac8 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o +obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o + # logging obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c new file mode 100644 index 000000000000..a83d558e1aae --- /dev/null +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + +static int +extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, + __be32 *raddr, __be32 *laddr, + __be16 *rport, __be16 *lport) +{ + unsigned int outside_hdrlen = ip_hdrlen(skb); + struct iphdr *inside_iph, _inside_iph; + struct icmphdr *icmph, _icmph; + __be16 *ports, _ports[2]; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_REDIRECT: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return 1; + } + + inside_iph = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr), + sizeof(_inside_iph), &_inside_iph); + if (inside_iph == NULL) + return 1; + + if (inside_iph->protocol != IPPROTO_TCP && + inside_iph->protocol != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr) + + (inside_iph->ihl << 2), + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_iph->protocol; + *laddr = inside_iph->saddr; + *lport = ports[0]; + *raddr = inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + return NULL; +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be32 uninitialized_var(daddr), uninitialized_var(saddr); + __be16 uninitialized_var(dport), uninitialized_var(sport); + const struct iphdr *iph = ip_hdr(skb); + struct sk_buff *data_skb = NULL; + u8 uninitialized_var(protocol); +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn const *ct; +#endif + int doff = 0; + + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + protocol = iph->protocol; + saddr = iph->saddr; + sport = hp->source; + daddr = iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = iph->protocol == IPPROTO_TCP ? + ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : + ip_hdrlen(skb) + sizeof(*hp); + + } else if (iph->protocol == IPPROTO_ICMP) { + if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, + &sport, &dport)) + return NULL; + } else { + return NULL; + } + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Do the lookup with the original socket address in + * case this is a reply packet of an established + * SNAT-ted connection. + */ + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct) && + ((iph->protocol != IPPROTO_ICMP && + ctinfo == IP_CT_ESTABLISHED_REPLY) || + (iph->protocol == IPPROTO_ICMP && + ctinfo == IP_CT_RELATED_REPLY)) && + (ct->status & IPS_SRC_NAT_DONE)) { + + daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + dport = (iph->protocol == IPPROTO_TCP) ? + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + + return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, + daddr, sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 144f1de33836..6acb2eecd986 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_SOCKET_IPV6 + tristate "IPv6 socket lookup support" + help + This option enables the IPv6 socket lookup infrastructure. This + is used by the ip6tables socket match. + if NF_TABLES config NF_TABLES_IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 7984f3071f05..fe180c96040e 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o + # logging obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c new file mode 100644 index 000000000000..ebb2bf84232a --- /dev/null +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + +static int +extract_icmp6_fields(const struct sk_buff *skb, + unsigned int outside_hdrlen, + int *protocol, + const struct in6_addr **raddr, + const struct in6_addr **laddr, + __be16 *rport, + __be16 *lport, + struct ipv6hdr *ipv6_var) +{ + const struct ipv6hdr *inside_iph; + struct icmp6hdr *icmph, _icmph; + __be16 *ports, _ports[2]; + u8 inside_nexthdr; + __be16 inside_fragoff; + int inside_hdrlen; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) + return 1; + + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), + sizeof(*ipv6_var), ipv6_var); + if (inside_iph == NULL) + return 1; + inside_nexthdr = inside_iph->nexthdr; + + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + + sizeof(*ipv6_var), + &inside_nexthdr, &inside_fragoff); + if (inside_hdrlen < 0) + return 1; /* hjm: Packet has no/incomplete transport layer headers. */ + + if (inside_nexthdr != IPPROTO_TCP && + inside_nexthdr != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, inside_hdrlen, + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_nexthdr; + *laddr = &inside_iph->saddr; + *lport = ports[0]; + *raddr = &inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, + const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet6_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + + return NULL; +} + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be16 uninitialized_var(dport), uninitialized_var(sport); + const struct in6_addr *daddr = NULL, *saddr = NULL; + struct ipv6hdr *iph = ipv6_hdr(skb); + struct sk_buff *data_skb = NULL; + int doff = 0; + int thoff = 0, tproto; + + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); + if (tproto < 0) { + pr_debug("unable to find transport header in IPv6 packet, dropping\n"); + return NULL; + } + + if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + saddr = &iph->saddr; + sport = hp->source; + daddr = &iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = tproto == IPPROTO_TCP ? + thoff + __tcp_hdrlen((struct tcphdr *)hp) : + thoff + sizeof(*hp); + + } else if (tproto == IPPROTO_ICMPV6) { + struct ipv6hdr ipv6_var; + + if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, + &sport, &dport, &ipv6_var)) + return NULL; + } else { + return NULL; + } + + return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, + sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 854dadb196a1..ed00ec114ea2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1426,9 +1426,10 @@ config NETFILTER_XT_MATCH_SOCKET tristate '"socket" match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on !NF_CONNTRACK || NF_CONNTRACK depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n + depends on NF_SOCKET_IPV4 + depends on NF_SOCKET_IPV6 select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n help diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index b10ade272b50..018c369c9f0d 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,76 +22,14 @@ #include #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -#define XT_SOCKET_HAVE_IPV6 1 #include #include #include #endif +#include #include -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#define XT_SOCKET_HAVE_CONNTRACK 1 -#include -#endif - -static int -extract_icmp4_fields(const struct sk_buff *skb, - u8 *protocol, - __be32 *raddr, - __be32 *laddr, - __be16 *rport, - __be16 *lport) -{ - unsigned int outside_hdrlen = ip_hdrlen(skb); - struct iphdr *inside_iph, _inside_iph; - struct icmphdr *icmph, _icmph; - __be16 *ports, _ports[2]; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - switch (icmph->type) { - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_REDIRECT: - case ICMP_TIME_EXCEEDED: - case ICMP_PARAMETERPROB: - break; - default: - return 1; - } - - inside_iph = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr), - sizeof(_inside_iph), &_inside_iph); - if (inside_iph == NULL) - return 1; - - if (inside_iph->protocol != IPPROTO_TCP && - inside_iph->protocol != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr) + - (inside_iph->ihl << 2), - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_iph->protocol; - *laddr = inside_iph->saddr; - *lport = ports[0]; - *raddr = inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - /* "socket" match based redirection (no specific rule) * =================================================== * @@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb, * then local services could intercept traffic going through the * box. */ -static struct sock * -xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, - const u8 protocol, - const __be32 saddr, const __be32 daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - return NULL; -} - -static bool xt_socket_sk_is_transparent(struct sock *sk) -{ - switch (sk->sk_state) { - case TCP_TIME_WAIT: - return inet_twsk(sk)->tw_transparent; - - case TCP_NEW_SYN_RECV: - return inet_rsk(inet_reqsk(sk))->no_srccheck; - - default: - return inet_sk(sk)->transparent; - } -} - -static struct sock *xt_socket_lookup_slow_v4(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - const struct iphdr *iph = ip_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - __be32 uninitialized_var(daddr), uninitialized_var(saddr); - __be16 uninitialized_var(dport), uninitialized_var(sport); - u8 uninitialized_var(protocol); -#ifdef XT_SOCKET_HAVE_CONNTRACK - struct nf_conn const *ct; - enum ip_conntrack_info ctinfo; -#endif - - if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - protocol = iph->protocol; - saddr = iph->saddr; - sport = hp->source; - daddr = iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = iph->protocol == IPPROTO_TCP ? - ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : - ip_hdrlen(skb) + sizeof(*hp); - - } else if (iph->protocol == IPPROTO_ICMP) { - if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, - &sport, &dport)) - return NULL; - } else { - return NULL; - } - -#ifdef XT_SOCKET_HAVE_CONNTRACK - /* Do the lookup with the original socket address in - * case this is a reply packet of an established - * SNAT-ted connection. - */ - ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && - ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_ESTABLISHED_REPLY) || - (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_RELATED_REPLY)) && - (ct->status & IPS_SRC_NAT_DONE)) { - - daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; - dport = (iph->protocol == IPPROTO_TCP) ? - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; - } -#endif - - return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, - daddr, sport, dport, indev); -} - static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) @@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v4(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) return socket_match(skb, par, par->matchinfo); } -#ifdef XT_SOCKET_HAVE_IPV6 - -static int -extract_icmp6_fields(const struct sk_buff *skb, - unsigned int outside_hdrlen, - int *protocol, - const struct in6_addr **raddr, - const struct in6_addr **laddr, - __be16 *rport, - __be16 *lport, - struct ipv6hdr *ipv6_var) -{ - const struct ipv6hdr *inside_iph; - struct icmp6hdr *icmph, _icmph; - __be16 *ports, _ports[2]; - u8 inside_nexthdr; - __be16 inside_fragoff; - int inside_hdrlen; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) - return 1; - - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), - sizeof(*ipv6_var), ipv6_var); - if (inside_iph == NULL) - return 1; - inside_nexthdr = inside_iph->nexthdr; - - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + - sizeof(*ipv6_var), - &inside_nexthdr, &inside_fragoff); - if (inside_hdrlen < 0) - return 1; /* hjm: Packet has no/incomplete transport layer headers. */ - - if (inside_nexthdr != IPPROTO_TCP && - inside_nexthdr != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, inside_hdrlen, - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_nexthdr; - *laddr = &inside_iph->saddr; - *lport = ports[0]; - *raddr = &inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - -static struct sock * -xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, - const u8 protocol, - const struct in6_addr *saddr, const struct in6_addr *daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet6_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp6_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - - return NULL; -} - -static struct sock *xt_socket_lookup_slow_v6(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - __be16 uninitialized_var(dport), uninitialized_var(sport); - const struct in6_addr *daddr = NULL, *saddr = NULL; - struct ipv6hdr *iph = ipv6_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - int thoff = 0, tproto; - - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { - pr_debug("unable to find transport header in IPv6 packet, dropping\n"); - return NULL; - } - - if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - saddr = &iph->saddr; - sport = hp->source; - daddr = &iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = tproto == IPPROTO_TCP ? - thoff + __tcp_hdrlen((struct tcphdr *)hp) : - thoff + sizeof(*hp); - - } else if (tproto == IPPROTO_ICMPV6) { - struct ipv6hdr ipv6_var; - - if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport, &ipv6_var)) - return NULL; - } else { - return NULL; - } - - return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, - sport, dport, indev); -} - +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static bool socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { @@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v6(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -488,7 +203,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 1, @@ -512,7 +227,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 2, @@ -536,7 +251,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 3, @@ -554,7 +269,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_defrag_ipv6_enable(); #endif From 2fa841938c648fe4359691f41e8e1f37ff1a3aa2 Mon Sep 17 00:00:00 2001 From: "Anders K. Pedersen" Date: Fri, 28 Oct 2016 05:54:15 +0000 Subject: [PATCH 10/12] netfilter: nf_tables: introduce routing expression Introduces an nftables rt expression for routing related data with support for nexthop (i.e. the directly connected IP address that an outgoing packet is sent to), which can be used either for matching or accounting, eg. # nft add rule filter postrouting \ ip daddr 192.168.1.0/24 rt nexthop != 192.168.0.1 drop This will drop any traffic to 192.168.1.0/24 that is not routed via 192.168.0.1. # nft add rule filter postrouting \ flow table acct { rt nexthop timeout 600s counter } # nft add rule ip6 filter postrouting \ flow table acct { rt nexthop timeout 600s counter } These rules count outgoing traffic per nexthop. Note that the timeout releases an entry if no traffic is seen for this nexthop within 10 minutes. # nft add rule inet filter postrouting \ ether type ip \ flow table acct { rt nexthop timeout 600s counter } # nft add rule inet filter postrouting \ ether type ip6 \ flow table acct { rt nexthop timeout 600s counter } Same as above, but via the inet family, where the ether type must be specified explicitly. "rt classid" is also implemented identical to "meta rtclassid", since it is more logical to have this match in the routing expression going forward. Signed-off-by: Anders K. Pedersen Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 27 ++++ net/netfilter/Kconfig | 6 + net/netfilter/Makefile | 1 + net/netfilter/nft_rt.c | 153 +++++++++++++++++++++++ 4 files changed, 187 insertions(+) create mode 100644 net/netfilter/nft_rt.c diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a054ad2c8853..14e5f619167e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -758,6 +758,19 @@ enum nft_meta_keys { NFT_META_PRANDOM, }; +/** + * enum nft_rt_keys - nf_tables routing expression keys + * + * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 + * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + */ +enum nft_rt_keys { + NFT_RT_CLASSID, + NFT_RT_NEXTHOP4, + NFT_RT_NEXTHOP6, +}; + /** * enum nft_hash_attributes - nf_tables hash expression netlink attributes * @@ -796,6 +809,20 @@ enum nft_meta_attributes { }; #define NFTA_META_MAX (__NFTA_META_MAX - 1) +/** + * enum nft_rt_attributes - nf_tables routing expression netlink attributes + * + * @NFTA_RT_DREG: destination register (NLA_U32) + * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys) + */ +enum nft_rt_attributes { + NFTA_RT_UNSPEC, + NFTA_RT_DREG, + NFTA_RT_KEY, + __NFTA_RT_MAX +}; +#define NFTA_RT_MAX (__NFTA_RT_MAX - 1) + /** * enum nft_ct_keys - nf_tables ct expression keys * diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ed00ec114ea2..44410d30d461 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -478,6 +478,12 @@ config NFT_META This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. +config NFT_RT + tristate "Netfilter nf_tables routing module" + help + This option adds the "rt" expression that you can use to match + packet routing information such as the packet nexthop. + config NFT_NUMGEN tristate "Netfilter nf_tables number generator module" help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 8edd791743fe..5bbf767672ec 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_RT) += nft_rt.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c new file mode 100644 index 000000000000..9e5ec1f67020 --- /dev/null +++ b/net/netfilter/nft_rt.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2016 Anders K. Pedersen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_rt { + enum nft_rt_keys key:8; + enum nft_registers dreg:8; +}; + +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + u32 *dest = ®s->data[priv->dreg]; + const struct dst_entry *dst; + + dst = skb_dst(skb); + if (!dst) + goto err; + + switch (priv->key) { +#ifdef CONFIG_IP_ROUTE_CLASSID + case NFT_RT_CLASSID: + *dest = dst->tclassid; + break; +#endif + case NFT_RT_NEXTHOP4: + if (pkt->pf != NFPROTO_IPV4) + goto err; + + *dest = rt_nexthop((const struct rtable *)dst, + ip_hdr(skb)->daddr); + break; + case NFT_RT_NEXTHOP6: + if (pkt->pf != NFPROTO_IPV6) + goto err; + + memcpy(dest, rt6_nexthop((struct rt6_info *)dst, + &ipv6_hdr(skb)->daddr), + sizeof(struct in6_addr)); + break; + default: + WARN_ON(1); + goto err; + } + return; + +err: + regs->verdict.code = NFT_BREAK; +} + +const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { + [NFTA_RT_DREG] = { .type = NLA_U32 }, + [NFTA_RT_KEY] = { .type = NLA_U32 }, +}; + +int nft_rt_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_rt *priv = nft_expr_priv(expr); + unsigned int len; + + if (tb[NFTA_RT_KEY] == NULL || + tb[NFTA_RT_DREG] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY])); + switch (priv->key) { +#ifdef CONFIG_IP_ROUTE_CLASSID + case NFT_RT_CLASSID: +#endif + case NFT_RT_NEXTHOP4: + len = sizeof(u32); + break; + case NFT_RT_NEXTHOP6: + len = sizeof(struct in6_addr); + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); +} + +int nft_rt_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_RT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_RT_DREG, priv->dreg)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_rt_type; +static const struct nft_expr_ops nft_rt_get_ops = { + .type = &nft_rt_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)), + .eval = nft_rt_get_eval, + .init = nft_rt_get_init, + .dump = nft_rt_get_dump, +}; + +static struct nft_expr_type nft_rt_type __read_mostly = { + .name = "rt", + .ops = &nft_rt_get_ops, + .policy = nft_rt_policy, + .maxattr = NFTA_RT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_rt_module_init(void) +{ + return nft_register_expr(&nft_rt_type); +} + +static void __exit nft_rt_module_exit(void) +{ + nft_unregister_expr(&nft_rt_type); +} + +module_init(nft_rt_module_init); +module_exit(nft_rt_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Anders K. Pedersen "); +MODULE_ALIAS_NFT_EXPR("rt"); From e41e9d623cd77dbe970b15a4c06a0765c3657bdd Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 29 Oct 2016 21:56:27 +0800 Subject: [PATCH 11/12] netfilter: nf_tables: remove useless U8_MAX validation After call nft_data_init, size is already validated and desc.len will not exceed the sizeof(struct nft_data), i.e. 16 bytes. So it will never exceed U8_MAX. Furthermore, in nft_immediate_init, we forget to call nft_data_uninit when desc.len exceeds U8_MAX, although this will not happen, but it's a logical mistake. Now remove these redundant validation introduced by commit 36b701fae12a ("netfilter: nf_tables: validate maximum value of u32 netlink attributes") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_cmp.c | 3 --- net/netfilter/nft_immediate.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2e53739812b1..e25b35d70e4d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -84,9 +84,6 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - if (desc.len > U8_MAX) - return -ERANGE; - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index d17018ff54e6..4528adea7ede 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -54,9 +54,6 @@ static int nft_immediate_init(const struct nft_ctx *ctx, if (err < 0) return err; - if (desc.len > U8_MAX) - return -ERANGE; - priv->dlen = desc.len; priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); From 886bc50348fd69972baa4616ce95b7dc9ff8afe7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 31 Oct 2016 00:35:07 +0100 Subject: [PATCH 12/12] netfilter: nf_queue: place volatile data in own cacheline As the comment indicates, the data at the end of nfqnl_instance struct is written on every queue/dequeue, so it should reside in its own cacheline. Before this change, 'lock' was in first cacheline so we dirtied both. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index af832c526048..5379f788a372 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -69,7 +69,7 @@ struct nfqnl_instance { * Following fields are dirtied for each queued packet, * keep them in same cache line if possible. */ - spinlock_t lock; + spinlock_t lock ____cacheline_aligned_in_smp; unsigned int queue_total; unsigned int id_sequence; /* 'sequence' of pkt ids */ struct list_head queue_list; /* packets in queue */