From ba062ebb2cd561d404e0fba8ee4b3f5ebce7cbfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jun 2018 09:13:39 -0700 Subject: [PATCH 1/6] netfilter: nf_queue: augment nfqa_cfg_policy Three attributes are currently not verified, thus can trigger KMSAN warnings such as : BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline] BUG: KMSAN: uninit-value in nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268 CPU: 1 PID: 4521 Comm: syz-executor120 Not tainted 4.17.0+ #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117 __msan_warning_32+0x70/0xc0 mm/kmsan/kmsan_instr.c:620 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] __fswab32 include/uapi/linux/swab.h:59 [inline] nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268 nfnetlink_rcv_msg+0xb2e/0xc80 net/netfilter/nfnetlink.c:212 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448 nfnetlink_rcv+0x2fe/0x680 net/netfilter/nfnetlink.c:513 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x43fd59 RSP: 002b:00007ffde0e30d28 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fd59 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 0000000000401680 R13: 0000000000401710 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2753 [inline] __kmalloc_node_track_caller+0xb35/0x11b0 mm/slub.c:4395 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:988 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: fdb694a01f1f ("netfilter: Add fail-open support") Fixes: 829e17a1a602 ("[NETFILTER]: nfnetlink_queue: allow changing queue length through netlink") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 4ccd2988f9db..ea4ba551abb2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1243,6 +1243,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl, static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, + [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 }, + [NFQA_CFG_MASK] = { .type = NLA_U32 }, + [NFQA_CFG_FLAGS] = { .type = NLA_U32 }, }; static const struct nf_queue_handler nfqh = { From 9ce7bc036ae4cfe3393232c86e9e1fea2153c237 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jun 2018 10:11:56 -0700 Subject: [PATCH 2/6] netfilter: ipv6: nf_defrag: reduce struct net memory waste It is a waste of memory to use a full "struct netns_sysctl_ipv6" while only one pointer is really used, considering netns_sysctl_ipv6 keeps growing. Also, since "struct netns_frags" has cache line alignment, it is better to move the frags_hdr pointer outside, otherwise we spend a full cache line for this pointer. This saves 192 bytes of memory per netns. Fixes: c038a767cd69 ("ipv6: add a new namespace for nf_conntrack_reasm") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 1 + include/net/netns/ipv6.h | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 47e35cce3b64..a71264d75d7f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -128,6 +128,7 @@ struct net { #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag nf_frag; + struct ctl_table_header *nf_frag_frags_hdr; #endif struct sock *nfnl; struct sock *nfnl_stash; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index c978a31b0f84..762ac9931b62 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -109,7 +109,6 @@ struct netns_ipv6 { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag { - struct netns_sysctl_ipv6 sysctl; struct netns_frags frags; }; #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 5e0332014c17..a452d99c9f52 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -107,7 +107,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->nf_frag.sysctl.frags_hdr = hdr; + net->nf_frag_frags_hdr = hdr; return 0; err_reg: @@ -121,8 +121,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) { struct ctl_table *table; - table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + table = net->nf_frag_frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag_frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } From ad9852af97587b8abe8102f9ddcb05c9769656f6 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 13 Jun 2018 12:26:13 +0800 Subject: [PATCH 3/6] netfilter: nf_ct_helper: Fix possible panic after nf_conntrack_helper_unregister The helper module would be unloaded after nf_conntrack_helper_unregister, so it may cause a possible panic caused by race. nf_ct_iterate_destroy(unhelp, me) reset the helper of conntrack as NULL, but maybe someone has gotten the helper pointer during this period. Then it would panic, when it accesses the helper and the module was unloaded. Take an example as following: CPU0 CPU1 ctnetlink_dump_helpinfo helper = rcu_dereference(help->helper); unhelp set helper as NULL unload helper module helper->to_nlattr(skb, ct); As above, the cpu0 tries to access the helper and its module is unloaded, then the panic happens. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 551a1eddf0fa..a75b11c39312 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -465,6 +465,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); + + /* Maybe someone has gotten the helper already when unhelp above. + * So need to wait it. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); From dffd22aed2aa1e804bccf19b30a421e89ee2ae61 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 20 Jun 2018 18:33:45 +0200 Subject: [PATCH 4/6] netfilter: nf_log: fix uninit read in nf_log_proc_dostring When proc_dostring() is called with a non-zero offset in strict mode, it doesn't just write to the ->data buffer, it also reads. Make sure it doesn't read uninitialized data. Fixes: c6ac37d8d884 ("netfilter: nf_log: fix error on write NONE to [...]") Signed-off-by: Jann Horn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 426457047578..2c47f9ec3511 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -424,6 +424,10 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, if (write) { struct ctl_table tmp = *table; + /* proc_dostring() can append to existing strings, so we need to + * initialize it as an empty string. + */ + buf[0] = '\0'; tmp.data = buf; r = proc_dostring(&tmp, write, buffer, lenp, ppos); if (r) From ce00bf07cc95a57cd20b208e02b3c2604e532ae8 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 17:22:00 +0200 Subject: [PATCH 5/6] netfilter: nf_log: don't hold nf_log_mutex during user access The old code would indefinitely block other users of nf_log_mutex if a userspace access in proc_dostring() blocked e.g. due to a userfaultfd region. Fix it by moving proc_dostring() out of the locked region. This is a followup to commit 266d07cb1c9a ("netfilter: nf_log: fix sleeping function called from invalid context"), which changed this code from using rcu_read_lock() to taking nf_log_mutex. Fixes: 266d07cb1c9a ("netfilter: nf_log: fix sleeping function calle[...]") Signed-off-by: Jann Horn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 2c47f9ec3511..a61d6df6e5f6 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -446,14 +446,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { + struct ctl_table tmp = *table; + + tmp.data = buf; mutex_lock(&nf_log_mutex); logger = nft_log_dereference(net->nf.nf_loggers[tindex]); if (!logger) - table->data = "NONE"; + strlcpy(buf, "NONE", sizeof(buf)); else - table->data = logger->name; - r = proc_dostring(table, write, buffer, lenp, ppos); + strlcpy(buf, logger->name, sizeof(buf)); mutex_unlock(&nf_log_mutex); + r = proc_dostring(&tmp, write, buffer, lenp, ppos); } return r; From b36e4523d4d56e2595e28f16f6ccf1cd6a9fc452 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Jun 2018 23:32:26 +0200 Subject: [PATCH 6/6] netfilter: nf_conncount: fix garbage collection confirm race Yi-Hung Wei and Justin Pettit found a race in the garbage collection scheme used by nf_conncount. When doing list walk, we lookup the tuple in the conntrack table. If the lookup fails we remove this tuple from our list because the conntrack entry is gone. This is the common cause, but turns out its not the only one. The list entry could have been created just before by another cpu, i.e. the conntrack entry might not yet have been inserted into the global hash. The avoid this, we introduce a timestamp and the owning cpu. If the entry appears to be stale, evict only if: 1. The current cpu is the one that added the entry, or, 2. The timestamp is older than two jiffies The second constraint allows GC to be taken over by other cpu too (e.g. because a cpu was offlined or napi got moved to another cpu). We can't pretend the 'doubtful' entry wasn't in our list. Instead, when we don't find an entry indicate via IS_ERR that entry was removed ('did not exist' or withheld ('might-be-unconfirmed'). This most likely also fixes a xt_connlimit imbalance earlier reported by Dmitry Andrianov. Cc: Dmitry Andrianov Reported-by: Justin Pettit Reported-by: Yi-Hung Wei Signed-off-by: Florian Westphal Acked-by: Yi-Hung Wei Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 52 ++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index d8383609fe28..510039862aa9 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -47,6 +47,8 @@ struct nf_conncount_tuple { struct hlist_node node; struct nf_conntrack_tuple tuple; struct nf_conntrack_zone zone; + int cpu; + u32 jiffies32; }; struct nf_conncount_rb { @@ -91,11 +93,42 @@ bool nf_conncount_add(struct hlist_head *head, return false; conn->tuple = *tuple; conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; hlist_add_head(&conn->node, head); return true; } EXPORT_SYMBOL_GPL(nf_conncount_add); +static const struct nf_conntrack_tuple_hash * +find_or_evict(struct net *net, struct nf_conncount_tuple *conn) +{ + const struct nf_conntrack_tuple_hash *found; + unsigned long a, b; + int cpu = raw_smp_processor_id(); + __s32 age; + + found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); + if (found) + return found; + b = conn->jiffies32; + a = (u32)jiffies; + + /* conn might have been added just before by another cpu and + * might still be unconfirmed. In this case, nf_conntrack_find() + * returns no result. Thus only evict if this cpu added the + * stale entry or if the entry is older than two jiffies. + */ + age = a - b; + if (conn->cpu == cpu || age >= 2) { + hlist_del(&conn->node); + kmem_cache_free(conncount_conn_cachep, conn); + return ERR_PTR(-ENOENT); + } + + return ERR_PTR(-EAGAIN); +} + unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone, @@ -103,18 +136,27 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, { const struct nf_conntrack_tuple_hash *found; struct nf_conncount_tuple *conn; - struct hlist_node *n; struct nf_conn *found_ct; + struct hlist_node *n; unsigned int length = 0; *addit = tuple ? true : false; /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); - if (found == NULL) { - hlist_del(&conn->node); - kmem_cache_free(conncount_conn_cachep, conn); + found = find_or_evict(net, conn); + if (IS_ERR(found)) { + /* Not found, but might be about to be confirmed */ + if (PTR_ERR(found) == -EAGAIN) { + length++; + if (!tuple) + continue; + + if (nf_ct_tuple_equal(&conn->tuple, tuple) && + nf_ct_zone_id(&conn->zone, conn->zone.dir) == + nf_ct_zone_id(zone, zone->dir)) + *addit = false; + } continue; }