mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-15 23:25:07 +00:00
f9da455b93
Pull networking updates from David Miller: 1) Seccomp BPF filters can now be JIT'd, from Alexei Starovoitov. 2) Multiqueue support in xen-netback and xen-netfront, from Andrew J Benniston. 3) Allow tweaking of aggregation settings in cdc_ncm driver, from Bjørn Mork. 4) BPF now has a "random" opcode, from Chema Gonzalez. 5) Add more BPF documentation and improve test framework, from Daniel Borkmann. 6) Support TCP fastopen over ipv6, from Daniel Lee. 7) Add software TSO helper functions and use them to support software TSO in mvneta and mv643xx_eth drivers. From Ezequiel Garcia. 8) Support software TSO in fec driver too, from Nimrod Andy. 9) Add Broadcom SYSTEMPORT driver, from Florian Fainelli. 10) Handle broadcasts more gracefully over macvlan when there are large numbers of interfaces configured, from Herbert Xu. 11) Allow more control over fwmark used for non-socket based responses, from Lorenzo Colitti. 12) Do TCP congestion window limiting based upon measurements, from Neal Cardwell. 13) Support busy polling in SCTP, from Neal Horman. 14) Allow RSS key to be configured via ethtool, from Venkata Duvvuru. 15) Bridge promisc mode handling improvements from Vlad Yasevich. 16) Don't use inetpeer entries to implement ID generation any more, it performs poorly, from Eric Dumazet. 
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1522 commits) rtnetlink: fix userspace API breakage for iproute2 < v3.9.0 tcp: fixing TLP's FIN recovery net: fec: Add software TSO support net: fec: Add Scatter/gather support net: fec: Increase buffer descriptor entry number net: fec: Factorize feature setting net: fec: Enable IP header hardware checksum net: fec: Factorize the .xmit transmit function bridge: fix compile error when compiling without IPv6 support bridge: fix smatch warning / potential null pointer dereference via-rhine: fix full-duplex with autoneg disable bnx2x: Enlarge the dorq threshold for VFs bnx2x: Check for UNDI in uncommon branch bnx2x: Fix 1G-baseT link bnx2x: Fix link for KR with swapped polarity lane sctp: Fix sk_ack_backlog wrap-around problem net/core: Add VF link state control policy net/fsl: xgmac_mdio is dependent on OF_MDIO net/fsl: Make xgmac_mdio read error message useful net_sched: drr: warn when qdisc is not work conserving ...
454 lines
11 KiB
C
454 lines
11 KiB
C
/*
|
|
* (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
|
|
* (C) 2011 Intra2net AG <http://www.intra2net.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation (or any later at your option).
|
|
*/
|
|
#include <linux/init.h>
|
|
#include <linux/module.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/atomic.h>
|
|
#include <linux/netlink.h>
|
|
#include <linux/rculist.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/errno.h>
|
|
#include <net/netlink.h>
|
|
#include <net/sock.h>
|
|
|
|
#include <linux/netfilter.h>
|
|
#include <linux/netfilter/nfnetlink.h>
|
|
#include <linux/netfilter/nfnetlink_acct.h>
|
|
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
|
MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
|
|
|
|
/* List of all accounting objects in this file; entries are linked via nf_acct.head. */
static LIST_HEAD(nfnl_acct_list);
|
|
|
|
struct nf_acct {
|
|
atomic64_t pkts;
|
|
atomic64_t bytes;
|
|
unsigned long flags;
|
|
struct list_head head;
|
|
atomic_t refcnt;
|
|
char name[NFACCT_NAME_MAX];
|
|
struct rcu_head rcu_head;
|
|
char data[0];
|
|
};
|
|
|
|
/* Mask covering both quota-mode flags (packet-based and byte-based). */
#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
|
|
|
|
static int
|
|
nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
|
|
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
|
|
{
|
|
struct nf_acct *nfacct, *matching = NULL;
|
|
char *acct_name;
|
|
unsigned int size = 0;
|
|
u32 flags = 0;
|
|
|
|
if (!tb[NFACCT_NAME])
|
|
return -EINVAL;
|
|
|
|
acct_name = nla_data(tb[NFACCT_NAME]);
|
|
if (strlen(acct_name) == 0)
|
|
return -EINVAL;
|
|
|
|
list_for_each_entry(nfacct, &nfnl_acct_list, head) {
|
|
if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
|
|
continue;
|
|
|
|
if (nlh->nlmsg_flags & NLM_F_EXCL)
|
|
return -EEXIST;
|
|
|
|
matching = nfacct;
|
|
break;
|
|
}
|
|
|
|
if (matching) {
|
|
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
|
|
/* reset counters if you request a replacement. */
|
|
atomic64_set(&matching->pkts, 0);
|
|
atomic64_set(&matching->bytes, 0);
|
|
smp_mb__before_atomic();
|
|
/* reset overquota flag if quota is enabled. */
|
|
if ((matching->flags & NFACCT_F_QUOTA))
|
|
clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
|
|
return 0;
|
|
}
|
|
return -EBUSY;
|
|
}
|
|
|
|
if (tb[NFACCT_FLAGS]) {
|
|
flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
|
|
if (flags & ~NFACCT_F_QUOTA)
|
|
return -EOPNOTSUPP;
|
|
if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
|
|
return -EINVAL;
|
|
if (flags & NFACCT_F_OVERQUOTA)
|
|
return -EINVAL;
|
|
|
|
size += sizeof(u64);
|
|
}
|
|
|
|
nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
|
|
if (nfacct == NULL)
|
|
return -ENOMEM;
|
|
|
|
if (flags & NFACCT_F_QUOTA) {
|
|
u64 *quota = (u64 *)nfacct->data;
|
|
|
|
*quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
|
|
nfacct->flags = flags;
|
|
}
|
|
|
|
strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
|
|
|
|
if (tb[NFACCT_BYTES]) {
|
|
atomic64_set(&nfacct->bytes,
|
|
be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
|
|
}
|
|
if (tb[NFACCT_PKTS]) {
|
|
atomic64_set(&nfacct->pkts,
|
|
be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
|
|
}
|
|
atomic_set(&nfacct->refcnt, 1);
|
|
list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
|
|
int event, struct nf_acct *acct)
|
|
{
|
|
struct nlmsghdr *nlh;
|
|
struct nfgenmsg *nfmsg;
|
|
unsigned int flags = portid ? NLM_F_MULTI : 0;
|
|
u64 pkts, bytes;
|
|
|
|
event |= NFNL_SUBSYS_ACCT << 8;
|
|
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
|
|
if (nlh == NULL)
|
|
goto nlmsg_failure;
|
|
|
|
nfmsg = nlmsg_data(nlh);
|
|
nfmsg->nfgen_family = AF_UNSPEC;
|
|
nfmsg->version = NFNETLINK_V0;
|
|
nfmsg->res_id = 0;
|
|
|
|
if (nla_put_string(skb, NFACCT_NAME, acct->name))
|
|
goto nla_put_failure;
|
|
|
|
if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
|
|
pkts = atomic64_xchg(&acct->pkts, 0);
|
|
bytes = atomic64_xchg(&acct->bytes, 0);
|
|
smp_mb__before_atomic();
|
|
if (acct->flags & NFACCT_F_QUOTA)
|
|
clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
|
|
} else {
|
|
pkts = atomic64_read(&acct->pkts);
|
|
bytes = atomic64_read(&acct->bytes);
|
|
}
|
|
if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) ||
|
|
nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
|
|
nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
|
|
goto nla_put_failure;
|
|
if (acct->flags & NFACCT_F_QUOTA) {
|
|
u64 *quota = (u64 *)acct->data;
|
|
|
|
if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
|
|
nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
|
|
goto nla_put_failure;
|
|
}
|
|
nlmsg_end(skb, nlh);
|
|
return skb->len;
|
|
|
|
nlmsg_failure:
|
|
nla_put_failure:
|
|
nlmsg_cancel(skb, nlh);
|
|
return -1;
|
|
}
|
|
|
|
static int
|
|
nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
|
|
{
|
|
struct nf_acct *cur, *last;
|
|
|
|
if (cb->args[2])
|
|
return 0;
|
|
|
|
last = (struct nf_acct *)cb->args[1];
|
|
if (cb->args[1])
|
|
cb->args[1] = 0;
|
|
|
|
rcu_read_lock();
|
|
list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
|
|
if (last) {
|
|
if (cur != last)
|
|
continue;
|
|
|
|
last = NULL;
|
|
}
|
|
if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
|
|
cb->nlh->nlmsg_seq,
|
|
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
|
|
NFNL_MSG_ACCT_NEW, cur) < 0) {
|
|
cb->args[1] = (unsigned long)cur;
|
|
break;
|
|
}
|
|
}
|
|
if (!cb->args[1])
|
|
cb->args[2] = 1;
|
|
rcu_read_unlock();
|
|
return skb->len;
|
|
}
|
|
|
|
static int
|
|
nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
|
|
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
|
|
{
|
|
int ret = -ENOENT;
|
|
struct nf_acct *cur;
|
|
char *acct_name;
|
|
|
|
if (nlh->nlmsg_flags & NLM_F_DUMP) {
|
|
struct netlink_dump_control c = {
|
|
.dump = nfnl_acct_dump,
|
|
};
|
|
return netlink_dump_start(nfnl, skb, nlh, &c);
|
|
}
|
|
|
|
if (!tb[NFACCT_NAME])
|
|
return -EINVAL;
|
|
acct_name = nla_data(tb[NFACCT_NAME]);
|
|
|
|
list_for_each_entry(cur, &nfnl_acct_list, head) {
|
|
struct sk_buff *skb2;
|
|
|
|
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
|
|
continue;
|
|
|
|
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
if (skb2 == NULL) {
|
|
ret = -ENOMEM;
|
|
break;
|
|
}
|
|
|
|
ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
|
|
nlh->nlmsg_seq,
|
|
NFNL_MSG_TYPE(nlh->nlmsg_type),
|
|
NFNL_MSG_ACCT_NEW, cur);
|
|
if (ret <= 0) {
|
|
kfree_skb(skb2);
|
|
break;
|
|
}
|
|
ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
|
|
MSG_DONTWAIT);
|
|
if (ret > 0)
|
|
ret = 0;
|
|
|
|
/* this avoids a loop in nfnetlink. */
|
|
return ret == -EAGAIN ? -ENOBUFS : ret;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* try to delete object, fail if it is still in use. */
|
|
static int nfnl_acct_try_del(struct nf_acct *cur)
|
|
{
|
|
int ret = 0;
|
|
|
|
/* we want to avoid races with nfnl_acct_find_get. */
|
|
if (atomic_dec_and_test(&cur->refcnt)) {
|
|
/* We are protected by nfnl mutex. */
|
|
list_del_rcu(&cur->head);
|
|
kfree_rcu(cur, rcu_head);
|
|
} else {
|
|
/* still in use, restore reference counter. */
|
|
atomic_inc(&cur->refcnt);
|
|
ret = -EBUSY;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
|
|
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
|
|
{
|
|
char *acct_name;
|
|
struct nf_acct *cur;
|
|
int ret = -ENOENT;
|
|
|
|
if (!tb[NFACCT_NAME]) {
|
|
list_for_each_entry(cur, &nfnl_acct_list, head)
|
|
nfnl_acct_try_del(cur);
|
|
|
|
return 0;
|
|
}
|
|
acct_name = nla_data(tb[NFACCT_NAME]);
|
|
|
|
list_for_each_entry(cur, &nfnl_acct_list, head) {
|
|
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
|
|
continue;
|
|
|
|
ret = nfnl_acct_try_del(cur);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
break;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
|
|
[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
|
|
[NFACCT_BYTES] = { .type = NLA_U64 },
|
|
[NFACCT_PKTS] = { .type = NLA_U64 },
|
|
[NFACCT_FLAGS] = { .type = NLA_U32 },
|
|
[NFACCT_QUOTA] = { .type = NLA_U64 },
|
|
};
|
|
|
|
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
|
|
[NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
|
|
.attr_count = NFACCT_MAX,
|
|
.policy = nfnl_acct_policy },
|
|
[NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
|
|
.attr_count = NFACCT_MAX,
|
|
.policy = nfnl_acct_policy },
|
|
[NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
|
|
.attr_count = NFACCT_MAX,
|
|
.policy = nfnl_acct_policy },
|
|
[NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
|
|
.attr_count = NFACCT_MAX,
|
|
.policy = nfnl_acct_policy },
|
|
};
|
|
|
|
static const struct nfnetlink_subsystem nfnl_acct_subsys = {
|
|
.name = "acct",
|
|
.subsys_id = NFNL_SUBSYS_ACCT,
|
|
.cb_count = NFNL_MSG_ACCT_MAX,
|
|
.cb = nfnl_acct_cb,
|
|
};
|
|
|
|
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
|
|
|
|
struct nf_acct *nfnl_acct_find_get(const char *acct_name)
|
|
{
|
|
struct nf_acct *cur, *acct = NULL;
|
|
|
|
rcu_read_lock();
|
|
list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
|
|
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
|
|
continue;
|
|
|
|
if (!try_module_get(THIS_MODULE))
|
|
goto err;
|
|
|
|
if (!atomic_inc_not_zero(&cur->refcnt)) {
|
|
module_put(THIS_MODULE);
|
|
goto err;
|
|
}
|
|
|
|
acct = cur;
|
|
break;
|
|
}
|
|
err:
|
|
rcu_read_unlock();
|
|
return acct;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
|
|
|
|
void nfnl_acct_put(struct nf_acct *acct)
|
|
{
|
|
atomic_dec(&acct->refcnt);
|
|
module_put(THIS_MODULE);
|
|
}
|
|
EXPORT_SYMBOL_GPL(nfnl_acct_put);
|
|
|
|
void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
|
|
{
|
|
atomic64_inc(&nfacct->pkts);
|
|
atomic64_add(skb->len, &nfacct->bytes);
|
|
}
|
|
EXPORT_SYMBOL_GPL(nfnl_acct_update);
|
|
|
|
static void nfnl_overquota_report(struct nf_acct *nfacct)
|
|
{
|
|
int ret;
|
|
struct sk_buff *skb;
|
|
|
|
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
|
|
if (skb == NULL)
|
|
return;
|
|
|
|
ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
|
|
nfacct);
|
|
if (ret <= 0) {
|
|
kfree_skb(skb);
|
|
return;
|
|
}
|
|
netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
|
|
GFP_ATOMIC);
|
|
}
|
|
|
|
int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
|
|
{
|
|
u64 now;
|
|
u64 *quota;
|
|
int ret = NFACCT_UNDERQUOTA;
|
|
|
|
/* no place here if we don't have a quota */
|
|
if (!(nfacct->flags & NFACCT_F_QUOTA))
|
|
return NFACCT_NO_QUOTA;
|
|
|
|
quota = (u64 *)nfacct->data;
|
|
now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
|
|
atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
|
|
|
|
ret = now > *quota;
|
|
|
|
if (now >= *quota &&
|
|
!test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) {
|
|
nfnl_overquota_report(nfacct);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
|
|
|
|
/*
 * Module init: register the accounting subsystem with nfnetlink.
 * Returns 0 on success or the negative error from registration.
 */
static int __init nfnl_acct_init(void)
{
	int err;

	pr_info("nfnl_acct: registering with nfnetlink.\n");

	err = nfnetlink_subsys_register(&nfnl_acct_subsys);
	if (err >= 0)
		return 0;

	pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
	return err;
}
|
|
|
|
/*
 * Module exit: unregister from nfnetlink, then unlink every remaining
 * accounting object and free it after an RCU grace period.
 */
static void __exit nfnl_acct_exit(void)
{
	struct nf_acct *acct, *next;

	pr_info("nfnl_acct: unregistering from nfnetlink.\n");
	nfnetlink_subsys_unregister(&nfnl_acct_subsys);

	/*
	 * We are sure that our objects have no clients at this point,
	 * it's safe to release them all without checking refcnt.
	 */
	list_for_each_entry_safe(acct, next, &nfnl_acct_list, head) {
		list_del_rcu(&acct->head);
		kfree_rcu(acct, rcu_head);
	}
}
|
|
|
|
module_init(nfnl_acct_init);
|
|
module_exit(nfnl_acct_exit);
|