linux-stable/net/core/filter.c
commit 5a0e3ad6af ("include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h") by Tejun Heo
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files.  percpu.h includes slab.h, which
in turn includes gfp.h, making everything defined by the two files
universally available and complicating inclusion dependencies.

The percpu.h -> slab.h dependency is about to be removed.  Prepare for
this change by updating users of gfp and slab facilities to include
those headers directly instead of assuming their availability.  As this
conversion needs to touch a large number of source files, the following
script was used as the basis of the conversion.

  http://userweb.kernel.org/~tj/misc/slabh-sweep.py

The script does the following:

* Scan files for gfp and slab usages and update the includes so that
  only the necessary ones are present, i.e. gfp.h if only gfp is used
  and slab.h if slab is used.

* When the script inserts a new include, it looks at the existing
  include blocks and tries to place the new include so that its order
  conforms to its surroundings.  It is put in the include block that
  contains core kernel includes, following whatever order the rest
  already use - alphabetical, Christmas tree, reverse Christmas tree -
  or at the end if there doesn't seem to be any matching order (see the
  example after this list).

* If the script can't find a place to put a new include (mostly
  because the file doesn't have a fitting include block), it prints out
  an error message indicating which .h file needs to be added to the
  file.
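
As an illustration of the placement rule above (a sketch, not actual
script output), inserting slab.h into an alphabetically ordered
core-kernel include block would look like this:

  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/slab.h>	/* newly inserted, keeping alphabetical order */
  #include <linux/string.h>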

The conversion was done in the following steps.

1. The initial automatic conversion of all .c files updated slightly
   over 4000 files, deleting around 700 includes and adding ~480 gfp.h
   and ~3000 slab.h inclusions.  The script emitted errors for ~400
   files.

2. Each error was manually checked.  Some files didn't need the
   inclusion, some needed a manual addition, and for others adding it
   to an implementation .h or the embedding .c file was more
   appropriate.  This step added inclusions to around 150 files.

3. The script was run again and the output was compared to the edits
   from #2 to make sure no file was left behind.

4. Several build tests were done and a couple of problems were fixed,
   e.g. lib/decompress_*.c used malloc()/free() wrappers around the
   slab APIs, requiring slab.h to be added manually (a sketch of such a
   wrapper follows this list).

5. The script was run on all .h files, but without automatically
   editing them, as sprinkling gfp.h and slab.h inclusions around .h
   files could easily lead to inclusion dependency hell.  Most gfp.h
   inclusion directives were ignored, as the definitions from gfp.h
   were usually widely available and often used in preprocessor
   macros.  Each slab.h inclusion directive was examined and added
   manually as necessary.

6. percpu.h was updated not to include slab.h.

7. Build tests were done on the following configurations and the
   failures were fixed.  CONFIG_GCOV_KERNEL was turned off for all
   tests (as my distributed build environment didn't work with gcov
   compiles), and a few more options had to be turned off depending on
   the arch to make things build (like ipr on powerpc/64, which failed
   due to a missing writeq).

   * x86 and x86_64 UP and SMP allmodconfig and a custom test config.
   * powerpc and powerpc64 SMP allmodconfig
   * sparc and sparc64 SMP allmodconfig
   * ia64 SMP allmodconfig
   * s390 SMP allmodconfig
   * alpha SMP allmodconfig
   * um on x86_64 SMP allmodconfig

8. The percpu.h modifications were reverted so that they could be
   applied as a separate patch and serve as a bisection point.
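
A minimal sketch of the kind of malloc()/free() wrapper meant in step 4
(hypothetical names and placement; the real decompressor helpers differ
in detail):

  /* slab-backed stand-ins for the libc-style calls the decompressors use */
  #include <linux/slab.h>

  #define malloc(size)	kmalloc((size), GFP_KERNEL)
  #define free(ptr)	kfree(ptr)

Because the wrappers expand to kmalloc()/kfree(), the file defining them
now needs slab.h explicitly instead of picking it up through percpu.h.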

Given that the build tests after step 6 produced only a couple of
failures, I'm fairly confident about the coverage of this conversion
patch.  If there is a breakage, it's likely to be something in one of
the arch headers, which should be easily discoverable on most builds of
the specific arch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-30 22:02:32 +09:00


/*
* Linux Socket Filter - Kernel level socket filtering
*
* Author:
* Jay Schulist <jschlst@samba.org>
*
* Based on the design of:
* - The Berkeley Packet Filter
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Andi Kleen - Fix a few bad bugs and races.
* Kris Katterjohn - Added many additional checks in sk_chk_filter()
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
/* No hurry in this branch */
static void *__load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
if (k >= SKF_NET_OFF)
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
return ptr;
return NULL;
}
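/*
 * Negative 'k' offsets are "virtual": offsets at or above SKF_NET_OFF are
 * resolved relative to the network header, those at or above SKF_LL_OFF
 * relative to the link-layer header (see __load_pointer() above), while
 * k >= SKF_AD_OFF is reserved for the ancillary data handled directly in
 * sk_run_filter().
 */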
static inline void *load_pointer(struct sk_buff *skb, int k,
unsigned int size, void *buffer)
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
else {
if (k >= SKF_AD_OFF)
return NULL;
return __load_pointer(skb, k);
}
}
/**
* sk_filter - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
*
* Run the filter code and then cut skb->data to the correct size returned
* by sk_run_filter. If pkt_len is 0 we toss the packet. If skb->len is
* smaller than pkt_len we keep the whole skb->data. This is the socket
* level wrapper to sk_run_filter. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
int err;
struct sk_filter *filter;
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
unsigned int pkt_len = sk_run_filter(skb, filter->insns,
filter->len);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
rcu_read_unlock_bh();
return err;
}
EXPORT_SYMBOL(sk_filter);
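/*
 * Illustrative sketch (not part of the original file, hence under #if 0):
 * roughly how a receive path is expected to use sk_filter() before
 * queueing an skb, loosely modeled on sock_queue_rcv_skb().  The function
 * name below is hypothetical.
 */
#if 0
static int example_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = sk_filter(sk, skb);	/* may trim skb or return an error */

	if (err)
		return err;		/* e.g. -EPERM: filter dropped the packet */

	/* ... charge receive memory and queue skb to sk->sk_receive_queue ... */
	return 0;
}
#endif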
/**
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
* @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. skb is the data we are
* filtering, filter is the array of filter instructions, and
* flen is the number of filter blocks in the array.
*/
unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
struct sock_filter *fentry; /* We walk down these */
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
u32 tmp;
int k;
int pc;
/*
* Process array of filter instructions.
*/
for (pc = 0; pc < flen; pc++) {
fentry = &filter[pc];
switch (fentry->code) {
case BPF_ALU|BPF_ADD|BPF_X:
A += X;
continue;
case BPF_ALU|BPF_ADD|BPF_K:
A += fentry->k;
continue;
case BPF_ALU|BPF_SUB|BPF_X:
A -= X;
continue;
case BPF_ALU|BPF_SUB|BPF_K:
A -= fentry->k;
continue;
case BPF_ALU|BPF_MUL|BPF_X:
A *= X;
continue;
case BPF_ALU|BPF_MUL|BPF_K:
A *= fentry->k;
continue;
case BPF_ALU|BPF_DIV|BPF_X:
if (X == 0)
return 0;
A /= X;
continue;
case BPF_ALU|BPF_DIV|BPF_K:
A /= fentry->k;
continue;
case BPF_ALU|BPF_AND|BPF_X:
A &= X;
continue;
case BPF_ALU|BPF_AND|BPF_K:
A &= fentry->k;
continue;
case BPF_ALU|BPF_OR|BPF_X:
A |= X;
continue;
case BPF_ALU|BPF_OR|BPF_K:
A |= fentry->k;
continue;
case BPF_ALU|BPF_LSH|BPF_X:
A <<= X;
continue;
case BPF_ALU|BPF_LSH|BPF_K:
A <<= fentry->k;
continue;
case BPF_ALU|BPF_RSH|BPF_X:
A >>= X;
continue;
case BPF_ALU|BPF_RSH|BPF_K:
A >>= fentry->k;
continue;
case BPF_ALU|BPF_NEG:
A = -A;
continue;
case BPF_JMP|BPF_JA:
pc += fentry->k;
continue;
case BPF_JMP|BPF_JGT|BPF_K:
pc += (A > fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGE|BPF_K:
pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JEQ|BPF_K:
pc += (A == fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JSET|BPF_K:
pc += (A & fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGT|BPF_X:
pc += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGE|BPF_X:
pc += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JEQ|BPF_X:
pc += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JSET|BPF_X:
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_LD|BPF_W|BPF_ABS:
k = fentry->k;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
A = get_unaligned_be32(ptr);
continue;
}
break;
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
A = get_unaligned_be16(ptr);
continue;
}
break;
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
A = *(u8 *)ptr;
continue;
}
break;
case BPF_LD|BPF_W|BPF_LEN:
A = skb->len;
continue;
case BPF_LDX|BPF_W|BPF_LEN:
X = skb->len;
continue;
case BPF_LD|BPF_W|BPF_IND:
k = X + fentry->k;
goto load_w;
case BPF_LD|BPF_H|BPF_IND:
k = X + fentry->k;
goto load_h;
case BPF_LD|BPF_B|BPF_IND:
k = X + fentry->k;
goto load_b;
case BPF_LDX|BPF_B|BPF_MSH:
ptr = load_pointer(skb, fentry->k, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_LD|BPF_IMM:
A = fentry->k;
continue;
case BPF_LDX|BPF_IMM:
X = fentry->k;
continue;
case BPF_LD|BPF_MEM:
A = mem[fentry->k];
continue;
case BPF_LDX|BPF_MEM:
X = mem[fentry->k];
continue;
case BPF_MISC|BPF_TAX:
X = A;
continue;
case BPF_MISC|BPF_TXA:
A = X;
continue;
case BPF_RET|BPF_K:
return fentry->k;
case BPF_RET|BPF_A:
return A;
case BPF_ST:
mem[fentry->k] = A;
continue;
case BPF_STX:
mem[fentry->k] = X;
continue;
default:
WARN_ON(1);
return 0;
}
/*
* Handle ancillary data, which are impossible
* (or very difficult) to obtain by parsing the packet contents.
*/
switch (k-SKF_AD_OFF) {
case SKF_AD_PROTOCOL:
A = ntohs(skb->protocol);
continue;
case SKF_AD_PKTTYPE:
A = skb->pkt_type;
continue;
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
case SKF_AD_MARK:
A = skb->mark;
continue;
case SKF_AD_QUEUE:
A = skb->queue_mapping;
continue;
case SKF_AD_NLATTR: {
struct nlattr *nla;
if (skb_is_nonlinear(skb))
return 0;
if (A > skb->len - sizeof(struct nlattr))
return 0;
nla = nla_find((struct nlattr *)&skb->data[A],
skb->len - A, X);
if (nla)
A = (void *)nla - (void *)skb->data;
else
A = 0;
continue;
}
case SKF_AD_NLATTR_NEST: {
struct nlattr *nla;
if (skb_is_nonlinear(skb))
return 0;
if (A > skb->len - sizeof(struct nlattr))
return 0;
nla = (struct nlattr *)&skb->data[A];
/* make sure the attribute itself fits inside the skb data */
if (nla->nla_len > skb->len - A)
return 0;
nla = nla_find_nested(nla, X);
if (nla)
A = (void *)nla - (void *)skb->data;
else
A = 0;
continue;
}
default:
return 0;
}
}
return 0;
}
EXPORT_SYMBOL(sk_run_filter);
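/*
 * Illustrative sketch (not part of the original file, hence under #if 0):
 * a classic BPF program expressed as the struct sock_filter array that
 * sk_chk_filter() and sk_run_filter() operate on.  Assuming the packet
 * data starts at the Ethernet header (as with packet sockets), it loads
 * the EtherType halfword at offset 12 and accepts up to 0xffff bytes of
 * the packet if it is IPv4, otherwise returns 0 to drop it.  The array
 * name is hypothetical.
 */
#if 0
static const struct sock_filter example_ipv4_only[] = {
	{ BPF_LD  | BPF_H   | BPF_ABS, 0, 0, 12     },	/* A = EtherType            */
	{ BPF_JMP | BPF_JEQ | BPF_K,   0, 1, 0x0800 },	/* IPv4 (ETH_P_IP)?         */
	{ BPF_RET | BPF_K,             0, 0, 0xffff },	/*   yes: accept the packet */
	{ BPF_RET | BPF_K,             0, 0, 0      },	/*   no: drop it            */
};
#endif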
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
* @flen: length of filter
*
* Check the user's filter code. If we let some ugly
* filter code slip through kaboom! The filter must contain
* no references or jumps that are out of range, no illegal
* instructions, and must end with a RET instruction.
*
* All jumps are forward as they are not signed.
*
* Returns 0 if the rule set is legal or -EINVAL if not.
*/
int sk_chk_filter(struct sock_filter *filter, int flen)
{
struct sock_filter *ftest;
int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
return -EINVAL;
/* check the filter code now */
for (pc = 0; pc < flen; pc++) {
ftest = &filter[pc];
/* Only allow valid instructions */
switch (ftest->code) {
case BPF_ALU|BPF_ADD|BPF_K:
case BPF_ALU|BPF_ADD|BPF_X:
case BPF_ALU|BPF_SUB|BPF_K:
case BPF_ALU|BPF_SUB|BPF_X:
case BPF_ALU|BPF_MUL|BPF_K:
case BPF_ALU|BPF_MUL|BPF_X:
case BPF_ALU|BPF_DIV|BPF_X:
case BPF_ALU|BPF_AND|BPF_K:
case BPF_ALU|BPF_AND|BPF_X:
case BPF_ALU|BPF_OR|BPF_K:
case BPF_ALU|BPF_OR|BPF_X:
case BPF_ALU|BPF_LSH|BPF_K:
case BPF_ALU|BPF_LSH|BPF_X:
case BPF_ALU|BPF_RSH|BPF_K:
case BPF_ALU|BPF_RSH|BPF_X:
case BPF_ALU|BPF_NEG:
case BPF_LD|BPF_W|BPF_ABS:
case BPF_LD|BPF_H|BPF_ABS:
case BPF_LD|BPF_B|BPF_ABS:
case BPF_LD|BPF_W|BPF_LEN:
case BPF_LD|BPF_W|BPF_IND:
case BPF_LD|BPF_H|BPF_IND:
case BPF_LD|BPF_B|BPF_IND:
case BPF_LD|BPF_IMM:
case BPF_LDX|BPF_W|BPF_LEN:
case BPF_LDX|BPF_B|BPF_MSH:
case BPF_LDX|BPF_IMM:
case BPF_MISC|BPF_TAX:
case BPF_MISC|BPF_TXA:
case BPF_RET|BPF_K:
case BPF_RET|BPF_A:
break;
/* Some instructions need special checks */
case BPF_ALU|BPF_DIV|BPF_K:
/* check for division by zero */
if (ftest->k == 0)
return -EINVAL;
break;
case BPF_LD|BPF_MEM:
case BPF_LDX|BPF_MEM:
case BPF_ST:
case BPF_STX:
/* check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
break;
case BPF_JMP|BPF_JA:
/*
* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
* where offsets are limited. --ANK (981016)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
break;
case BPF_JMP|BPF_JEQ|BPF_K:
case BPF_JMP|BPF_JEQ|BPF_X:
case BPF_JMP|BPF_JGE|BPF_K:
case BPF_JMP|BPF_JGE|BPF_X:
case BPF_JMP|BPF_JGT|BPF_K:
case BPF_JMP|BPF_JGT|BPF_X:
case BPF_JMP|BPF_JSET|BPF_K:
case BPF_JMP|BPF_JSET|BPF_X:
/* for conditionals both must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
break;
default:
return -EINVAL;
}
}
return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
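/*
 * Illustrative note (not part of the original file): the checks above
 * reject, for instance, BPF_ALU|BPF_DIV|BPF_K with k == 0, scratch memory
 * accesses with k >= BPF_MEMWORDS, jump offsets that land past the end of
 * the program, and programs whose last instruction is not a BPF_RET.
 */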
/**
* sk_filter_rcu_release: Release a socket filter by rcu_head
* @rcu: rcu_head that contains the sk_filter to free
*/
static void sk_filter_rcu_release(struct rcu_head *rcu)
{
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
sk_filter_release(fp);
}
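/*
 * The socket's memory accounting is reduced immediately, but the filter
 * itself is freed only after all readers that entered rcu_read_lock_bh()
 * (see sk_filter() above) have finished, hence call_rcu_bh() with
 * sk_filter_rcu_release() as the callback.
 */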
static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
{
unsigned int size = sk_filter_len(fp);
atomic_sub(size, &sk->sk_omem_alloc);
call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
}
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
* occurs or there is insufficient memory for the filter, a negative
* errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
return -EINVAL;
fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
if (!fp)
return -ENOMEM;
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
sock_kfree_s(sk, fp, fsize+sizeof(*fp));
return -EFAULT;
}
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
err = sk_chk_filter(fp->insns, fp->len);
if (err) {
sk_filter_uncharge(sk, fp);
return err;
}
rcu_read_lock_bh();
old_fp = rcu_dereference_bh(sk->sk_filter);
rcu_assign_pointer(sk->sk_filter, fp);
rcu_read_unlock_bh();
if (old_fp)
sk_filter_delayed_uncharge(sk, old_fp);
return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
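/*
 * Illustrative note (not part of the original file): sk_attach_filter() is
 * normally reached from sock_setsockopt() when userspace attaches a filter,
 * roughly:
 *
 *	struct sock_fprog prog = { .len = len, .filter = insns };
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 *
 * sk_detach_filter() below is reached the same way via SO_DETACH_FILTER.
 */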
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
struct sk_filter *filter;
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
rcu_assign_pointer(sk->sk_filter, NULL);
sk_filter_delayed_uncharge(sk, filter);
ret = 0;
}
rcu_read_unlock_bh();
return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);