filter: optimize sk_run_filter

Remove the pc variable to avoid the arithmetic needed to compute fentry at
each filter instruction. Jumps now manipulate the fentry pointer directly.

As the last instruction of filter[] is guaranteed to be a RETURN, and
all jumps land at or before the last instruction, we don't need to check
filter bounds (the number of instructions in the filter array) at each
iteration, so we remove the flen parameter from sk_run_filter().

On x86_32, remove the f_k variable introduced in commit 57fe93b374
(filter: make sure filters dont read uninitialized memory)

Note: we could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS option in order
to avoid too many ifdefs in this code.

This helps the compiler use CPU registers to hold fentry and the A
accumulator.

On x86_32, this saves 401 bytes and, more importantly, sk_run_filter()
runs much faster because of reduced register pressure (and one less
conditional branch per BPF instruction).

# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   2948       0       0    2948     b84 net/core/filter.o
   3349       0       0    3349     d15 net/core/filter_pre.o

on x86_64 :
# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   5173       0       0    5173    1435 net/core/filter.o
   5224       0       0    5224    1468 net/core/filter_pre.o

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2010-11-19 09:49:59 -08:00 committed by David S. Miller
parent 0a80410dc5
commit 93aaae2e01
6 changed files with 61 additions and 64 deletions

View File

@ -1147,15 +1147,14 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
} }
if (is->pass_filter if (is->pass_filter
&& sk_run_filter(skb, is->pass_filter, is->pass_len) == 0) { && sk_run_filter(skb, is->pass_filter) == 0) {
if (is->debug & 0x2) if (is->debug & 0x2)
printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); printk(KERN_DEBUG "IPPP: inbound frame filtered.\n");
kfree_skb(skb); kfree_skb(skb);
return; return;
} }
if (!(is->active_filter if (!(is->active_filter
&& sk_run_filter(skb, is->active_filter, && sk_run_filter(skb, is->active_filter) == 0)) {
is->active_len) == 0)) {
if (is->debug & 0x2) if (is->debug & 0x2)
printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n"); printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
lp->huptimer = 0; lp->huptimer = 0;
@ -1294,15 +1293,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
} }
if (ipt->pass_filter if (ipt->pass_filter
&& sk_run_filter(skb, ipt->pass_filter, ipt->pass_len) == 0) { && sk_run_filter(skb, ipt->pass_filter) == 0) {
if (ipt->debug & 0x4) if (ipt->debug & 0x4)
printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); printk(KERN_DEBUG "IPPP: outbound frame filtered.\n");
kfree_skb(skb); kfree_skb(skb);
goto unlock; goto unlock;
} }
if (!(ipt->active_filter if (!(ipt->active_filter
&& sk_run_filter(skb, ipt->active_filter, && sk_run_filter(skb, ipt->active_filter) == 0)) {
ipt->active_len) == 0)) {
if (ipt->debug & 0x4) if (ipt->debug & 0x4)
printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n"); printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
lp->huptimer = 0; lp->huptimer = 0;
@ -1492,9 +1490,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
} }
drop |= is->pass_filter drop |= is->pass_filter
&& sk_run_filter(skb, is->pass_filter, is->pass_len) == 0; && sk_run_filter(skb, is->pass_filter) == 0;
drop |= is->active_filter drop |= is->active_filter
&& sk_run_filter(skb, is->active_filter, is->active_len) == 0; && sk_run_filter(skb, is->active_filter) == 0;
skb_push(skb, IPPP_MAX_HEADER - 4); skb_push(skb, IPPP_MAX_HEADER - 4);
return drop; return drop;

View File

@ -1136,8 +1136,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
a four-byte PPP header on each packet */ a four-byte PPP header on each packet */
*skb_push(skb, 2) = 1; *skb_push(skb, 2) = 1;
if (ppp->pass_filter && if (ppp->pass_filter &&
sk_run_filter(skb, ppp->pass_filter, sk_run_filter(skb, ppp->pass_filter) == 0) {
ppp->pass_len) == 0) {
if (ppp->debug & 1) if (ppp->debug & 1)
printk(KERN_DEBUG "PPP: outbound frame not passed\n"); printk(KERN_DEBUG "PPP: outbound frame not passed\n");
kfree_skb(skb); kfree_skb(skb);
@ -1145,8 +1144,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
} }
/* if this packet passes the active filter, record the time */ /* if this packet passes the active filter, record the time */
if (!(ppp->active_filter && if (!(ppp->active_filter &&
sk_run_filter(skb, ppp->active_filter, sk_run_filter(skb, ppp->active_filter) == 0))
ppp->active_len) == 0))
ppp->last_xmit = jiffies; ppp->last_xmit = jiffies;
skb_pull(skb, 2); skb_pull(skb, 2);
#else #else
@ -1758,8 +1756,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
*skb_push(skb, 2) = 0; *skb_push(skb, 2) = 0;
if (ppp->pass_filter && if (ppp->pass_filter &&
sk_run_filter(skb, ppp->pass_filter, sk_run_filter(skb, ppp->pass_filter) == 0) {
ppp->pass_len) == 0) {
if (ppp->debug & 1) if (ppp->debug & 1)
printk(KERN_DEBUG "PPP: inbound frame " printk(KERN_DEBUG "PPP: inbound frame "
"not passed\n"); "not passed\n");
@ -1767,8 +1764,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
return; return;
} }
if (!(ppp->active_filter && if (!(ppp->active_filter &&
sk_run_filter(skb, ppp->active_filter, sk_run_filter(skb, ppp->active_filter) == 0))
ppp->active_len) == 0))
ppp->last_recv = jiffies; ppp->last_recv = jiffies;
__skb_pull(skb, 2); __skb_pull(skb, 2);
} else } else

View File

@ -147,7 +147,7 @@ struct sock;
extern int sk_filter(struct sock *sk, struct sk_buff *skb); extern int sk_filter(struct sock *sk, struct sk_buff *skb);
extern unsigned int sk_run_filter(struct sk_buff *skb, extern unsigned int sk_run_filter(struct sk_buff *skb,
struct sock_filter *filter, int flen); const struct sock_filter *filter);
extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
extern int sk_detach_filter(struct sock *sk); extern int sk_detach_filter(struct sock *sk);
extern int sk_chk_filter(struct sock_filter *filter, int flen); extern int sk_chk_filter(struct sock_filter *filter, int flen);

View File

@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh(); rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter); filter = rcu_dereference_bh(sk->sk_filter);
if (filter) { if (filter) {
unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); unsigned int pkt_len = sk_run_filter(skb, filter->insns);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
} }
@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter);
* sk_run_filter - run a filter on a socket * sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on * @skb: buffer to run the filter on
* @filter: filter to apply * @filter: filter to apply
* @flen: length of filter
* *
* Decode and apply filter instructions to the skb->data. * Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. skb is the data we are * Return length to keep, 0 for none. @skb is the data we are
* filtering, filter is the array of filter instructions, and * filtering, @filter is the array of filter instructions.
* len is the number of filter blocks in the array. * Because all jumps are guaranteed to be before last instruction,
* and last instruction guaranteed to be a RET, we dont need to check
* flen. (We used to pass to this function the length of filter)
*/ */
unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
{ {
void *ptr; void *ptr;
u32 A = 0; /* Accumulator */ u32 A = 0; /* Accumulator */
@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
unsigned long memvalid = 0; unsigned long memvalid = 0;
u32 tmp; u32 tmp;
int k; int k;
int pc;
BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG); BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/* /*
* Process array of filter instructions. * Process array of filter instructions.
*/ */
for (pc = 0; pc < flen; pc++) { for (;; fentry++) {
const struct sock_filter *fentry = &filter[pc]; #if defined(CONFIG_X86_32)
u32 f_k = fentry->k; #define K (fentry->k)
#else
const u32 K = fentry->k;
#endif
switch (fentry->code) { switch (fentry->code) {
case BPF_S_ALU_ADD_X: case BPF_S_ALU_ADD_X:
A += X; A += X;
continue; continue;
case BPF_S_ALU_ADD_K: case BPF_S_ALU_ADD_K:
A += f_k; A += K;
continue; continue;
case BPF_S_ALU_SUB_X: case BPF_S_ALU_SUB_X:
A -= X; A -= X;
continue; continue;
case BPF_S_ALU_SUB_K: case BPF_S_ALU_SUB_K:
A -= f_k; A -= K;
continue; continue;
case BPF_S_ALU_MUL_X: case BPF_S_ALU_MUL_X:
A *= X; A *= X;
continue; continue;
case BPF_S_ALU_MUL_K: case BPF_S_ALU_MUL_K:
A *= f_k; A *= K;
continue; continue;
case BPF_S_ALU_DIV_X: case BPF_S_ALU_DIV_X:
if (X == 0) if (X == 0)
@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X; A /= X;
continue; continue;
case BPF_S_ALU_DIV_K: case BPF_S_ALU_DIV_K:
A /= f_k; A /= K;
continue; continue;
case BPF_S_ALU_AND_X: case BPF_S_ALU_AND_X:
A &= X; A &= X;
continue; continue;
case BPF_S_ALU_AND_K: case BPF_S_ALU_AND_K:
A &= f_k; A &= K;
continue; continue;
case BPF_S_ALU_OR_X: case BPF_S_ALU_OR_X:
A |= X; A |= X;
continue; continue;
case BPF_S_ALU_OR_K: case BPF_S_ALU_OR_K:
A |= f_k; A |= K;
continue; continue;
case BPF_S_ALU_LSH_X: case BPF_S_ALU_LSH_X:
A <<= X; A <<= X;
continue; continue;
case BPF_S_ALU_LSH_K: case BPF_S_ALU_LSH_K:
A <<= f_k; A <<= K;
continue; continue;
case BPF_S_ALU_RSH_X: case BPF_S_ALU_RSH_X:
A >>= X; A >>= X;
continue; continue;
case BPF_S_ALU_RSH_K: case BPF_S_ALU_RSH_K:
A >>= f_k; A >>= K;
continue; continue;
case BPF_S_ALU_NEG: case BPF_S_ALU_NEG:
A = -A; A = -A;
continue; continue;
case BPF_S_JMP_JA: case BPF_S_JMP_JA:
pc += f_k; fentry += K;
continue; continue;
case BPF_S_JMP_JGT_K: case BPF_S_JMP_JGT_K:
pc += (A > f_k) ? fentry->jt : fentry->jf; fentry += (A > K) ? fentry->jt : fentry->jf;
continue; continue;
case BPF_S_JMP_JGE_K: case BPF_S_JMP_JGE_K:
pc += (A >= f_k) ? fentry->jt : fentry->jf; fentry += (A >= K) ? fentry->jt : fentry->jf;
continue; continue;
case BPF_S_JMP_JEQ_K: case BPF_S_JMP_JEQ_K:
pc += (A == f_k) ? fentry->jt : fentry->jf; fentry += (A == K) ? fentry->jt : fentry->jf;
continue; continue;
case BPF_S_JMP_JSET_K: case BPF_S_JMP_JSET_K:
pc += (A & f_k) ? fentry->jt : fentry->jf; fentry += (A & K) ? fentry->jt : fentry->jf;
continue; continue;
case BPF_S_JMP_JGT_X: case BPF_S_JMP_JGT_X:
pc += (A > X) ? fentry->jt : fentry->jf; fentry += (A > X) ? fentry->jt : fentry->jf;
continue; continue;
case BPF_S_JMP_JGE_X: case BPF_S_JMP_JGE_X:
pc += (A >= X) ? fentry->jt : fentry->jf; fentry += (A >= X) ? fentry->jt : fentry->jf;
continue; continue;
case BPF_S_JMP_JEQ_X: case BPF_S_JMP_JEQ_X:
pc += (A == X) ? fentry->jt : fentry->jf; fentry += (A == X) ? fentry->jt : fentry->jf;
continue; continue;
case BPF_S_JMP_JSET_X: case BPF_S_JMP_JSET_X:
pc += (A & X) ? fentry->jt : fentry->jf; fentry += (A & X) ? fentry->jt : fentry->jf;
continue; continue;
case BPF_S_LD_W_ABS: case BPF_S_LD_W_ABS:
k = f_k; k = K;
load_w: load_w:
ptr = load_pointer(skb, k, 4, &tmp); ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) { if (ptr != NULL) {
@ -268,7 +271,7 @@ load_w:
} }
break; break;
case BPF_S_LD_H_ABS: case BPF_S_LD_H_ABS:
k = f_k; k = K;
load_h: load_h:
ptr = load_pointer(skb, k, 2, &tmp); ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) { if (ptr != NULL) {
@ -277,7 +280,7 @@ load_h:
} }
break; break;
case BPF_S_LD_B_ABS: case BPF_S_LD_B_ABS:
k = f_k; k = K;
load_b: load_b:
ptr = load_pointer(skb, k, 1, &tmp); ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) { if (ptr != NULL) {
@ -292,34 +295,34 @@ load_b:
X = skb->len; X = skb->len;
continue; continue;
case BPF_S_LD_W_IND: case BPF_S_LD_W_IND:
k = X + f_k; k = X + K;
goto load_w; goto load_w;
case BPF_S_LD_H_IND: case BPF_S_LD_H_IND:
k = X + f_k; k = X + K;
goto load_h; goto load_h;
case BPF_S_LD_B_IND: case BPF_S_LD_B_IND:
k = X + f_k; k = X + K;
goto load_b; goto load_b;
case BPF_S_LDX_B_MSH: case BPF_S_LDX_B_MSH:
ptr = load_pointer(skb, f_k, 1, &tmp); ptr = load_pointer(skb, K, 1, &tmp);
if (ptr != NULL) { if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2; X = (*(u8 *)ptr & 0xf) << 2;
continue; continue;
} }
return 0; return 0;
case BPF_S_LD_IMM: case BPF_S_LD_IMM:
A = f_k; A = K;
continue; continue;
case BPF_S_LDX_IMM: case BPF_S_LDX_IMM:
X = f_k; X = K;
continue; continue;
case BPF_S_LD_MEM: case BPF_S_LD_MEM:
A = (memvalid & (1UL << f_k)) ? A = (memvalid & (1UL << K)) ?
mem[f_k] : 0; mem[K] : 0;
continue; continue;
case BPF_S_LDX_MEM: case BPF_S_LDX_MEM:
X = (memvalid & (1UL << f_k)) ? X = (memvalid & (1UL << K)) ?
mem[f_k] : 0; mem[K] : 0;
continue; continue;
case BPF_S_MISC_TAX: case BPF_S_MISC_TAX:
X = A; X = A;
@ -328,16 +331,16 @@ load_b:
A = X; A = X;
continue; continue;
case BPF_S_RET_K: case BPF_S_RET_K:
return f_k; return K;
case BPF_S_RET_A: case BPF_S_RET_A:
return A; return A;
case BPF_S_ST: case BPF_S_ST:
memvalid |= 1UL << f_k; memvalid |= 1UL << K;
mem[f_k] = A; mem[K] = A;
continue; continue;
case BPF_S_STX: case BPF_S_STX:
memvalid |= 1UL << f_k; memvalid |= 1UL << K;
mem[f_k] = X; mem[K] = X;
continue; continue;
default: default:
WARN_ON(1); WARN_ON(1);

View File

@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
if (likely(skb->dev && if (likely(skb->dev &&
skb->dev->phydev && skb->dev->phydev &&
skb->dev->phydev->drv)) skb->dev->phydev->drv))
return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); return sk_run_filter(skb, ptp_filter);
else else
return PTP_CLASS_NONE; return PTP_CLASS_NONE;
} }

View File

@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
rcu_read_lock_bh(); rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter); filter = rcu_dereference_bh(sk->sk_filter);
if (filter != NULL) if (filter != NULL)
res = sk_run_filter(skb, filter->insns, filter->len); res = sk_run_filter(skb, filter->insns);
rcu_read_unlock_bh(); rcu_read_unlock_bh();
return res; return res;