From 7a92263705435d046d37a0990d0edfcb517f7ad3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 14 Dec 2009 14:52:10 +0100 Subject: [PATCH 1/5] netfilter: xtables: document minimal required version For both .33 and .32-stable. Signed-off-by: Jan Engelhardt Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- Documentation/Changes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/Changes b/Documentation/Changes index 6d0f1efc5bf6..f08b313cd235 100644 --- a/Documentation/Changes +++ b/Documentation/Changes @@ -49,6 +49,8 @@ o oprofile 0.9 # oprofiled --version o udev 081 # udevinfo -V o grub 0.93 # grub --version o mcelog 0.6 +o iptables 1.4.1 # iptables -V + Kernel compilation ================== From 9abfe315de96aa5c9878b2f627542bc54901c6e9 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Mon, 14 Dec 2009 16:38:21 +0100 Subject: [PATCH 2/5] ipvs: fix synchronization on connection close commit 9d3a0de makes slaves expire as they would do on the master with much shorter timeouts. But it introduces another problem: When we close a connection, on master server the connection became CLOSE_WAIT/TIME_WAIT, it was synced to slaves, but if master is finished within it's timeouts (CLOSE), it will not be synced to slaves. Then slaves will be kept on CLOSE_WAIT/TIME_WAIT until timeout reaches. Thus we should also sync with CLOSE. Cc: Wensong Zhang Cc: Simon Horman Cc: Julian Anastasov Cc: David S. Miller Signed-off-by: Xiaotian Feng Acked-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b95699f00545..847ffca40184 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1366,6 +1366,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, == sysctl_ip_vs_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || + (cp->state == IP_VS_TCP_S_CLOSE) || (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_TIME_WAIT))))) ip_vs_sync_conn(cp); From 0b5ccb2ee250136dd7385b1c7da28417d0d4d32d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 15 Dec 2009 16:59:18 +0100 Subject: [PATCH 3/5] ipv6: reassembly: use seperate reassembly queues for conntrack and local delivery Currently the same reassembly queue might be used for packets reassembled by conntrack in different positions in the stack (PREROUTING/LOCAL_OUT), as well as local delivery. This can cause "packet jumps" when the fragment completing a reassembled packet is queued from a different position in the stack than the previous ones. Add a "user" identifier to the reassembly queue key to seperate the queues of each caller, similar to what we do for IPv4. Signed-off-by: Patrick McHardy --- include/net/ipv6.h | 7 +++++++ include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 13 +++++++++++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 7 ++++--- net/ipv6/reassembly.c | 5 ++++- 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 92db8617d188..d6916035bcea 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -350,8 +350,15 @@ static inline int ipv6_prefix_equal(const struct in6_addr *a1, struct inet_frag_queue; +enum ip6_defrag_users { + IP6_DEFRAG_LOCAL_DELIVER, + IP6_DEFRAG_CONNTRACK_IN, + IP6_DEFRAG_CONNTRACK_OUT, +}; + struct ip6_create_arg { __be32 id; + u32 user; struct in6_addr *src; struct in6_addr *dst; }; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index abc55ad75c2b..1ee717eb5b09 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -9,7 +9,7 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; extern int nf_ct_frag6_init(void); extern void nf_ct_frag6_cleanup(void); -extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb); +extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user); extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, struct net_device *in, struct net_device *out, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 5f2ec208a8c3..c0a82fe78321 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -187,6 +187,16 @@ static unsigned int ipv6_confirm(unsigned int hooknum, return nf_conntrack_confirm(skb); } +static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ + if (hooknum == NF_INET_PRE_ROUTING) + return IP6_DEFRAG_CONNTRACK_IN; + else + return IP6_DEFRAG_CONNTRACK_OUT; + +} + static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -199,8 +209,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (skb->nfct) return NF_ACCEPT; - reasm = nf_ct_frag6_gather(skb); - + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e0b9424fa1b2..312c20adc83f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -168,13 +168,14 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. */ static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) +fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; unsigned int hash; arg.id = id; + arg.user = user; arg.src = src; arg.dst = dst; @@ -559,7 +560,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) +struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; @@ -605,7 +606,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) nf_ct_frag6_evictor(); - fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); + fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4d98549a6868..3b3a95607125 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -72,6 +72,7 @@ struct frag_queue struct inet_frag_queue q; __be32 id; /* fragment id */ + u32 user; struct in6_addr saddr; struct in6_addr daddr; @@ -141,7 +142,7 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) struct ip6_create_arg *arg = a; fq = container_of(q, struct frag_queue, q); - return (fq->id == arg->id && + return (fq->id == arg->id && fq->user == arg->user && ipv6_addr_equal(&fq->saddr, arg->src) && ipv6_addr_equal(&fq->daddr, arg->dst)); } @@ -163,6 +164,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) struct ip6_create_arg *arg = a; fq->id = arg->id; + fq->user = arg->user; ipv6_addr_copy(&fq->saddr, arg->src); ipv6_addr_copy(&fq->daddr, arg->dst); } @@ -243,6 +245,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, unsigned int hash; arg.id = id; + arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; From 8fa9ff6849bb86c59cc2ea9faadf3cb2d5223497 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 15 Dec 2009 16:59:59 +0100 Subject: [PATCH 4/5] netfilter: fix crashes in bridge netfilter caused by fragment jumps When fragments from bridge netfilter are passed to IPv4 or IPv6 conntrack and a reassembly queue with the same fragment key already exists from reassembling a similar packet received on a different device (f.i. with multicasted fragments), the reassembled packet might continue on a different codepath than where the head fragment originated. This can cause crashes in bridge netfilter when a fragment received on a non-bridge device (and thus with skb->nf_bridge == NULL) continues through the bridge netfilter code. Add a new reassembly identifier for packets originating from bridge netfilter and use it to put those packets in insolated queues. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14805 Reported-and-Tested-by: Chong Qiao Signed-off-by: Patrick McHardy --- include/net/ip.h | 1 + include/net/ipv6.h | 1 + net/ipv4/netfilter/nf_defrag_ipv4.c | 21 +++++++++++++++---- .../netfilter/nf_conntrack_l3proto_ipv6.c | 6 ++++++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index e6b9d12d5f62..85108cfbb1ae 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -337,6 +337,7 @@ enum ip_defrag_users { IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, IP_DEFRAG_CONNTRACK_OUT, + IP_DEFRAG_CONNTRACK_BRIDGE_IN, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d6916035bcea..ccab5946c830 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -354,6 +354,7 @@ enum ip6_defrag_users { IP6_DEFRAG_LOCAL_DELIVER, IP6_DEFRAG_CONNTRACK_IN, IP6_DEFRAG_CONNTRACK_OUT, + IP6_DEFRAG_CONNTRACK_BRIDGE_IN, }; struct ip6_create_arg { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index fa2d6b6fc3e5..331ead3ebd1b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -34,6 +35,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } +static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP_DEFRAG_CONNTRACK_BRIDGE_IN; +#endif + if (hooknum == NF_INET_PRE_ROUTING) + return IP_DEFRAG_CONNTRACK_IN; + else + return IP_DEFRAG_CONNTRACK_OUT; +} + static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -50,10 +65,8 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif /* Gather fragments. */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) + enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); + if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; } return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c0a82fe78321..0956ebabbff2 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -190,6 +191,11 @@ static unsigned int ipv6_confirm(unsigned int hooknum, static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN; +#endif if (hooknum == NF_INET_PRE_ROUTING) return IP6_DEFRAG_CONNTRACK_IN; else From 258c889362aa95d0ab534b38ce8c15d3009705b1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 15 Dec 2009 17:01:25 +0100 Subject: [PATCH 5/5] ipvs: zero usvc and udest Make sure that any otherwise uninitialised fields of usvc are zero. This has been obvserved to cause a problem whereby the port of fwmark services may end up as a non-zero value which causes scheduling of a destination server to fail for persisitent services. As observed by Deon van der Merwe . This fix suggested by Julian Anastasov . For good measure also zero udest. Cc: Deon van der Merwe Acked-by: Julian Anastasov Signed-off-by: Simon Horman Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e55a6861d26f..6bde12da2fe0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2714,6 +2714,8 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) return -EINVAL; + memset(usvc, 0, sizeof(*usvc)); + usvc->af = nla_get_u16(nla_af); #ifdef CONFIG_IP_VS_IPV6 if (usvc->af != AF_INET && usvc->af != AF_INET6) @@ -2901,6 +2903,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, if (!(nla_addr && nla_port)) return -EINVAL; + memset(udest, 0, sizeof(*udest)); + nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); udest->port = nla_get_u16(nla_port);