linux-stable/net/ipv4/netfilter/nf_reject_ipv4.c
Florian Westphal 2954fe60e3 netfilter: let reset rules clean out conntrack entries
iptables/nftables support responding to tcp packets with tcp resets.

The generated tcp reset packet passes through both output and postrouting
netfilter hooks, but conntrack will never see them because the generated
skb has its ->nfct pointer copied over from the packet that triggered the
reset rule.

If the reset rule is used for established connections, this
may result in the conntrack entry staying around for a very long
time (default timeout is 5 days).

One way to avoid this would be to not copy the nf_conn pointer
so that the reset packet passes through conntrack too.

The problem is that output rules might not have the same conntrack
zone setup as the prerouting ones, so it's possible that the
reset skb won't find the correct entry.  Generating a template
entry for the skb seems error prone as well.

Add an explicit "closing" function that switches a confirmed
conntrack entry to closed state and wire this up for tcp.

If the entry isn't confirmed, no action is needed because
the conntrack entry will never be committed to the table.

Reported-by: Russel King <linux@armlinux.org.uk>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2023-02-17 13:04:56 +01:00

338 lines
8.2 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*/
#include <linux/module.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/dst.h>
#include <net/netfilter/ipv4/nf_reject.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
/* Sanity-check the IPv4 header of @skb before a reject reply is built.
 *
 * Returns 1 when the basic header and then the full header (ihl * 4
 * bytes) can be pulled, the version is 4, ihl is at least 5, and the
 * total length fits inside the skb while covering at least the header.
 * Returns 0 otherwise.
 */
static int nf_reject_iphdr_validate(struct sk_buff *skb)
{
	struct iphdr *iph;
	u32 hdr_len, tot_len;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		return 0;

	iph = ip_hdr(skb);
	if (iph->version != 4 || iph->ihl < 5)
		return 0;

	hdr_len = iph->ihl * 4;
	tot_len = ntohs(iph->tot_len);

	/* tot_len may not exceed the skb nor be shorter than the header. */
	if (skb->len < tot_len || tot_len < hdr_len)
		return 0;

	/* Options, if any, must be reachable as well. */
	return pskb_may_pull(skb, hdr_len) ? 1 : 0;
}
struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
struct sk_buff *oldskb,
const struct net_device *dev,
int hook)
{
const struct tcphdr *oth;
struct sk_buff *nskb;
struct iphdr *niph;
struct tcphdr _oth;
if (!nf_reject_iphdr_validate(oldskb))
return NULL;
oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook);
if (!oth)
return NULL;
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
LL_MAX_HEADER, GFP_ATOMIC);
if (!nskb)
return NULL;
nskb->dev = (struct net_device *)dev;
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
return nskb;
}
EXPORT_SYMBOL_GPL(nf_reject_skb_v4_tcp_reset);
struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
struct sk_buff *oldskb,
const struct net_device *dev,
int hook, u8 code)
{
struct sk_buff *nskb;
struct iphdr *niph;
struct icmphdr *icmph;
unsigned int len;
int dataoff;
__wsum csum;
u8 proto;
if (!nf_reject_iphdr_validate(oldskb))
return NULL;
/* IP header checks: fragment. */
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return NULL;
/* RFC says return as much as we can without exceeding 576 bytes. */
len = min_t(unsigned int, 536, oldskb->len);
if (!pskb_may_pull(oldskb, len))
return NULL;
if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len)))
return NULL;
dataoff = ip_hdrlen(oldskb);
proto = ip_hdr(oldskb)->protocol;
if (!skb_csum_unnecessary(oldskb) &&
nf_reject_verify_csum(oldskb, dataoff, proto) &&
nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
return NULL;
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) +
LL_MAX_HEADER + len, GFP_ATOMIC);
if (!nskb)
return NULL;
nskb->dev = (struct net_device *)dev;
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
skb_reset_transport_header(nskb);
icmph = skb_put_zero(nskb, sizeof(struct icmphdr));
icmph->type = ICMP_DEST_UNREACH;
icmph->code = code;
skb_put_data(nskb, skb_network_header(oldskb), len);
csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0);
icmph->checksum = csum_fold(csum);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
return nskb;
}
EXPORT_SYMBOL_GPL(nf_reject_skb_v4_unreach);
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *_oth, int hook)
{
const struct tcphdr *oth;
/* IP header checks: fragment. */
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return NULL;
if (ip_hdr(oldskb)->protocol != IPPROTO_TCP)
return NULL;
oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
sizeof(struct tcphdr), _oth);
if (oth == NULL)
return NULL;
/* No RST for RST. */
if (oth->rst)
return NULL;
/* Check checksum */
if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
return NULL;
return oth;
}
EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get);
/* Append an option-less IPv4 header to @nskb that answers @oldskb.
 *
 * @nskb:     reply under construction; its network header is reset to
 *            the current position before the header is appended
 * @oldskb:   packet being rejected; its addresses are swapped into the reply
 * @protocol: value for the IP protocol field
 * @ttl:      value for the TTL field
 *
 * tot_len is not written and the checksum field is left at zero; the
 * callers in this file fill them in once the packet is complete.
 * Returns the new header.
 */
struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
				  const struct sk_buff *oldskb,
				  __u8 protocol, int ttl)
{
	const struct iphdr *orig_iph = ip_hdr(oldskb);
	struct iphdr *hdr;

	skb_reset_network_header(nskb);
	hdr = skb_put(nskb, sizeof(struct iphdr));

	hdr->version	= 4;
	hdr->ihl	= sizeof(struct iphdr) / 4;
	hdr->tos	= 0;
	hdr->id		= 0;
	hdr->frag_off	= htons(IP_DF);
	hdr->ttl	= ttl;
	hdr->protocol	= protocol;
	hdr->check	= 0;
	/* The reply travels in the opposite direction. */
	hdr->saddr	= orig_iph->daddr;
	hdr->daddr	= orig_iph->saddr;

	nskb->protocol = htons(ETH_P_IP);

	return hdr;
}
EXPORT_SYMBOL_GPL(nf_reject_iphdr_put);
void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
const struct tcphdr *oth)
{
struct iphdr *niph = ip_hdr(nskb);
struct tcphdr *tcph;
skb_reset_transport_header(nskb);
tcph = skb_put_zero(nskb, sizeof(struct tcphdr));
tcph->source = oth->dest;
tcph->dest = oth->source;
tcph->doff = sizeof(struct tcphdr) / 4;
if (oth->ack) {
tcph->seq = oth->ack_seq;
} else {
tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
oldskb->len - ip_hdrlen(oldskb) -
(oth->doff << 2));
tcph->ack = 1;
}
tcph->rst = 1;
tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
niph->daddr, 0);
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum_start = (unsigned char *)tcph - nskb->head;
nskb->csum_offset = offsetof(struct tcphdr, check);
}
EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
/* Make sure @skb_in has a route attached so that a reply can be built.
 *
 * If @skb_in has no dst yet, look up a route towards the packet's
 * source address and attach it.
 *
 * Returns 0 when @skb_in has a dst on return, -1 when the route lookup
 * produced nothing.
 */
static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
{
	struct dst_entry *dst = NULL;
	struct flowi fl;

	/* A packet can reach the early hooks with a dst already attached
	 * (e.g. loopback traffic).  skb_dst_set() would overwrite that
	 * pointer without releasing it and leak the reference, so keep
	 * the existing dst.
	 */
	if (skb_dst(skb_in))
		return 0;

	memset(&fl, 0, sizeof(struct flowi));
	fl.u.ip4.daddr = ip_hdr(skb_in)->saddr;
	nf_ip_route(dev_net(skb_in->dev), &dst, &fl, false);
	if (!dst)
		return -1;

	skb_dst_set(skb_in, dst);
	return 0;
}
/* Send RST reply */
void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _oth;
oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook);
if (!oth)
return;
if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) &&
nf_reject_fill_skb_dst(oldskb) < 0)
return;
if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
return;
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
LL_MAX_HEADER, GFP_ATOMIC);
if (!nskb)
return;
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
ip4_dst_hoplimit(skb_dst(nskb)));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
if (ip_route_me_harder(net, sk, nskb, RTN_UNSPEC))
goto free_nskb;
niph = ip_hdr(nskb);
/* "Never happens" */
if (nskb->len > dst_mtu(skb_dst(nskb)))
goto free_nskb;
nf_ct_attach(nskb, oldskb);
nf_ct_set_closing(skb_nfct(oldskb));
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
br_indev = nf_bridge_get_physindev(oldskb);
if (br_indev) {
struct ethhdr *oeth = eth_hdr(oldskb);
nskb->dev = br_indev;
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
oeth->h_source, oeth->h_dest, nskb->len) < 0)
goto free_nskb;
dev_queue_xmit(nskb);
} else
#endif
ip_local_out(net, nskb->sk, nskb);
return;
free_nskb:
kfree_skb(nskb);
}
EXPORT_SYMBOL_GPL(nf_send_reset);
void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
{
struct iphdr *iph = ip_hdr(skb_in);
int dataoff = ip_hdrlen(skb_in);
u8 proto = iph->protocol;
if (iph->frag_off & htons(IP_OFFSET))
return;
if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) &&
nf_reject_fill_skb_dst(skb_in) < 0)
return;
if (skb_csum_unnecessary(skb_in) ||
!nf_reject_verify_csum(skb_in, dataoff, proto)) {
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
return;
}
if (nf_ip_checksum(skb_in, hook, dataoff, proto) == 0)
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
EXPORT_SYMBOL_GPL(nf_send_unreach);
/* Module metadata; a description keeps modpost from warning about it. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("IPv4 packet rejection core helpers");