mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-13 14:14:37 +00:00
48cac18ecf
Andrey reported a use-after-free in IPv6 stack.
Issue here is that we free the socket while it still has skb
in TX path and in some queues.
It happens here because IPv6 reassembly unit messes skb->truesize,
breaking skb_set_owner_w() badly.
We fixed a similar issue for IPv4 in commit 8282f27449
("inet: frag: Always orphan skbs inside ip_defrag()")
Acked-by: Joe Stringer <joe@ovn.org>
==================================================================
BUG: KASAN: use-after-free in sock_wfree+0x118/0x120
Read of size 8 at addr ffff880062da0060 by task a.out/4140
page:ffffea00018b6800 count:1 mapcount:0 mapping: (null)
index:0x0 compound_mapcount: 0
flags: 0x100000000008100(slab|head)
raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013
raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000
page dumped because: kasan: bad access detected
CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:15
dump_stack+0x292/0x398 lib/dump_stack.c:51
describe_address mm/kasan/report.c:262
kasan_report_error+0x121/0x560 mm/kasan/report.c:370
kasan_report mm/kasan/report.c:392
__asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413
sock_flag ./arch/x86/include/asm/bitops.h:324
sock_wfree+0x118/0x120 net/core/sock.c:1631
skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655
skb_release_all+0x15/0x60 net/core/skbuff.c:668
__kfree_skb+0x15/0x20 net/core/skbuff.c:684
kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705
inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304
inet_frag_put ./include/net/inet_frag.h:133
nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617
ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68
nf_hook_entry_hookfn ./include/linux/netfilter.h:102
nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310
nf_hook ./include/linux/netfilter.h:212
__ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160
ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170
ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722
ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742
rawv6_push_pending_frames net/ipv6/raw.c:613
rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927
inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
sock_sendmsg_nosec net/socket.c:635
sock_sendmsg+0xca/0x110 net/socket.c:645
sock_write_iter+0x326/0x620 net/socket.c:848
new_sync_write fs/read_write.c:499
__vfs_write+0x483/0x760 fs/read_write.c:512
vfs_write+0x187/0x530 fs/read_write.c:560
SYSC_write fs/read_write.c:607
SyS_write+0xfb/0x230 fs/read_write.c:599
entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203
RIP: 0033:0x7ff26e6f5b79
RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79
RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003
RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000
R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003
The buggy address belongs to the object at ffff880062da0000
which belongs to the cache RAWv6 of size 1504
The buggy address ffff880062da0060 is located 96 bytes inside
of 1504-byte region [ffff880062da0000, ffff880062da05e0)
Freed by task 4113:
save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
save_stack+0x43/0xd0 mm/kasan/kasan.c:502
set_track mm/kasan/kasan.c:514
kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578
slab_free_hook mm/slub.c:1352
slab_free_freelist_hook mm/slub.c:1374
slab_free mm/slub.c:2951
kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973
sk_prot_free net/core/sock.c:1377
__sk_destruct+0x49c/0x6e0 net/core/sock.c:1452
sk_destruct+0x47/0x80 net/core/sock.c:1460
__sk_free+0x57/0x230 net/core/sock.c:1468
sk_free+0x23/0x30 net/core/sock.c:1479
sock_put ./include/net/sock.h:1638
sk_common_release+0x31e/0x4e0 net/core/sock.c:2782
rawv6_close+0x54/0x80 net/ipv6/raw.c:1214
inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425
inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431
sock_release+0x8d/0x1e0 net/socket.c:599
sock_close+0x16/0x20 net/socket.c:1063
__fput+0x332/0x7f0 fs/file_table.c:208
____fput+0x15/0x20 fs/file_table.c:244
task_work_run+0x19b/0x270 kernel/task_work.c:116
exit_task_work ./include/linux/task_work.h:21
do_exit+0x186b/0x2800 kernel/exit.c:839
do_group_exit+0x149/0x420 kernel/exit.c:943
SYSC_exit_group kernel/exit.c:954
SyS_exit_group+0x1d/0x20 kernel/exit.c:952
entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203
Allocated by task 4115:
save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
save_stack+0x43/0xd0 mm/kasan/kasan.c:502
set_track mm/kasan/kasan.c:514
kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605
kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544
slab_post_alloc_hook mm/slab.h:432
slab_alloc_node mm/slub.c:2708
slab_alloc mm/slub.c:2716
kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721
sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334
sk_alloc+0x105/0x1010 net/core/sock.c:1396
inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183
__sock_create+0x4f6/0x880 net/socket.c:1199
sock_create net/socket.c:1239
SYSC_socket net/socket.c:1269
SyS_socket+0xf9/0x230 net/socket.c:1249
entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203
Memory state around the buggy address:
ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
^
ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
676 lines
16 KiB
C
676 lines
16 KiB
C
/*
|
|
* IPv6 fragment reassembly for connection tracking
|
|
*
|
|
* Copyright (C)2004 USAGI/WIDE Project
|
|
*
|
|
* Author:
|
|
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
|
|
*
|
|
* Based on: net/ipv6/reassembly.c
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "IPv6-nf: " fmt
|
|
|
|
#include <linux/errno.h>
|
|
#include <linux/types.h>
|
|
#include <linux/string.h>
|
|
#include <linux/socket.h>
|
|
#include <linux/sockios.h>
|
|
#include <linux/jiffies.h>
|
|
#include <linux/net.h>
|
|
#include <linux/list.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/in6.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/icmpv6.h>
|
|
#include <linux/random.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <net/sock.h>
|
|
#include <net/snmp.h>
|
|
#include <net/inet_frag.h>
|
|
|
|
#include <net/ipv6.h>
|
|
#include <net/protocol.h>
|
|
#include <net/transp_v6.h>
|
|
#include <net/rawv6.h>
|
|
#include <net/ndisc.h>
|
|
#include <net/addrconf.h>
|
|
#include <net/inet_ecn.h>
|
|
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/netfilter.h>
|
|
#include <linux/netfilter_ipv6.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
|
|
|
|
static const char nf_frags_cache_name[] = "nf-frags";
|
|
|
|
struct nf_ct_frag6_skb_cb
|
|
{
|
|
struct inet6_skb_parm h;
|
|
int offset;
|
|
};
|
|
|
|
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
|
|
|
|
static struct inet_frags nf_frags;
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
static int zero;
|
|
|
|
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
|
|
{
|
|
.procname = "nf_conntrack_frag6_timeout",
|
|
.data = &init_net.nf_frag.frags.timeout,
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_jiffies,
|
|
},
|
|
{
|
|
.procname = "nf_conntrack_frag6_low_thresh",
|
|
.data = &init_net.nf_frag.frags.low_thresh,
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = &zero,
|
|
.extra2 = &init_net.nf_frag.frags.high_thresh
|
|
},
|
|
{
|
|
.procname = "nf_conntrack_frag6_high_thresh",
|
|
.data = &init_net.nf_frag.frags.high_thresh,
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = &init_net.nf_frag.frags.low_thresh
|
|
},
|
|
{ }
|
|
};
|
|
|
|
static int nf_ct_frag6_sysctl_register(struct net *net)
|
|
{
|
|
struct ctl_table *table;
|
|
struct ctl_table_header *hdr;
|
|
|
|
table = nf_ct_frag6_sysctl_table;
|
|
if (!net_eq(net, &init_net)) {
|
|
table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
|
|
GFP_KERNEL);
|
|
if (table == NULL)
|
|
goto err_alloc;
|
|
|
|
table[0].data = &net->nf_frag.frags.timeout;
|
|
table[1].data = &net->nf_frag.frags.low_thresh;
|
|
table[1].extra2 = &net->nf_frag.frags.high_thresh;
|
|
table[2].data = &net->nf_frag.frags.high_thresh;
|
|
table[2].extra1 = &net->nf_frag.frags.low_thresh;
|
|
table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
|
|
}
|
|
|
|
hdr = register_net_sysctl(net, "net/netfilter", table);
|
|
if (hdr == NULL)
|
|
goto err_reg;
|
|
|
|
net->nf_frag.sysctl.frags_hdr = hdr;
|
|
return 0;
|
|
|
|
err_reg:
|
|
if (!net_eq(net, &init_net))
|
|
kfree(table);
|
|
err_alloc:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/*
 * Undo nf_ct_frag6_sysctl_register(): unregister the sysctl header of
 * @net and free the table when it was a per-namespace copy.
 */
static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
	struct ctl_table_header *header = net->nf_frag.sysctl.frags_hdr;
	/* fetch the table pointer before the header is unregistered */
	struct ctl_table *tbl = header->ctl_table_arg;

	unregister_net_sysctl_table(header);

	/* init_net registered the static table, nothing to free */
	if (net_eq(net, &init_net))
		return;

	kfree(tbl);
}
|
|
|
|
#else
|
|
/* Built without CONFIG_SYSCTL: nothing to register, always succeeds. */
static int nf_ct_frag6_sysctl_register(struct net *net)
{
	return 0;
}
|
|
/* Built without CONFIG_SYSCTL: nothing was registered, nothing to undo. */
static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
}
|
|
#endif
|
|
|
|
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
|
|
{
|
|
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
|
|
}
|
|
|
|
/*
 * Hash a fragment queue key (fragment id, source and destination
 * address), seeded with nf_frags.rnd.  The seed is initialised on first
 * use through net_get_random_once().
 */
static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
				 const struct in6_addr *daddr)
{
	u32 src_hash, dst_hash;

	net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));

	src_hash = ipv6_addr_hash(saddr);
	dst_hash = ipv6_addr_hash(daddr);

	return jhash_3words(src_hash, dst_hash, (__force u32)id, nf_frags.rnd);
}
|
|
|
|
|
|
static unsigned int nf_hashfn(const struct inet_frag_queue *q)
|
|
{
|
|
const struct frag_queue *nq;
|
|
|
|
nq = container_of(q, struct frag_queue, q);
|
|
return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
|
|
}
|
|
|
|
static void nf_ct_frag6_expire(unsigned long data)
|
|
{
|
|
struct frag_queue *fq;
|
|
struct net *net;
|
|
|
|
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
|
|
net = container_of(fq->q.net, struct net, nf_frag.frags);
|
|
|
|
ip6_expire_frag_queue(net, fq, &nf_frags);
|
|
}
|
|
|
|
/* Creation primitives. */
|
|
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
|
|
u32 user, struct in6_addr *src,
|
|
struct in6_addr *dst, int iif, u8 ecn)
|
|
{
|
|
struct inet_frag_queue *q;
|
|
struct ip6_create_arg arg;
|
|
unsigned int hash;
|
|
|
|
arg.id = id;
|
|
arg.user = user;
|
|
arg.src = src;
|
|
arg.dst = dst;
|
|
arg.iif = iif;
|
|
arg.ecn = ecn;
|
|
|
|
local_bh_disable();
|
|
hash = nf_hash_frag(id, src, dst);
|
|
|
|
q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
|
|
local_bh_enable();
|
|
if (IS_ERR_OR_NULL(q)) {
|
|
inet_frag_maybe_warn_overflow(q, pr_fmt());
|
|
return NULL;
|
|
}
|
|
return container_of(q, struct frag_queue, q);
|
|
}
|
|
|
|
|
|
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
|
|
const struct frag_hdr *fhdr, int nhoff)
|
|
{
|
|
struct sk_buff *prev, *next;
|
|
unsigned int payload_len;
|
|
int offset, end;
|
|
u8 ecn;
|
|
|
|
if (fq->q.flags & INET_FRAG_COMPLETE) {
|
|
pr_debug("Already completed\n");
|
|
goto err;
|
|
}
|
|
|
|
payload_len = ntohs(ipv6_hdr(skb)->payload_len);
|
|
|
|
offset = ntohs(fhdr->frag_off) & ~0x7;
|
|
end = offset + (payload_len -
|
|
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
|
|
|
|
if ((unsigned int)end > IPV6_MAXPLEN) {
|
|
pr_debug("offset is too large.\n");
|
|
return -1;
|
|
}
|
|
|
|
ecn = ip6_frag_ecn(ipv6_hdr(skb));
|
|
|
|
if (skb->ip_summed == CHECKSUM_COMPLETE) {
|
|
const unsigned char *nh = skb_network_header(skb);
|
|
skb->csum = csum_sub(skb->csum,
|
|
csum_partial(nh, (u8 *)(fhdr + 1) - nh,
|
|
0));
|
|
}
|
|
|
|
/* Is this the final fragment? */
|
|
if (!(fhdr->frag_off & htons(IP6_MF))) {
|
|
/* If we already have some bits beyond end
|
|
* or have different end, the segment is corrupted.
|
|
*/
|
|
if (end < fq->q.len ||
|
|
((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) {
|
|
pr_debug("already received last fragment\n");
|
|
goto err;
|
|
}
|
|
fq->q.flags |= INET_FRAG_LAST_IN;
|
|
fq->q.len = end;
|
|
} else {
|
|
/* Check if the fragment is rounded to 8 bytes.
|
|
* Required by the RFC.
|
|
*/
|
|
if (end & 0x7) {
|
|
/* RFC2460 says always send parameter problem in
|
|
* this case. -DaveM
|
|
*/
|
|
pr_debug("end of fragment not rounded to 8 bytes.\n");
|
|
return -1;
|
|
}
|
|
if (end > fq->q.len) {
|
|
/* Some bits beyond end -> corruption. */
|
|
if (fq->q.flags & INET_FRAG_LAST_IN) {
|
|
pr_debug("last packet already reached.\n");
|
|
goto err;
|
|
}
|
|
fq->q.len = end;
|
|
}
|
|
}
|
|
|
|
if (end == offset)
|
|
goto err;
|
|
|
|
/* Point into the IP datagram 'data' part. */
|
|
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
|
|
pr_debug("queue: message is too short.\n");
|
|
goto err;
|
|
}
|
|
if (pskb_trim_rcsum(skb, end - offset)) {
|
|
pr_debug("Can't trim\n");
|
|
goto err;
|
|
}
|
|
|
|
/* Find out which fragments are in front and at the back of us
|
|
* in the chain of fragments so far. We must know where to put
|
|
* this fragment, right?
|
|
*/
|
|
prev = fq->q.fragments_tail;
|
|
if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
|
|
next = NULL;
|
|
goto found;
|
|
}
|
|
prev = NULL;
|
|
for (next = fq->q.fragments; next != NULL; next = next->next) {
|
|
if (NFCT_FRAG6_CB(next)->offset >= offset)
|
|
break; /* bingo! */
|
|
prev = next;
|
|
}
|
|
|
|
found:
|
|
/* RFC5722, Section 4:
|
|
* When reassembling an IPv6 datagram, if
|
|
* one or more its constituent fragments is determined to be an
|
|
* overlapping fragment, the entire datagram (and any constituent
|
|
* fragments, including those not yet received) MUST be silently
|
|
* discarded.
|
|
*/
|
|
|
|
/* Check for overlap with preceding fragment. */
|
|
if (prev &&
|
|
(NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
|
|
goto discard_fq;
|
|
|
|
/* Look for overlap with succeeding segment. */
|
|
if (next && NFCT_FRAG6_CB(next)->offset < end)
|
|
goto discard_fq;
|
|
|
|
NFCT_FRAG6_CB(skb)->offset = offset;
|
|
|
|
/* Insert this fragment in the chain of fragments. */
|
|
skb->next = next;
|
|
if (!next)
|
|
fq->q.fragments_tail = skb;
|
|
if (prev)
|
|
prev->next = skb;
|
|
else
|
|
fq->q.fragments = skb;
|
|
|
|
if (skb->dev) {
|
|
fq->iif = skb->dev->ifindex;
|
|
skb->dev = NULL;
|
|
}
|
|
fq->q.stamp = skb->tstamp;
|
|
fq->q.meat += skb->len;
|
|
fq->ecn |= ecn;
|
|
if (payload_len > fq->q.max_size)
|
|
fq->q.max_size = payload_len;
|
|
add_frag_mem_limit(fq->q.net, skb->truesize);
|
|
|
|
/* The first fragment.
|
|
* nhoffset is obtained from the first fragment, of course.
|
|
*/
|
|
if (offset == 0) {
|
|
fq->nhoffset = nhoff;
|
|
fq->q.flags |= INET_FRAG_FIRST_IN;
|
|
}
|
|
|
|
return 0;
|
|
|
|
discard_fq:
|
|
inet_frag_kill(&fq->q, &nf_frags);
|
|
err:
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Check if this packet is complete.
|
|
*
|
|
* It is called with locked fq, and caller must check that
|
|
* queue is eligible for reassembly i.e. it is not COMPLETE,
|
|
* the last and the first frames arrived and all the bits are here.
|
|
*
|
|
* returns true if *prev skb has been transformed into the reassembled
|
|
* skb, false otherwise.
|
|
*/
|
|
static bool
|
|
nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
|
|
{
|
|
struct sk_buff *fp, *head = fq->q.fragments;
|
|
int payload_len;
|
|
u8 ecn;
|
|
|
|
inet_frag_kill(&fq->q, &nf_frags);
|
|
|
|
WARN_ON(head == NULL);
|
|
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
|
|
|
|
ecn = ip_frag_ecn_table[fq->ecn];
|
|
if (unlikely(ecn == 0xff))
|
|
return false;
|
|
|
|
/* Unfragmented part is taken from the first segment. */
|
|
payload_len = ((head->data - skb_network_header(head)) -
|
|
sizeof(struct ipv6hdr) + fq->q.len -
|
|
sizeof(struct frag_hdr));
|
|
if (payload_len > IPV6_MAXPLEN) {
|
|
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
|
|
payload_len);
|
|
return false;
|
|
}
|
|
|
|
/* Head of list must not be cloned. */
|
|
if (skb_unclone(head, GFP_ATOMIC))
|
|
return false;
|
|
|
|
/* If the first fragment is fragmented itself, we split
|
|
* it to two chunks: the first with data and paged part
|
|
* and the second, holding only fragments. */
|
|
if (skb_has_frag_list(head)) {
|
|
struct sk_buff *clone;
|
|
int i, plen = 0;
|
|
|
|
clone = alloc_skb(0, GFP_ATOMIC);
|
|
if (clone == NULL)
|
|
return false;
|
|
|
|
clone->next = head->next;
|
|
head->next = clone;
|
|
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
|
|
skb_frag_list_init(head);
|
|
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
|
|
plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
|
|
clone->len = clone->data_len = head->data_len - plen;
|
|
head->data_len -= clone->len;
|
|
head->len -= clone->len;
|
|
clone->csum = 0;
|
|
clone->ip_summed = head->ip_summed;
|
|
|
|
add_frag_mem_limit(fq->q.net, clone->truesize);
|
|
}
|
|
|
|
/* morph head into last received skb: prev.
|
|
*
|
|
* This allows callers of ipv6 conntrack defrag to continue
|
|
* to use the last skb(frag) passed into the reasm engine.
|
|
* The last skb frag 'silently' turns into the full reassembled skb.
|
|
*
|
|
* Since prev is also part of q->fragments we have to clone it first.
|
|
*/
|
|
if (head != prev) {
|
|
struct sk_buff *iter;
|
|
|
|
fp = skb_clone(prev, GFP_ATOMIC);
|
|
if (!fp)
|
|
return false;
|
|
|
|
fp->next = prev->next;
|
|
|
|
iter = head;
|
|
while (iter) {
|
|
if (iter->next == prev) {
|
|
iter->next = fp;
|
|
break;
|
|
}
|
|
iter = iter->next;
|
|
}
|
|
|
|
skb_morph(prev, head);
|
|
prev->next = head->next;
|
|
consume_skb(head);
|
|
head = prev;
|
|
}
|
|
|
|
/* We have to remove fragment header from datagram and to relocate
|
|
* header in order to calculate ICV correctly. */
|
|
skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
|
|
memmove(head->head + sizeof(struct frag_hdr), head->head,
|
|
(head->data - head->head) - sizeof(struct frag_hdr));
|
|
head->mac_header += sizeof(struct frag_hdr);
|
|
head->network_header += sizeof(struct frag_hdr);
|
|
|
|
skb_shinfo(head)->frag_list = head->next;
|
|
skb_reset_transport_header(head);
|
|
skb_push(head, head->data - skb_network_header(head));
|
|
|
|
for (fp = head->next; fp; fp = fp->next) {
|
|
head->data_len += fp->len;
|
|
head->len += fp->len;
|
|
if (head->ip_summed != fp->ip_summed)
|
|
head->ip_summed = CHECKSUM_NONE;
|
|
else if (head->ip_summed == CHECKSUM_COMPLETE)
|
|
head->csum = csum_add(head->csum, fp->csum);
|
|
head->truesize += fp->truesize;
|
|
}
|
|
sub_frag_mem_limit(fq->q.net, head->truesize);
|
|
|
|
head->ignore_df = 1;
|
|
head->next = NULL;
|
|
head->dev = dev;
|
|
head->tstamp = fq->q.stamp;
|
|
ipv6_hdr(head)->payload_len = htons(payload_len);
|
|
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
|
|
IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
|
|
|
|
/* Yes, and fold redundant checksum back. 8) */
|
|
if (head->ip_summed == CHECKSUM_COMPLETE)
|
|
head->csum = csum_partial(skb_network_header(head),
|
|
skb_network_header_len(head),
|
|
head->csum);
|
|
|
|
fq->q.fragments = NULL;
|
|
fq->q.fragments_tail = NULL;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* find the header just before Fragment Header.
|
|
*
|
|
* if success return 0 and set ...
|
|
* (*prevhdrp): the value of "Next Header Field" in the header
|
|
* just before Fragment Header.
|
|
* (*prevhoff): the offset of "Next Header Field" in the header
|
|
* just before Fragment Header.
|
|
* (*fhoff) : the offset of Fragment Header.
|
|
*
|
|
* Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
|
|
*
|
|
*/
|
|
static int
|
|
find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
|
|
{
|
|
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
|
|
const int netoff = skb_network_offset(skb);
|
|
u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
|
|
int start = netoff + sizeof(struct ipv6hdr);
|
|
int len = skb->len - start;
|
|
u8 prevhdr = NEXTHDR_IPV6;
|
|
|
|
while (nexthdr != NEXTHDR_FRAGMENT) {
|
|
struct ipv6_opt_hdr hdr;
|
|
int hdrlen;
|
|
|
|
if (!ipv6_ext_hdr(nexthdr)) {
|
|
return -1;
|
|
}
|
|
if (nexthdr == NEXTHDR_NONE) {
|
|
pr_debug("next header is none\n");
|
|
return -1;
|
|
}
|
|
if (len < (int)sizeof(struct ipv6_opt_hdr)) {
|
|
pr_debug("too short\n");
|
|
return -1;
|
|
}
|
|
if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
|
|
BUG();
|
|
if (nexthdr == NEXTHDR_AUTH)
|
|
hdrlen = (hdr.hdrlen+2)<<2;
|
|
else
|
|
hdrlen = ipv6_optlen(&hdr);
|
|
|
|
prevhdr = nexthdr;
|
|
prev_nhoff = start;
|
|
|
|
nexthdr = hdr.nexthdr;
|
|
len -= hdrlen;
|
|
start += hdrlen;
|
|
}
|
|
|
|
if (len < 0)
|
|
return -1;
|
|
|
|
*prevhdrp = prevhdr;
|
|
*prevhoff = prev_nhoff;
|
|
*fhoff = start;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Feed one IPv6 packet into the conntrack reassembly engine.
 *
 * @net:  namespace whose fragment queues are used
 * @skb:  the packet
 * @user: passed on to the fragment queue lookup as part of the key
 *
 * Returns:
 *   0            the packet carries no fragment header or has a zero
 *                payload length (left alone), or it completed a
 *                datagram and @skb now is the reassembled packet
 *   -EINPROGRESS the fragment has been queued
 *   -EINVAL      the fragment was rejected by nf_ct_frag6_queue()
 *   -ENOMEM      the fragment header could not be pulled or no queue
 *                could be found or created
 */
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
	/* nf_ct_frag6_queue() clears skb->dev, keep it for reassembly */
	struct net_device *dev = skb->dev;
	struct frag_queue *fq;
	struct frag_hdr *fhdr;
	struct ipv6hdr *hdr;
	int fhoff, nhoff;
	int iif, ret;
	u8 prevhdr;

	/* Jumbo payload inhibits frag. header */
	if (ipv6_hdr(skb)->payload_len == 0) {
		pr_debug("payload len = 0\n");
		return 0;
	}

	/* not a fragment */
	if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
		return 0;

	if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
		return -ENOMEM;

	/* Header pointers are taken only after the pull above. */
	skb_set_transport_header(skb, fhoff);
	hdr = ipv6_hdr(skb);
	fhdr = (struct frag_hdr *)skb_transport_header(skb);

	/* Reassembly changes skb->truesize, so the skb must no longer be
	 * charged to a socket once it enters the queue.
	 */
	skb_orphan(skb);

	iif = skb->dev ? skb->dev->ifindex : 0;
	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
		     iif, ip6_frag_ecn(hdr));
	if (!fq) {
		pr_debug("Can't find and can't create new queue\n");
		return -ENOMEM;
	}

	spin_lock_bh(&fq->q.lock);

	if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
		ret = -EINVAL;
	} else {
		/* after queue has assumed skb ownership, only 0 or
		 * -EINPROGRESS must be returned.
		 */
		ret = -EINPROGRESS;
		if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
		    fq->q.meat == fq->q.len &&
		    nf_ct_frag6_reasm(fq, skb, dev))
			ret = 0;
	}

	spin_unlock_bh(&fq->q.lock);
	inet_frag_put(&fq->q, &nf_frags);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
|
|
|
|
static int nf_ct_net_init(struct net *net)
|
|
{
|
|
int res;
|
|
|
|
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
|
|
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
|
|
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
|
|
res = inet_frags_init_net(&net->nf_frag.frags);
|
|
if (res)
|
|
return res;
|
|
res = nf_ct_frag6_sysctl_register(net);
|
|
if (res)
|
|
inet_frags_uninit_net(&net->nf_frag.frags);
|
|
return res;
|
|
}
|
|
|
|
static void nf_ct_net_exit(struct net *net)
|
|
{
|
|
nf_ct_frags6_sysctl_unregister(net);
|
|
inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
|
|
}
|
|
|
|
static struct pernet_operations nf_ct_net_ops = {
|
|
.init = nf_ct_net_init,
|
|
.exit = nf_ct_net_exit,
|
|
};
|
|
|
|
int nf_ct_frag6_init(void)
|
|
{
|
|
int ret = 0;
|
|
|
|
nf_frags.hashfn = nf_hashfn;
|
|
nf_frags.constructor = ip6_frag_init;
|
|
nf_frags.destructor = NULL;
|
|
nf_frags.qsize = sizeof(struct frag_queue);
|
|
nf_frags.match = ip6_frag_match;
|
|
nf_frags.frag_expire = nf_ct_frag6_expire;
|
|
nf_frags.frags_cache_name = nf_frags_cache_name;
|
|
ret = inet_frags_init(&nf_frags);
|
|
if (ret)
|
|
goto out;
|
|
ret = register_pernet_subsys(&nf_ct_net_ops);
|
|
if (ret)
|
|
inet_frags_fini(&nf_frags);
|
|
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
void nf_ct_frag6_cleanup(void)
|
|
{
|
|
unregister_pernet_subsys(&nf_ct_net_ops);
|
|
inet_frags_fini(&nf_frags);
|
|
}
|