linux-stable/net/core/dst.c
David Miller 3a2232e92e ipv6: Move dst->from into struct rt6_info.
The dst->from value is only used by ipv6 routes to track where
a route "came from".

Any time we clone or copy a core ipv6 route in the ipv6 routing
tables, we have the copy/clone's ->from point to the base route.

This is used to handle route expiration properly.

Only ipv6 uses this mechanism, and only ipv6 code references
it.  So it is safe to move it into rt6_info.

Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
2017-11-30 09:54:26 -05:00

344 lines
8.3 KiB
C

/*
* net/core/dst.c Protocol independent destination cache.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
#include <net/net_namespace.h>
#include <linux/sched.h>
#include <linux/prefetch.h>
#include <net/lwtunnel.h>
#include <net/xfrm.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
/*
* Theory of operations:
* 1) We use a list, protected by a spinlock, to add
* new entries from both BH and non-BH context.
* 2) In order to keep spinlock held for a small delay,
* we use a second list where are stored long lived
* entries, that are handled by the garbage collect thread
* fired by a workqueue.
* 3) This list is guarded by a mutex,
* so that the gc_task and dst_dev_event() can be synchronized.
*/
/*
* We want to keep lock & list close together
* to dirty as few cache lines as possible in __dst_free().
* As this is not a very strong hint, we dont force an alignment on SMP.
*/
int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
EXPORT_SYMBOL(dst_discard_out);
const struct dst_metrics dst_default_metrics = {
/* This initializer is needed to force linker to place this variable
* into const section. Otherwise it might end into bss section.
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
.refcnt = REFCOUNT_INIT(1),
};
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_ref, int initial_obsolete,
unsigned short flags)
{
dst->dev = dev;
if (dev)
dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
dst->path = dst;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->error = 0;
dst->obsolete = initial_obsolete;
dst->header_len = 0;
dst->trailer_len = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = 0;
#endif
dst->lwtstate = NULL;
atomic_set(&dst->__refcnt, initial_ref);
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
}
EXPORT_SYMBOL(dst_init);
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
int initial_ref, int initial_obsolete, unsigned short flags)
{
struct dst_entry *dst;
if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
if (ops->gc(ops))
return NULL;
}
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
if (!dst)
return NULL;
dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
return dst;
}
EXPORT_SYMBOL(dst_alloc);
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child = NULL;
smp_rmb();
#ifdef CONFIG_XFRM
if (dst->xfrm) {
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
child = xdst->child;
}
#endif
if (!(dst->flags & DST_NOCOUNT))
dst_entries_add(dst->ops, -1);
if (dst->ops->destroy)
dst->ops->destroy(dst);
if (dst->dev)
dev_put(dst->dev);
lwtstate_put(dst->lwtstate);
if (dst->flags & DST_METADATA)
metadata_dst_free((struct metadata_dst *)dst);
else
kmem_cache_free(dst->ops->kmem_cachep, dst);
dst = child;
if (dst)
dst_release_immediate(dst);
return NULL;
}
EXPORT_SYMBOL(dst_destroy);
static void dst_destroy_rcu(struct rcu_head *head)
{
struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
dst = dst_destroy(dst);
}
/* Operations to mark dst as DEAD and clean up the net device referenced
* by dst:
* 1. put the dst under loopback interface and discard all tx/rx packets
* on this route.
* 2. release the net_device
* This function should be called when removing routes from the fib tree
* in preparation for a NETDEV_DOWN/NETDEV_UNREGISTER event and also to
* make the next dst_ops->check() fail.
*/
void dst_dev_put(struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
dst->obsolete = DST_OBSOLETE_DEAD;
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev, true);
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
}
EXPORT_SYMBOL(dst_dev_put);
void dst_release(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
newrefcnt = atomic_dec_return(&dst->__refcnt);
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
if (!newrefcnt)
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
void dst_release_immediate(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
newrefcnt = atomic_dec_return(&dst->__refcnt);
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
if (!newrefcnt)
dst_destroy(dst);
}
}
EXPORT_SYMBOL(dst_release_immediate);
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
{
struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
if (p) {
struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
unsigned long prev, new;
refcount_set(&p->refcnt, 1);
memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
new = (unsigned long) p;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev != old) {
kfree(p);
p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
} else if (prev & DST_METRICS_REFCOUNTED) {
if (refcount_dec_and_test(&old_p->refcnt))
kfree(old_p);
}
}
BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
return (u32 *)p;
}
EXPORT_SYMBOL(dst_cow_metrics_generic);
/* Caller asserts that dst_metrics_read_only(dst) is false. */
void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
{
unsigned long prev, new;
new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev == old)
kfree(__DST_METRICS_PTR(old));
}
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
static struct dst_ops md_dst_ops = {
.family = AF_UNSPEC,
};
static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
WARN_ONCE(1, "Attempting to call output on metadata dst\n");
kfree_skb(skb);
return 0;
}
static int dst_md_discard(struct sk_buff *skb)
{
WARN_ONCE(1, "Attempting to call input on metadata dst\n");
kfree_skb(skb);
return 0;
}
static void __metadata_dst_init(struct metadata_dst *md_dst,
enum metadata_type type, u8 optslen)
{
struct dst_entry *dst;
dst = &md_dst->dst;
dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
DST_METADATA | DST_NOCOUNT);
dst->input = dst_md_discard;
dst->output = dst_md_discard_out;
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
md_dst->type = type;
}
struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type,
gfp_t flags)
{
struct metadata_dst *md_dst;
md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
if (!md_dst)
return NULL;
__metadata_dst_init(md_dst, type, optslen);
return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc);
void metadata_dst_free(struct metadata_dst *md_dst)
{
#ifdef CONFIG_DST_CACHE
if (md_dst->type == METADATA_IP_TUNNEL)
dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
kfree(md_dst);
}
struct metadata_dst __percpu *
metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
{
int cpu;
struct metadata_dst __percpu *md_dst;
md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen,
__alignof__(struct metadata_dst), flags);
if (!md_dst)
return NULL;
for_each_possible_cpu(cpu)
__metadata_dst_init(per_cpu_ptr(md_dst, cpu), type, optslen);
return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
{
#ifdef CONFIG_DST_CACHE
int cpu;
for_each_possible_cpu(cpu) {
struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
if (one_md_dst->type == METADATA_IP_TUNNEL)
dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
}
#endif
free_percpu(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);