Merge branch 'ipv4-Enable-support-for-IPv6-gateway-with-IPv4-routes'

David Ahern says:

====================
ipv4: Enable support for IPv6 gateway with IPv4 routes

Last set of three with the end goal of enabling IPv6 gateways with IPv4
routes.

This set adds fib6_nh_init and release to the IPv6 stubs, and adds neighbor
helpers that IPv4 code invokes to resolve an IPv6 address. When using
an IPv6 neighbor entry the hh_cache is bypassed as it contains the wrong
ethernet header for an IPv4 packet.

The nh_common nhc_has_gw was a temporary field used to convert existing
code from fib{6}_nh to fib_nh_common. That field is now converted to
nhc_gw_family to differentiate the address family of the gateway entry
as opposed to the address family of the container of fib_nh_common.

Existing code for rtable and fib_config is refactored to prepare
for a v6 address and then support is added. From there various
miscellaneous functions are updated to handle a v6 gateway - from
validating the v6 address to lookups in bpf code to verifying the
nexthop state.

Offload drivers - mlxsw and rocker - are modified to detect the v6
gateway and reject the route as 'unsupported'. e.g.,

    $ ip ro add 172.16.101.0/24 via inet6 fe80::202:ff:fe00:b dev swp1s0
    Error: mlxsw_spectrum: IPv6 gateway with IPv4 route is not supported.

This can be removed in time once support is added to each.

With the infrastructure changes in place, patch 17 enables it by adding
support for RTA_VIA to IPv4. RTA_VIA can be used for IPv4 addresses as
well. Only one of RTA_VIA and RTA_GATEWAY can be passed in a request.

Patch 18 adds a few test cases to fib_tests.sh.

v2
- comments from Ido - fixed typos as noted and updated messages
- add commit message to patch 1
- In patch 9, ipv4: Add fib_check_nh_v6_gw, moved the call to
  fib6_nh_release under the 'if (!err)' check as the intention is
  that release should not be called if init fails.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-04-08 15:22:41 -07:00
commit 0ed8c3dc41
32 changed files with 700 additions and 261 deletions

View file

@ -351,7 +351,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
if (family == AF_INET) {
rt = container_of(dst, struct rtable, dst);
return rt->rt_uses_gateway;
return rt->rt_gw_family == AF_INET;
}
rt6 = container_of(dst, struct rt6_info, dst);

View file

@ -1407,7 +1407,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
if (neigh->nud_state & NUD_VALID) {
nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
" is %pM, Gateway is 0x%08X \n", dst_ip,
neigh->ha, ntohl(rt->rt_gateway));
neigh->ha, ntohl(rt->rt_gw4));
if (arpindex >= 0) {
if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {

View file

@ -116,11 +116,15 @@ static struct net_device * __init ipddp_init(void)
*/
static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev)
{
__be32 paddr = skb_rtable(skb)->rt_gateway;
struct rtable *rtable = skb_rtable(skb);
__be32 paddr = 0;
struct ddpehdr *ddp;
struct ipddp_route *rt;
struct atalk_addr *our_addr;
if (rtable->rt_gw_family == AF_INET)
paddr = rtable->rt_gw4;
spin_lock(&ipddp_route_lock);
/*

View file

@ -70,7 +70,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
if (ret)
return ret;
if (mlx5_lag_is_multipath(mdev) && !rt->rt_gateway)
if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET)
return -ENETUNREACH;
#else
return -EOPNOTSUPP;

View file

@ -4915,7 +4915,7 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_has_gw;
return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
}
static struct fib6_info *
@ -5055,7 +5055,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt)
{
return rt->fib6_nh.fib_nh_has_gw ||
return rt->fib6_nh.fib_nh_gw_family ||
mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
@ -6092,6 +6092,14 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
return notifier_from_errno(-EINVAL);
}
if (info->family == AF_INET) {
struct fib_entry_notifier_info *fen_info = ptr;
if (fen_info->fi->fib_nh_is_v6) {
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
return notifier_from_errno(-EINVAL);
}
}
break;
}

View file

@ -316,7 +316,11 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
dev = rt->dst.dev;
*saddrp = fl4.saddr;
*daddrp = rt->rt_gateway;
if (rt->rt_gw_family == AF_INET)
*daddrp = rt->rt_gw4;
/* can not offload if route has an IPv6 gateway */
else if (rt->rt_gw_family == AF_INET6)
dev = NULL;
out:
ip_rt_put(rt);

View file

@ -2207,6 +2207,15 @@ static int rocker_router_fib_event(struct notifier_block *nb,
switch (event) {
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
if (info->family == AF_INET) {
struct fib_entry_notifier_info *fen_info = ptr;
if (fen_info->fi->fib_nh_is_v6) {
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
return notifier_from_errno(-EINVAL);
}
}
memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
/* Take reference on fib_info to prevent it from being
* freed while work is queued. Release it afterwards.

View file

@ -370,7 +370,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb);
ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
@ -549,7 +549,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
u32 nexthop;
bool is_v6gw = false;
int ret = -EINVAL;
nf_reset(skb);
@ -572,13 +572,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
rcu_read_lock_bh();
nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr);
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb);
/* if crossing protocols, can not use the cached header */
ret = neigh_output(neigh, skb, is_v6gw);
rcu_read_unlock_bh();
return ret;
}

View file

@ -69,7 +69,7 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
return !(f6i->fib6_flags & (RTF_ADDRCONF|RTF_DYNAMIC)) &&
f6i->fib6_nh.fib_nh_has_gw;
f6i->fib6_nh.fib_nh_gw_family;
}
void ip6_route_input(struct sk_buff *skb);

View file

@ -32,10 +32,14 @@ struct fib_config {
u8 fc_protocol;
u8 fc_scope;
u8 fc_type;
/* 3 bytes unused */
u8 fc_gw_family;
/* 2 bytes unused */
u32 fc_table;
__be32 fc_dst;
__be32 fc_gw;
union {
__be32 fc_gw4;
struct in6_addr fc_gw6;
};
int fc_oif;
u32 fc_flags;
u32 fc_priority;
@ -83,8 +87,8 @@ struct fib_nh_common {
struct lwtunnel_state *nhc_lwtstate;
unsigned char nhc_scope;
u8 nhc_family;
u8 nhc_has_gw:1,
unused:7;
u8 nhc_gw_family;
union {
__be32 ipv4;
struct in6_addr ipv6;
@ -112,8 +116,7 @@ struct fib_nh {
#define fib_nh_flags nh_common.nhc_flags
#define fib_nh_lws nh_common.nhc_lwtstate
#define fib_nh_scope nh_common.nhc_scope
#define fib_nh_family nh_common.nhc_family
#define fib_nh_has_gw nh_common.nhc_has_gw
#define fib_nh_gw_family nh_common.nhc_gw_family
#define fib_nh_gw4 nh_common.nhc_gw.ipv4
#define fib_nh_gw6 nh_common.nhc_gw.ipv6
#define fib_nh_weight nh_common.nhc_weight
@ -144,6 +147,7 @@ struct fib_info {
#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
int fib_nhs;
bool fib_nh_is_v6;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].fib_nh_dev
@ -397,6 +401,8 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
/* Exported by fib_frontend.c */
extern const struct nla_policy rtm_ipv4_policy[];
void ip_fib_init(void);
int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
struct netlink_ext_ack *extack);
__be32 fib_compute_spec_dst(struct sk_buff *skb);
bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev);
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,

View file

@ -12,6 +12,8 @@
/* structs from net/ip6_fib.h */
struct fib6_info;
struct fib6_nh;
struct fib6_config;
/* This is ugly, ideally these symbols should be built
* into the core kernel.
@ -40,6 +42,10 @@ struct ipv6_stub {
u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr);
int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,

View file

@ -2,6 +2,8 @@
#ifndef _NDISC_H
#define _NDISC_H
#include <net/ipv6_stubs.h>
/*
* ICMP codes for neighbour discovery messages
*/
@ -379,6 +381,14 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev
return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev);
}
/* Same lookup as __ipv6_neigh_lookup_noref(), except that the neighbour
 * table is reached through ipv6_stub->nd_tbl rather than by naming nd_tbl
 * directly.
 *
 * @dev:  device the neighbour entry is looked up on
 * @pkey: lookup key, compared with neigh_key_eq128 and hashed with
 *        ndisc_hashfn
 *
 * Returns whatever ___neigh_lookup_noref() returns for that key.
 */
static inline
struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev,
						 const void *pkey)
{
	struct neigh_table *tbl = ipv6_stub->nd_tbl;

	return ___neigh_lookup_noref(tbl, neigh_key_eq128, ndisc_hashfn,
				     pkey, dev);
}
static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey)
{
struct neighbour *n;
@ -409,6 +419,36 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
rcu_read_unlock_bh();
}
/* Refresh the 'confirmed' timestamp of the neighbour entry for @pkey on
 * @dev, looking the entry up via the ipv6_stub based helper. Does nothing
 * when no entry is found. The lookup and update run inside an
 * rcu_read_lock_bh() section.
 */
static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
					     const void *pkey)
{
	struct neighbour *neigh;
	unsigned long stamp;

	rcu_read_lock_bh();

	neigh = __ipv6_neigh_lookup_noref_stub(dev, pkey);
	if (!neigh)
		goto unlock;

	stamp = jiffies;
	/* only write when the value changes, to avoid dirtying the entry */
	if (neigh->confirmed != stamp)
		neigh->confirmed = stamp;

unlock:
	rcu_read_unlock_bh();
}
/* Find the neighbour entry for IPv6 address @addr on @dev, creating one
 * through __neigh_create() when the lookup comes back empty.
 *
 * Goes through ipv6_stub for both the lookup and the table handed to
 * __neigh_create(); meant for use outside of IPv6 core.
 *
 * Returns the entry found, or the result of __neigh_create().
 */
static inline struct neighbour *ip_neigh_gw6(struct net_device *dev,
					     const void *addr)
{
	struct neighbour *found = __ipv6_neigh_lookup_noref_stub(dev, addr);

	if (likely(found))
		return found;

	return __neigh_create(ipv6_stub->nd_tbl, addr, dev, false);
}
int ndisc_init(void);
int ndisc_late_init(void);

View file

@ -498,11 +498,12 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
return dev_queue_xmit(skb);
}
static inline int neigh_output(struct neighbour *n, struct sk_buff *skb)
static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
bool skip_cache)
{
const struct hh_cache *hh = &n->hh;
if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache)
return neigh_hh_output(hh, skb);
else
return n->output(n, skb);

View file

@ -29,6 +29,8 @@
#include <net/flow.h>
#include <net/inet_sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/ndisc.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@ -55,12 +57,15 @@ struct rtable {
unsigned int rt_flags;
__u16 rt_type;
__u8 rt_is_input;
__u8 rt_uses_gateway;
u8 rt_gw_family;
int rt_iif;
/* Info on neighbour */
__be32 rt_gateway;
union {
__be32 rt_gw4;
struct in6_addr rt_gw6;
};
/* Miscellaneous cached information */
u32 rt_mtu_locked:1,
@ -82,8 +87,8 @@ static inline bool rt_is_output_route(const struct rtable *rt)
static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
{
if (rt->rt_gateway)
return rt->rt_gateway;
if (rt->rt_gw_family == AF_INET)
return rt->rt_gw4;
return daddr;
}
@ -347,4 +352,34 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
return hoplimit;
}
/* Find the neighbour entry for IPv4 address @daddr on @dev, creating one
 * in arp_tbl through __neigh_create() when the lookup comes back empty.
 *
 * Returns the entry found, or the result of __neigh_create().
 */
static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
					     __be32 daddr)
{
	struct neighbour *found = __ipv4_neigh_lookup_noref(dev, daddr);

	if (likely(found))
		return found;

	return __neigh_create(&arp_tbl, &daddr, dev, false);
}
/* Pick the neighbour entry to use for sending @skb over route @rt.
 *
 * - IPv4 gateway (rt_gw_family == AF_INET): resolve rt->rt_gw4.
 * - IPv6 gateway (rt_gw_family == AF_INET6): resolve rt->rt_gw6 and set
 *   *@is_v6gw to true.
 * - any other value: resolve the destination address taken from the
 *   packet's IP header.
 *
 * *@is_v6gw is written only in the IPv6 case, so the caller has to
 * initialise it before the call.
 *
 * Returns whatever the selected ip_neigh_gw4()/ip_neigh_gw6() helper
 * returns.
 */
static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
						struct sk_buff *skb,
						bool *is_v6gw)
{
	struct net_device *dev = rt->dst.dev;

	if (likely(rt->rt_gw_family == AF_INET))
		return ip_neigh_gw4(dev, rt->rt_gw4);

	if (rt->rt_gw_family == AF_INET6) {
		struct neighbour *neigh6 = ip_neigh_gw6(dev, &rt->rt_gw6);

		*is_v6gw = true;
		return neigh6;
	}

	return ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
}
#endif /* _ROUTE_H */

View file

@ -69,13 +69,13 @@ TRACE_EVENT(fib_table_lookup,
__assign_str(name, dev ? dev->name : "-");
if (nhc) {
if (nhc->nhc_family == AF_INET) {
if (nhc->nhc_gw_family == AF_INET) {
p32 = (__be32 *) __entry->gw4;
*p32 = nhc->nhc_gw.ipv4;
in6 = (struct in6_addr *)__entry->gw6;
*in6 = in6_zero;
} else if (nhc->nhc_family == AF_INET6) {
} else if (nhc->nhc_gw_family == AF_INET6) {
p32 = (__be32 *) __entry->gw4;
*p32 = 0;

View file

@ -345,8 +345,8 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
rt = (struct rtable *) dst;
if (rt->rt_gateway)
daddr = &rt->rt_gateway;
if (rt->rt_gw_family == AF_INET)
daddr = &rt->rt_gw4;
else
daddr = &ip_hdr(skb)->daddr;
n = dst_neigh_lookup(dst, daddr);

View file

@ -4639,15 +4639,26 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
dev = nhc->nhc_dev;
if (nhc->nhc_has_gw)
params->ipv4_dst = nhc->nhc_gw.ipv4;
params->rt_metric = res.fi->fib_priority;
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
if (likely(nhc->nhc_gw_family != AF_INET6)) {
if (nhc->nhc_gw_family)
params->ipv4_dst = nhc->nhc_gw.ipv4;
neigh = __ipv4_neigh_lookup_noref(dev,
(__force u32)params->ipv4_dst);
} else {
struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
params->family = AF_INET6;
*dst = nhc->nhc_gw.ipv6;
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
}
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
@ -4752,18 +4763,16 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (f6i->fib6_nh.fib_nh_lws)
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
if (f6i->fib6_nh.fib_nh_has_gw)
if (f6i->fib6_nh.fib_nh_gw_family)
*dst = f6i->fib6_nh.fib_nh_gw6;
dev = f6i->fib6_nh.fib_nh_dev;
params->rt_metric = f6i->fib6_metric;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here. Can not use __ipv6_neigh_lookup_noref here
* because we need to get nd_tbl via the stub
* not needed here.
*/
neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
ndisc_hashfn, dst, dev);
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;

View file

@ -558,7 +558,8 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
if (rt->rt_gateway.sa_family == AF_INET && addr) {
unsigned int addr_type;
cfg->fc_gw = addr;
cfg->fc_gw4 = addr;
cfg->fc_gw_family = AF_INET;
addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
if (rt->rt_flags & RTF_GATEWAY &&
addr_type == RTN_UNICAST)
@ -568,7 +569,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
if (cmd == SIOCDELRT)
return 0;
if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family)
return -EINVAL;
if (cfg->fc_scope == RT_SCOPE_NOWHERE)
@ -664,10 +665,55 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_DPORT] = { .type = NLA_U16 },
};
int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct rtvia *via;
int alen;
if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA");
return -EINVAL;
}
via = nla_data(nla);
alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr);
switch (via->rtvia_family) {
case AF_INET:
if (alen != sizeof(__be32)) {
NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA");
return -EINVAL;
}
cfg->fc_gw_family = AF_INET;
cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
break;
case AF_INET6:
#ifdef CONFIG_IPV6
if (alen != sizeof(struct in6_addr)) {
NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
return -EINVAL;
}
cfg->fc_gw_family = AF_INET6;
cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr);
#else
NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
return -EINVAL;
#endif
break;
default:
NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA");
return -EINVAL;
}
return 0;
}
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
struct nlmsghdr *nlh, struct fib_config *cfg,
struct netlink_ext_ack *extack)
{
bool has_gw = false, has_via = false;
struct nlattr *attr;
int err, remaining;
struct rtmsg *rtm;
@ -708,12 +754,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
cfg->fc_oif = nla_get_u32(attr);
break;
case RTA_GATEWAY:
cfg->fc_gw = nla_get_be32(attr);
has_gw = true;
cfg->fc_gw_family = AF_INET;
cfg->fc_gw4 = nla_get_be32(attr);
break;
case RTA_VIA:
NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
err = -EINVAL;
goto errout;
has_via = true;
err = fib_gw_from_via(cfg, attr, extack);
if (err)
goto errout;
break;
case RTA_PRIORITY:
cfg->fc_priority = nla_get_u32(attr);
break;
@ -752,6 +802,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
}
}
/* RTA_GATEWAY and RTA_VIA are mutually exclusive. Every earlier
 * "goto errout" in this function is taken with err already holding a
 * failure code; here nothing has failed yet, so err must be set
 * explicitly or the conflicting request would be reported as success.
 */
if (has_gw && has_via) {
	NL_SET_ERR_MSG(extack,
		       "Nexthop configuration can not contain both GATEWAY and VIA");
	err = -EINVAL;
	goto errout;
}
return 0;
errout:
return err;

View file

@ -41,6 +41,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
@ -276,7 +277,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
for_nexthops(fi) {
if (nh->fib_nh_oif != onh->fib_nh_oif ||
nh->fib_nh_gw4 != onh->fib_nh_gw4 ||
nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
nh->fib_nh_scope != onh->fib_nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->fib_nh_weight != onh->fib_nh_weight ||
@ -287,6 +288,15 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
return -1;
if (nh->fib_nh_gw_family == AF_INET &&
nh->fib_nh_gw4 != onh->fib_nh_gw4)
return -1;
if (nh->fib_nh_gw_family == AF_INET6 &&
ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
return -1;
onh++;
} endfor_nexthops(fi);
return 0;
@ -447,10 +457,18 @@ static int fib_detect_death(struct fib_info *fi, int order,
struct fib_info **last_resort, int *last_idx,
int dflt)
{
const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
struct neighbour *n;
int state = NUD_NONE;
n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].fib_nh_gw4, fi->fib_dev);
if (likely(nhc->nhc_gw_family == AF_INET))
n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
else if (nhc->nhc_gw_family == AF_INET6)
n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
nhc->nhc_dev);
else
n = NULL;
if (n) {
state = n->nud_state;
neigh_release(n);
@ -511,10 +529,12 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
goto init_failure;
nh->fib_nh_oif = cfg->fc_oif;
if (cfg->fc_gw) {
nh->fib_nh_gw4 = cfg->fc_gw;
nh->fib_nh_has_gw = 1;
}
nh->fib_nh_gw_family = cfg->fc_gw_family;
if (cfg->fc_gw_family == AF_INET)
nh->fib_nh_gw4 = cfg->fc_gw4;
else if (cfg->fc_gw_family == AF_INET6)
nh->fib_nh_gw6 = cfg->fc_gw6;
nh->fib_nh_flags = cfg->fc_flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
@ -586,11 +606,23 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla)
fib_cfg.fc_gw = nla_get_in_addr(nla);
nlav = nla_find(attrs, attrlen, RTA_VIA);
if (nla && nlav) {
NL_SET_ERR_MSG(extack,
"Nexthop configuration can not contain both GATEWAY and VIA");
return -EINVAL;
}
if (nla) {
fib_cfg.fc_gw_family = AF_INET;
fib_cfg.fc_gw4 = nla_get_in_addr(nla);
} else if (nlav) {
ret = fib_gw_from_via(&fib_cfg, nlav, extack);
if (ret)
goto errout;
}
nla = nla_find(attrs, attrlen, RTA_FLOW);
if (nla)
@ -616,10 +648,16 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
"Nexthop device index does not match RTA_OIF");
goto errout;
}
if (cfg->fc_gw && fi->fib_nh->fib_nh_gw4 != cfg->fc_gw) {
NL_SET_ERR_MSG(extack,
"Nexthop gateway does not match RTA_GATEWAY");
goto errout;
if (cfg->fc_gw_family) {
if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family ||
(cfg->fc_gw_family == AF_INET &&
fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) ||
(cfg->fc_gw_family == AF_INET6 &&
ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) {
NL_SET_ERR_MSG(extack,
"Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
goto errout;
}
}
#ifdef CONFIG_IP_ROUTE_CLASSID
if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
@ -719,7 +757,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
if (cfg->fc_oif || cfg->fc_gw) {
if (cfg->fc_oif || cfg->fc_gw_family) {
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
fi->fib_nh, cfg, extack))
@ -730,10 +768,20 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
cfg->fc_flow != fi->fib_nh->nh_tclassid)
return 1;
#endif
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->fib_nh_oif) &&
(!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->fib_nh_gw4))
return 0;
return 1;
if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) ||
(cfg->fc_gw_family &&
cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family))
return 1;
if (cfg->fc_gw_family == AF_INET &&
cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4)
return 1;
if (cfg->fc_gw_family == AF_INET6 &&
ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6))
return 1;
return 0;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@ -754,11 +802,43 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla && nla_get_in_addr(nla) != nh->fib_nh_gw4)
return 1;
nlav = nla_find(attrs, attrlen, RTA_VIA);
if (nla && nlav) {
NL_SET_ERR_MSG(extack,
"Nexthop configuration can not contain both GATEWAY and VIA");
return -EINVAL;
}
if (nla) {
if (nh->fib_nh_gw_family != AF_INET ||
nla_get_in_addr(nla) != nh->fib_nh_gw4)
return 1;
} else if (nlav) {
struct fib_config cfg2;
int err;
err = fib_gw_from_via(&cfg2, nlav, extack);
if (err)
return err;
switch (nh->fib_nh_gw_family) {
case AF_INET:
if (cfg2.fc_gw_family != AF_INET ||
cfg2.fc_gw4 != nh->fib_nh_gw4)
return 1;
break;
case AF_INET6:
if (cfg2.fc_gw_family != AF_INET6 ||
ipv6_addr_cmp(&cfg2.fc_gw6,
&nh->fib_nh_gw6))
return 1;
break;
}
}
#ifdef CONFIG_IP_ROUTE_CLASSID
nla = nla_find(attrs, attrlen, RTA_FLOW);
if (nla && nla_get_u32(nla) != nh->nh_tclassid)
@ -812,6 +892,30 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
return true;
}
/* Validate an IPv4 nexthop whose gateway is an IPv6 address.
 *
 * A temporary fib6_nh is initialised through ipv6_stub->fib6_nh_init()
 * from the nexthop's flags (with RTF_GATEWAY added), output interface and
 * IPv6 gateway. On success the device chosen for the temporary nexthop is
 * adopted by @nh, and the temporary nexthop is released again.
 *
 * @net:    network namespace passed to fib6_nh_init()
 * @nh:     nexthop being validated; fib_nh_dev, fib_nh_oif and
 *          fib_nh_scope are updated on success
 * @table:  table id placed in the temporary fib6_config
 * @extack: passed through to fib6_nh_init() for error reporting
 *
 * Returns the result of fib6_nh_init() (0 on success).
 */
static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
u32 table, struct netlink_ext_ack *extack)
{
struct fib6_config cfg = {
.fc_table = table,
.fc_flags = nh->fib_nh_flags | RTF_GATEWAY,
.fc_ifindex = nh->fib_nh_oif,
.fc_gateway = nh->fib_nh_gw6,
};
struct fib6_nh fib6_nh = {};
int err;
err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
if (!err) {
/* Take our own reference on the device before the temporary
 * nexthop is released below.
 * NOTE(review): presumably fib6_nh_release() drops the reference
 * held by the temporary nexthop - confirm.
 */
nh->fib_nh_dev = fib6_nh.fib_nh_dev;
dev_hold(nh->fib_nh_dev);
nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
nh->fib_nh_scope = RT_SCOPE_LINK;
/* release is called only when init succeeded */
ipv6_stub->fib6_nh_release(&fib6_nh);
}
return err;
}
/*
* Picture
@ -856,134 +960,152 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
* |
* |-> {local prefix} (terminal node)
*/
static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
struct netlink_ext_ack *extack)
static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
u8 scope, struct netlink_ext_ack *extack)
{
int err = 0;
struct net *net;
struct net_device *dev;
struct fib_result res;
int err;
net = cfg->fc_nlinfo.nl_net;
if (nh->fib_nh_gw4) {
struct fib_result res;
if (nh->fib_nh_flags & RTNH_F_ONLINK) {
unsigned int addr_type;
if (nh->fib_nh_flags & RTNH_F_ONLINK) {
unsigned int addr_type;
if (cfg->fc_scope >= RT_SCOPE_LINK) {
NL_SET_ERR_MSG(extack,
"Nexthop has invalid scope");
return -EINVAL;
}
dev = __dev_get_by_index(net, nh->fib_nh_oif);
if (!dev) {
NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
return -ENODEV;
}
if (!(dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack,
"Nexthop device is not up");
return -ENETDOWN;
}
addr_type = inet_addr_type_dev_table(net, dev,
nh->fib_nh_gw4);
if (addr_type != RTN_UNICAST) {
NL_SET_ERR_MSG(extack,
"Nexthop has invalid gateway");
return -EINVAL;
}
if (!netif_carrier_ok(dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
nh->fib_nh_dev = dev;
dev_hold(dev);
nh->fib_nh_scope = RT_SCOPE_LINK;
return 0;
if (scope >= RT_SCOPE_LINK) {
NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
return -EINVAL;
}
rcu_read_lock();
{
struct fib_table *tbl = NULL;
struct flowi4 fl4 = {
.daddr = nh->fib_nh_gw4,
.flowi4_scope = cfg->fc_scope + 1,
.flowi4_oif = nh->fib_nh_oif,
.flowi4_iif = LOOPBACK_IFINDEX,
};
/* It is not necessary, but requires a bit of thinking */
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
if (cfg->fc_table)
tbl = fib_get_table(net, cfg->fc_table);
if (tbl)
err = fib_table_lookup(tbl, &fl4, &res,
FIB_LOOKUP_IGNORE_LINKSTATE |
FIB_LOOKUP_NOREF);
/* on error or if no table given do full lookup. This
* is needed for example when nexthops are in the local
* table rather than the given table
*/
if (!tbl || err) {
err = fib_lookup(net, &fl4, &res,
FIB_LOOKUP_IGNORE_LINKSTATE);
}
if (err) {
NL_SET_ERR_MSG(extack,
"Nexthop has invalid gateway");
rcu_read_unlock();
return err;
}
dev = __dev_get_by_index(net, nh->fib_nh_oif);
if (!dev) {
NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
return -ENODEV;
}
err = -EINVAL;
if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
if (!(dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
return -ENETDOWN;
}
addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
if (addr_type != RTN_UNICAST) {
NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
return -EINVAL;
}
if (!netif_carrier_ok(dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
nh->fib_nh_dev = dev;
dev_hold(dev);
nh->fib_nh_scope = RT_SCOPE_LINK;
return 0;
}
rcu_read_lock();
{
struct fib_table *tbl = NULL;
struct flowi4 fl4 = {
.daddr = nh->fib_nh_gw4,
.flowi4_scope = scope + 1,
.flowi4_oif = nh->fib_nh_oif,
.flowi4_iif = LOOPBACK_IFINDEX,
};
/* It is not necessary, but requires a bit of thinking */
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
if (table)
tbl = fib_get_table(net, table);
if (tbl)
err = fib_table_lookup(tbl, &fl4, &res,
FIB_LOOKUP_IGNORE_LINKSTATE |
FIB_LOOKUP_NOREF);
/* on error or if no table given do full lookup. This
* is needed for example when nexthops are in the local
* table rather than the given table
*/
if (!tbl || err) {
err = fib_lookup(net, &fl4, &res,
FIB_LOOKUP_IGNORE_LINKSTATE);
}
if (err) {
NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
goto out;
}
nh->fib_nh_scope = res.scope;
nh->fib_nh_oif = FIB_RES_OIF(res);
nh->fib_nh_dev = dev = FIB_RES_DEV(res);
if (!dev) {
NL_SET_ERR_MSG(extack,
"No egress device for nexthop gateway");
goto out;
}
dev_hold(dev);
if (!netif_carrier_ok(dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
} else {
struct in_device *in_dev;
if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
NL_SET_ERR_MSG(extack,
"Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
return -EINVAL;
}
rcu_read_lock();
err = -ENODEV;
in_dev = inetdev_by_index(net, nh->fib_nh_oif);
if (!in_dev)
goto out;
err = -ENETDOWN;
if (!(in_dev->dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
goto out;
}
nh->fib_nh_dev = in_dev->dev;
dev_hold(nh->fib_nh_dev);
nh->fib_nh_scope = RT_SCOPE_HOST;
if (!netif_carrier_ok(nh->fib_nh_dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
err = 0;
}
err = -EINVAL;
if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
goto out;
}
nh->fib_nh_scope = res.scope;
nh->fib_nh_oif = FIB_RES_OIF(res);
nh->fib_nh_dev = dev = FIB_RES_DEV(res);
if (!dev) {
NL_SET_ERR_MSG(extack,
"No egress device for nexthop gateway");
goto out;
}
dev_hold(dev);
if (!netif_carrier_ok(dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
out:
rcu_read_unlock();
return err;
}
/* Validate a nexthop that has no gateway address.
 *
 * Rejects the PERVASIVE and ONLINK flags, then resolves the device from
 * nh->fib_nh_oif under rcu_read_lock(). On success the device is stored
 * in @nh with a reference held, the scope is set to RT_SCOPE_HOST, and
 * RTNH_F_LINKDOWN is set when the device has no carrier.
 *
 * Returns 0 on success, -EINVAL for invalid flags, -ENODEV when no
 * in_device exists for the interface index, -ENETDOWN when the device is
 * not up.
 */
static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
			      struct netlink_ext_ack *extack)
{
	struct in_device *idev;
	int rc = -ENODEV;

	if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
		return -EINVAL;
	}

	rcu_read_lock();

	idev = inetdev_by_index(net, nh->fib_nh_oif);
	if (!idev)
		goto unlock;

	if (!(idev->dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
		rc = -ENETDOWN;
		goto unlock;
	}

	nh->fib_nh_dev = idev->dev;
	dev_hold(nh->fib_nh_dev);
	nh->fib_nh_scope = RT_SCOPE_HOST;
	if (!netif_carrier_ok(nh->fib_nh_dev))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
	rc = 0;

unlock:
	rcu_read_unlock();
	return rc;
}
/* Validate nexthop @nh for the route described by @cfg, dispatching on
 * the address family of the nexthop's gateway:
 *   AF_INET   -> fib_check_nh_v4_gw()
 *   AF_INET6  -> fib_check_nh_v6_gw()
 *   otherwise -> fib_check_nh_nongw()
 *
 * Returns the result of the selected helper.
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
			struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	u32 table = cfg->fc_table;

	switch (nh->fib_nh_gw_family) {
	case AF_INET:
		return fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope,
					  extack);
	case AF_INET6:
		return fib_check_nh_v6_gw(net, nh, table, extack);
	default:
		return fib_check_nh_nongw(net, nh, extack);
	}
}
static inline unsigned int fib_laddr_hashfn(__be32 val)
{
unsigned int mask = (fib_info_hash_size - 1);
@ -1204,7 +1326,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto failure;
if (fib_props[cfg->fc_type].error) {
if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
NL_SET_ERR_MSG(extack,
"Gateway, device and multipath can not be specified for this route type");
goto err_inval;
@ -1238,7 +1360,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
"Route with host scope can not have multiple nexthops");
goto err_inval;
}
if (nh->fib_nh_gw4) {
if (nh->fib_nh_gw_family) {
NL_SET_ERR_MSG(extack,
"Route with host scope can not have a gateway");
goto err_inval;
@ -1269,6 +1391,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
if (nexthop_nh->fib_nh_gw_family == AF_INET6)
fi->fib_nh_is_v6 = true;
} endfor_nexthops(fi)
fib_rebalance(fi);
@ -1341,18 +1465,32 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
rcu_read_unlock();
}
if (nhc->nhc_has_gw) {
switch (nhc->nhc_family) {
case AF_INET:
if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
switch (nhc->nhc_gw_family) {
case AF_INET:
if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
goto nla_put_failure;
break;
case AF_INET6:
/* if gateway family does not match nexthop family
* gateway is encoded as RTA_VIA
*/
if (nhc->nhc_gw_family != nhc->nhc_family) {
int alen = sizeof(struct in6_addr);
struct nlattr *nla;
struct rtvia *via;
nla = nla_reserve(skb, RTA_VIA, alen + 2);
if (!nla)
goto nla_put_failure;
break;
case AF_INET6:
if (nla_put_in6_addr(skb, RTA_GATEWAY,
&nhc->nhc_gw.ipv6) < 0)
goto nla_put_failure;
break;
via = nla_data(nla);
via->rtvia_family = AF_INET6;
memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
} else if (nla_put_in6_addr(skb, RTA_GATEWAY,
&nhc->nhc_gw.ipv6) < 0) {
goto nla_put_failure;
}
break;
}
*flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
@ -1832,8 +1970,14 @@ static bool fib_good_nh(const struct fib_nh *nh)
rcu_read_lock_bh();
n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
(__force u32)nh->fib_nh_gw4);
if (likely(nh->fib_nh_gw_family == AF_INET))
n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
(__force u32)nh->fib_nh_gw4);
else if (nh->fib_nh_gw_family == AF_INET6)
n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
&nh->fib_nh_gw6);
else
n = NULL;
if (n)
state = n->nud_state;

View file

@ -564,7 +564,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
goto route_err;
rcu_read_unlock();
return &rt->dst;
@ -602,7 +602,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
goto route_err;
return &rt->dst;

View file

@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb)
rt = skb_rtable(skb);
if (opt->is_strictroute && rt->rt_uses_gateway)
if (opt->is_strictroute && rt->rt_gw_family)
goto sr_failed;
IPCB(skb)->flags |= IPSKB_FORWARDED;

View file

@ -188,7 +188,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
u32 nexthop;
bool is_v6gw = false;
if (rt->rt_type == RTN_MULTICAST) {
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
@ -218,16 +218,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
}
rcu_read_lock_bh();
nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
int res;
sock_confirm_neigh(skb, neigh);
res = neigh_output(neigh, skb);
/* if crossing protocols, can not use the cached header */
res = neigh_output(neigh, skb, is_v6gw);
rcu_read_unlock_bh();
return res;
}
@ -472,7 +469,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */

View file

@ -434,37 +434,46 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
const struct rtable *rt = container_of(dst, struct rtable, dst);
struct net_device *dev = dst->dev;
const __be32 *pkey = daddr;
const struct rtable *rt;
struct neighbour *n;
rt = (const struct rtable *) dst;
if (rt->rt_gateway)
pkey = (const __be32 *) &rt->rt_gateway;
else if (skb)
pkey = &ip_hdr(skb)->daddr;
rcu_read_lock_bh();
n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
if (n)
return n;
return neigh_create(&arp_tbl, pkey, dev);
if (likely(rt->rt_gw_family == AF_INET)) {
n = ip_neigh_gw4(dev, rt->rt_gw4);
} else if (rt->rt_gw_family == AF_INET6) {
n = ip_neigh_gw6(dev, &rt->rt_gw6);
} else {
__be32 pkey;
pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
n = ip_neigh_gw4(dev, pkey);
}
if (n && !refcount_inc_not_zero(&n->refcnt))
n = NULL;
rcu_read_unlock_bh();
return n;
}
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
const struct rtable *rt = container_of(dst, struct rtable, dst);
struct net_device *dev = dst->dev;
const __be32 *pkey = daddr;
const struct rtable *rt;
rt = (const struct rtable *)dst;
if (rt->rt_gateway)
pkey = (const __be32 *)&rt->rt_gateway;
else if (!daddr ||
if (rt->rt_gw_family == AF_INET) {
pkey = (const __be32 *)&rt->rt_gw4;
} else if (rt->rt_gw_family == AF_INET6) {
return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
} else if (!daddr ||
(rt->rt_flags &
(RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
(RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
return;
}
__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}
@ -629,8 +638,8 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
if (fnhe->fnhe_gw) {
rt->rt_flags |= RTCF_REDIRECTED;
rt->rt_gateway = fnhe->fnhe_gw;
rt->rt_uses_gateway = 1;
rt->rt_gw_family = AF_INET;
rt->rt_gw4 = fnhe->fnhe_gw;
}
}
@ -747,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
return;
}
if (rt->rt_gateway != old_gw)
if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
return;
in_dev = __in_dev_get_rcu(dev);
@ -1282,7 +1291,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(ip_mtu_locked(dst))) {
if (rt->rt_uses_gateway && mtu > 576)
if (rt->rt_gw_family && mtu > 576)
mtu = 576;
}
@ -1410,8 +1419,10 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
orig = NULL;
}
fill_route_from_fnhe(rt, fnhe);
if (!rt->rt_gateway)
rt->rt_gateway = daddr;
if (!rt->rt_gw4) {
rt->rt_gw4 = daddr;
rt->rt_gw_family = AF_INET;
}
if (do_cache) {
dst_hold(&rt->dst);
@ -1535,14 +1546,20 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (fi) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
struct fib_nh *nh;
if (nh->fib_nh_gw4 && nh->fib_nh_scope == RT_SCOPE_LINK) {
rt->rt_gateway = nh->fib_nh_gw4;
rt->rt_uses_gateway = 1;
if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
rt->rt_gw_family = nhc->nhc_gw_family;
/* only INET and INET6 are supported */
if (likely(nhc->nhc_gw_family == AF_INET))
rt->rt_gw4 = nhc->nhc_gw.ipv4;
else
rt->rt_gw6 = nhc->nhc_gw.ipv6;
}
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@ -1557,8 +1574,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
* However, if we are unsuccessful at storing this
* route into the cache we really need to set it.
*/
if (!rt->rt_gateway)
rt->rt_gateway = daddr;
if (!rt->rt_gw4) {
rt->rt_gw_family = AF_INET;
rt->rt_gw4 = daddr;
}
rt_add_uncached_list(rt);
}
} else
@ -1591,8 +1610,8 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_iif = 0;
rt->rt_pmtu = 0;
rt->rt_mtu_locked = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
rt->rt_gw_family = 0;
rt->rt_gw4 = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
@ -1734,8 +1753,9 @@ static int __mkroute_input(struct sk_buff *skb,
do_cache = res->fi && !itag;
if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
skb->protocol == htons(ETH_P_IP)) {
__be32 gw = nhc->nhc_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
__be32 gw;
gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
if (IN_DEV_SHARED_MEDIA(out_dev) ||
inet_addr_onlink(out_dev, saddr, gw))
IPCB(skb)->flags |= IPSKB_DOREDIRECT;
@ -2284,7 +2304,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
} else {
if (unlikely(fl4->flowi4_flags &
FLOWI_FLAG_KNOWN_NH &&
!(nhc->nhc_has_gw &&
!(nhc->nhc_gw_family &&
nhc->nhc_scope == RT_SCOPE_LINK))) {
do_cache = false;
goto add;
@ -2594,8 +2614,11 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
rt->rt_uses_gateway = ort->rt_uses_gateway;
rt->rt_gw_family = ort->rt_gw_family;
if (rt->rt_gw_family == AF_INET)
rt->rt_gw4 = ort->rt_gw4;
else if (rt->rt_gw_family == AF_INET6)
rt->rt_gw6 = ort->rt_gw6;
INIT_LIST_HEAD(&rt->rt_uncached);
}
@ -2674,9 +2697,22 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure;
}
if (rt->rt_uses_gateway &&
nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
if (rt->rt_gw_family == AF_INET &&
nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
goto nla_put_failure;
} else if (rt->rt_gw_family == AF_INET6) {
int alen = sizeof(struct in6_addr);
struct nlattr *nla;
struct rtvia *via;
nla = nla_reserve(skb, RTA_VIA, alen + 2);
if (!nla)
goto nla_put_failure;
via = nla_data(nla);
via->rtvia_family = AF_INET6;
memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
}
expires = rt->dst.expires;
if (expires) {

View file

@ -97,8 +97,11 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_gw_family = rt->rt_gw_family;
if (rt->rt_gw_family == AF_INET)
xdst->u.rt.rt_gw4 = rt->rt_gw4;
else if (rt->rt_gw_family == AF_INET6)
xdst->u.rt.rt_gw6 = rt->rt_gw6;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);

View file

@ -2421,7 +2421,7 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
for_each_fib6_node_rt_rcu(fn) {
if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
continue;
if (no_gw && rt->fib6_nh.fib_nh_has_gw)
if (no_gw && rt->fib6_nh.fib_nh_gw_family)
continue;
if ((rt->fib6_flags & flags) != flags)
continue;

View file

@ -173,6 +173,14 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
return 0;
}
/*
 * fib6_nh_init entry of the default ipv6_stub: always fails.
 *
 * Records "IPv6 support not enabled in kernel" in @extack and returns
 * -EAFNOSUPPORT. @net, @fib6_nh, @cfg and @gfp_flags are accepted only to
 * match the expected signature; none of them is read.
 */
static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
				     struct fib6_config *cfg, gfp_t gfp_flags,
				     struct netlink_ext_ack *extack)
{
	const int err = -EAFNOSUPPORT;

	NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");

	return err;
}
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_route_input = eafnosupport_ipv6_route_input,
@ -181,6 +189,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.fib6_lookup = eafnosupport_fib6_lookup,
.fib6_multipath_select = eafnosupport_fib6_multipath_select,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
.fib6_nh_init = eafnosupport_fib6_nh_init,
};
EXPORT_SYMBOL_GPL(ipv6_stub);

View file

@ -919,6 +919,8 @@ static const struct ipv6_stub ipv6_stub_impl = {
.fib6_lookup = fib6_lookup,
.fib6_multipath_select = fib6_multipath_select,
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.fib6_nh_init = fib6_nh_init,
.fib6_nh_release = fib6_nh_release,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,

View file

@ -2304,7 +2304,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
if (rt->fib6_nh.fib_nh_has_gw) {
if (rt->fib6_nh.fib_nh_gw_family) {
flags |= RTF_GATEWAY;
seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
} else {

View file

@ -117,7 +117,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb);
ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}

View file

@ -533,7 +533,7 @@ static void rt6_probe(struct fib6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
if (!rt || !rt->fib6_nh.fib_nh_has_gw)
if (!rt || !rt->fib6_nh.fib_nh_gw_family)
return;
nh_gw = &rt->fib6_nh.fib_nh_gw6;
@ -595,7 +595,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
struct neighbour *neigh;
if (rt->fib6_flags & RTF_NONEXTHOP ||
!rt->fib6_nh.fib_nh_has_gw)
!rt->fib6_nh.fib_nh_gw_family)
return RT6_NUD_SUCCEED;
rcu_read_lock_bh();
@ -769,7 +769,7 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
{
return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw;
return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_gw_family;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@ -975,7 +975,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
rt->rt6i_dst = ort->fib6_dst;
rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
rt->rt6i_flags = ort->fib6_flags;
if (ort->fib6_nh.fib_nh_has_gw) {
if (ort->fib6_nh.fib_nh_gw_family) {
rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
rt->rt6i_flags |= RTF_GATEWAY;
}
@ -1860,7 +1860,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rcu_read_unlock();
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!f6i->fib6_nh.fib_nh_has_gw)) {
!f6i->fib6_nh.fib_nh_gw_family)) {
/* Create a RTF_CACHE clone which will not be
* owned by the fib6 tree. It is for the special case where
* the daddr in the skb during the neighbor look-up is different
@ -2430,7 +2430,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
if (!rt->fib6_nh.fib_nh_has_gw)
if (!rt->fib6_nh.fib_nh_gw_family)
continue;
if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
continue;
@ -2964,7 +2964,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
goto out;
fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
fib6_nh->fib_nh_has_gw = 1;
fib6_nh->fib_nh_gw_family = AF_INET6;
}
err = -ENODEV;
@ -3476,7 +3476,7 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
continue;
if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
!rt->fib6_nh.fib_nh_has_gw)
!rt->fib6_nh.fib_nh_gw_family)
continue;
if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
continue;
@ -3807,7 +3807,7 @@ static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
struct in6_addr *gateway = (struct in6_addr *)arg;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
rt->fib6_nh.fib_nh_has_gw &&
rt->fib6_nh.fib_nh_gw_family &&
ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
return -1;
}

View file

@ -137,10 +137,14 @@ static int mpls_xmit(struct sk_buff *skb)
mpls_stats_inc_outucastpkts(out_dev, skb);
if (rt)
err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway,
skb);
else if (rt6) {
if (rt) {
if (rt->rt_gw_family == AF_INET)
err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4,
skb);
else if (rt->rt_gw_family == AF_INET6)
err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6,
skb);
} else if (rt6) {
if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) {
/* 6PE (RFC 4798) */
err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3],

View file

@ -9,7 +9,8 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw"
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
@ -48,6 +49,7 @@ setup()
{
set -e
ip netns add ns1
ip netns set ns1 auto
$IP link set dev lo up
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
@ -698,6 +700,7 @@ route_setup()
set -e
ip netns add ns2
ip netns set ns2 auto
ip -netns ns2 link set dev lo up
ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
@ -1442,6 +1445,70 @@ ipv4_route_metrics_test()
route_cleanup
}
# Exercise IPv4 routes whose gateway is an IPv6 address ("via inet6 ...").
#
# Covers three scenarios against the topology built by route_setup:
#   1. a single path route: add, ping through it, delete
#   2. a multipath route listing the IPv6 nexthop first, then the IPv4 one
#   3. a multipath route listing the IPv4 nexthop first, then the IPv6 one
# For both multipath routes, a delete that lists the nexthops in the opposite
# order is expected to fail with status 2 and an exact-order delete to succeed.
# Each result is reported through log_test; the topology is torn down with
# route_cleanup at the end.
ipv4_route_v6_gw_test()
{
	local rc

	echo
	echo "IPv4 route with IPv6 gateway tests"

	route_setup
	sleep 2

	#
	# single path route
	#
	run_cmd "$IP ro add 172.16.104.0/24 via inet6 2001:db8:101::2"
	rc=$?
	log_test $rc 0 "Single path route with IPv6 gateway"
	# NOTE(review): the exit status of check_route is not captured anywhere
	# in this function, so it does not influence the logged results.
	if [ $rc -eq 0 ]; then
		check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1"
	fi

	# Capture the ping's own exit status. Without this the stale status of
	# the route add above would be logged and a failed ping would pass.
	run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1"
	rc=$?
	log_test $rc 0 "Single path route with IPv6 gateway - ping"

	run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2"
	rc=$?
	log_test $rc 0 "Single path route delete"
	# NOTE(review): the route deleted above is 172.16.104.0/24 but this
	# checks 172.16.112.0/24 - confirm the intended prefix and what
	# check_route is expected to report for a removed route.
	if [ $rc -eq 0 ]; then
		check_route "172.16.112.0/24"
	fi

	#
	# multipath - v6 then v4
	#
	run_cmd "$IP ro add 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
	rc=$?
	log_test $rc 0 "Multipath route add - v6 nexthop then v4"
	if [ $rc -eq 0 ]; then
		check_route "172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
	fi

	# Nexthop order must match the installed route for the delete to succeed.
	run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
	log_test $? 2 " Multipath route delete - nexthops in wrong order"

	run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
	log_test $? 0 " Multipath route delete exact match"

	#
	# multipath - v4 then v6
	#
	run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
	rc=$?
	log_test $rc 0 "Multipath route add - v4 nexthop then v6"
	if [ $rc -eq 0 ]; then
		check_route "172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 weight 1 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1"
	fi

	run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
	log_test $? 2 " Multipath route delete - nexthops in wrong order"

	run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
	log_test $? 0 " Multipath route delete exact match"

	route_cleanup
}
################################################################################
# usage
@ -1511,6 +1578,7 @@ do
ipv4_addr_metric) ipv4_addr_metric_test;;
ipv6_route_metrics) ipv6_route_metrics_test;;
ipv4_route_metrics) ipv4_route_metrics_test;;
ipv4_route_v6_gw) ipv4_route_v6_gw_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac