linux-stable/drivers/net/vxlan/vxlan_mdb.c
Ido Schimmel 0f83e69f44 vxlan: Add MDB data path support
Integrate MDB support into the Tx path of the VXLAN driver, allowing it
to selectively forward IP multicast traffic according to the matched MDB
entry.

If MDB entries are configured (i.e., 'VXLAN_F_MDB' is set) and the
packet is an IP multicast packet, perform up to three different lookups
according to the following priority:

1. For an (S, G) entry, using {Source VNI, Source IP, Destination IP}.
2. For a (*, G) entry, using {Source VNI, Destination IP}.
3. For the catchall MDB entry (0.0.0.0 or ::), using the source VNI.

The catchall MDB entry is similar to the catchall FDB entry
(00:00:00:00:00:00) that is currently used to transmit BUM (broadcast,
unknown unicast and multicast) traffic. However, unlike the catchall FDB
entry, this entry is only used to transmit unregistered IP multicast
traffic that is not link-local. Therefore, when configured, the catchall
FDB entry will only transmit BULL (broadcast, unknown unicast,
link-local multicast) traffic.

The catchall MDB entry is useful in deployments where inter-subnet
multicast forwarding is used and not all the VTEPs in a tenant domain
are members in all the broadcast domains. In such deployments it is
advantageous to transmit BULL (broadcast, unknown unicast and link-local
multicast) and unregistered IP multicast traffic on different tunnels.
If the same tunnel was used, a VTEP only interested in IP multicast
traffic would also pull all the BULL traffic and drop it as it is not a
member in the originating broadcast domain [1].

If the packet did not match an MDB entry (or if the packet is not an IP
multicast packet), return it to the Tx path, allowing it to be forwarded
according to the FDB.

If the packet did match an MDB entry, forward it to the associated
remote VTEPs. However, if the entry is a (*, G) entry and the associated
remote is in INCLUDE mode, then skip over it as the source IP is not in
its source list (otherwise the packet would have matched on an (S, G)
entry). Similarly, if the associated remote is marked as BLOCKED (can
only be set on (S, G) entries), then skip over it as well as the remote
is in EXCLUDE mode and the source IP is in its source list.

[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-2.6

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-03-17 08:05:50 +00:00

1462 lines
37 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
#include <linux/if_bridge.h>
#include <linux/in.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/rhashtable.h>
#include <linux/rhashtable-types.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/netlink.h>
#include <net/vxlan.h>
#include "vxlan_private.h"
/* Key identifying an MDB entry. The whole structure is used as the
 * rhashtable key (see 'key_len' in vxlan_mdb_rht_params).
 */
struct vxlan_mdb_entry_key {
/* Source address; "any" for (*, G) entries. */
union vxlan_addr src;
/* Group (destination) address; "any" for the all-zeros entry. */
union vxlan_addr dst;
/* Source VNI, in network byte order. */
__be32 vni;
};
/* An MDB entry: a group key together with the remotes it is forwarded to. */
struct vxlan_mdb_entry {
/* Linkage in the per-device 'mdb_tbl' hash table. */
struct rhash_head rhnode;
/* List of 'struct vxlan_mdb_remote', linked through their 'list' member. */
struct list_head remotes;
/* Hash table key of this entry. */
struct vxlan_mdb_entry_key key;
/* Linkage in the per-device 'mdb_list', which is walked by the dump code. */
struct hlist_node mdb_node;
/* Used to free the entry with kfree_rcu(). */
struct rcu_head rcu;
};
/* Remote flag; reported to user space as MDB_FLAGS_BLOCKED. */
#define VXLAN_MDB_REMOTE_F_BLOCKED BIT(0)
/* A remote destination associated with an MDB entry. */
struct vxlan_mdb_remote {
/* Linkage in the 'remotes' list of the owning MDB entry. */
struct list_head list;
/* RCU-managed pointer to the remote destination parameters. */
struct vxlan_rdst __rcu *rd;
/* VXLAN_MDB_REMOTE_F_* flags. */
u8 flags;
/* MCAST_INCLUDE or MCAST_EXCLUDE. */
u8 filter_mode;
/* Routing protocol reported in MDBA_MDB_EATTR_RTPROT. */
u8 rt_protocol;
/* List of 'struct vxlan_mdb_src_entry', linked through their 'node' member. */
struct hlist_head src_list;
/* Used to free the remote with kfree_rcu(). */
struct rcu_head rcu;
};
/* Source entry flag: the entry is marked for deletion during a replace. */
#define VXLAN_SGRP_F_DELETE BIT(0)
/* A source address in the source list of a remote. */
struct vxlan_mdb_src_entry {
/* Linkage in the 'src_list' of the owning remote. */
struct hlist_node node;
/* Source IP address. */
union vxlan_addr addr;
/* VXLAN_SGRP_F_* flags. */
u8 flags;
};
/* Dump state overlaid on the netlink callback context ('cb->ctx') by
 * vxlan_mdb_dump().
 */
struct vxlan_mdb_dump_ctx {
/* Not accessed by the dump code in this file.
 * NOTE(review): presumably reserved for the caller of the dump - confirm.
 */
long reserved;
/* Index of the MDB entry at which the next dump invocation resumes. */
long entry_idx;
/* Index of the remote, within that entry, at which the dump resumes. */
long remote_idx;
};
/* A source address parsed from a user request. */
struct vxlan_mdb_config_src_entry {
/* Source IP address. */
union vxlan_addr addr;
/* Linkage in the 'src_list' of 'struct vxlan_mdb_config'. */
struct list_head node;
};
/* Parsed form of an MDB add / delete request. */
struct vxlan_mdb_config {
/* VXLAN device the request applies to. */
struct vxlan_dev *vxlan;
/* Key of the MDB entry being configured. */
struct vxlan_mdb_entry_key group;
/* List of 'struct vxlan_mdb_config_src_entry'. */
struct list_head src_list;
/* Remote destination parameters. */
union vxlan_addr remote_ip;
u32 remote_ifindex;
__be32 remote_vni;
__be16 remote_port;
/* Netlink message flags (e.g., NLM_F_CREATE, NLM_F_REPLACE). */
u16 nlflags;
/* VXLAN_MDB_REMOTE_F_* flags to set on the remote. */
u8 flags;
/* MCAST_INCLUDE or MCAST_EXCLUDE. */
u8 filter_mode;
/* Routing protocol of the remote. */
u8 rt_protocol;
};
/* Parameters of the MDB hash table: entries are keyed by the entire
 * 'struct vxlan_mdb_entry_key' embedded in 'struct vxlan_mdb_entry'.
 */
static const struct rhashtable_params vxlan_mdb_rht_params = {
.head_offset = offsetof(struct vxlan_mdb_entry, rhnode),
.key_offset = offsetof(struct vxlan_mdb_entry, key),
.key_len = sizeof(struct vxlan_mdb_entry_key),
.automatic_shrinking = true,
};
/* Forward declarations: the (S, G) forwarding entry helpers further down call
 * these functions before their definitions appear.
 */
static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg,
struct netlink_ext_ack *extack);
static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg,
struct netlink_ext_ack *extack);
/* Populate the bridge MDB entry 'e' so that it describes 'remote' of
 * 'mdb_entry' on the VXLAN device. The entry is always reported as permanent.
 */
static void vxlan_br_mdb_entry_fill(const struct vxlan_dev *vxlan,
				    const struct vxlan_mdb_entry *mdb_entry,
				    const struct vxlan_mdb_remote *remote,
				    struct br_mdb_entry *e)
{
	const union vxlan_addr *group = &mdb_entry->key.dst;

	memset(e, 0, sizeof(*e));
	e->state = MDB_PERMANENT;
	e->ifindex = vxlan->dev->ifindex;
	if (remote->flags & VXLAN_MDB_REMOTE_F_BLOCKED)
		e->flags |= MDB_FLAGS_BLOCKED;

	/* Any other address family leaves the group address zeroed. */
	if (group->sa.sa_family == AF_INET) {
		e->addr.proto = htons(ETH_P_IP);
		e->addr.u.ip4 = group->sin.sin_addr.s_addr;
		return;
	}
#if IS_ENABLED(CONFIG_IPV6)
	if (group->sa.sa_family == AF_INET6) {
		e->addr.proto = htons(ETH_P_IPV6);
		e->addr.u.ip6 = group->sin6.sin6_addr;
	}
#endif
}
/* Emit the MDBA_MDB_EATTR_SRC_LIST nest holding the source list of 'remote'.
 * Nothing is emitted when the source list is empty.
 *
 * Returns 0 on success. If an attribute does not fit, the whole nest is
 * cancelled and -EMSGSIZE is returned.
 */
static int vxlan_mdb_entry_info_fill_srcs(struct sk_buff *skb,
					  const struct vxlan_mdb_remote *remote)
{
	struct vxlan_mdb_src_entry *src;
	struct nlattr *list_nest;

	if (hlist_empty(&remote->src_list))
		return 0;

	list_nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST);
	if (!list_nest)
		return -EMSGSIZE;

	hlist_for_each_entry(src, &remote->src_list, node) {
		struct nlattr *src_nest;

		src_nest = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY);
		if (!src_nest)
			goto cancel;

		if (vxlan_nla_put_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
				       &src->addr))
			goto cancel;
		/* The timer is always reported as zero. */
		if (nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, 0))
			goto cancel;

		nla_nest_end(skb, src_nest);
	}

	nla_nest_end(skb, list_nest);
	return 0;

cancel:
	nla_nest_cancel(skb, list_nest);
	return -EMSGSIZE;
}
static int vxlan_mdb_entry_info_fill(const struct vxlan_dev *vxlan,
struct sk_buff *skb,
const struct vxlan_mdb_entry *mdb_entry,
const struct vxlan_mdb_remote *remote)
{
struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
struct br_mdb_entry e;
struct nlattr *nest;
nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO);
if (!nest)
return -EMSGSIZE;
vxlan_br_mdb_entry_fill(vxlan, mdb_entry, remote, &e);
if (nla_put_nohdr(skb, sizeof(e), &e) ||
nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, 0))
goto nest_err;
if (!vxlan_addr_any(&mdb_entry->key.src) &&
vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_SOURCE, &mdb_entry->key.src))
goto nest_err;
if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, remote->rt_protocol) ||
nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, remote->filter_mode) ||
vxlan_mdb_entry_info_fill_srcs(skb, remote) ||
vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_DST, &rd->remote_ip))
goto nest_err;
if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port &&
nla_put_u16(skb, MDBA_MDB_EATTR_DST_PORT,
be16_to_cpu(rd->remote_port)))
goto nest_err;
if (rd->remote_vni != vxlan->default_dst.remote_vni &&
nla_put_u32(skb, MDBA_MDB_EATTR_VNI, be32_to_cpu(rd->remote_vni)))
goto nest_err;
if (rd->remote_ifindex &&
nla_put_u32(skb, MDBA_MDB_EATTR_IFINDEX, rd->remote_ifindex))
goto nest_err;
if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) &&
mdb_entry->key.vni && nla_put_u32(skb, MDBA_MDB_EATTR_SRC_VNI,
be32_to_cpu(mdb_entry->key.vni)))
goto nest_err;
nla_nest_end(skb, nest);
return 0;
nest_err:
nla_nest_cancel(skb, nest);
return -EMSGSIZE;
}
static int vxlan_mdb_entry_fill(const struct vxlan_dev *vxlan,
struct sk_buff *skb,
struct vxlan_mdb_dump_ctx *ctx,
const struct vxlan_mdb_entry *mdb_entry)
{
int remote_idx = 0, s_remote_idx = ctx->remote_idx;
struct vxlan_mdb_remote *remote;
struct nlattr *nest;
int err = 0;
nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
if (!nest)
return -EMSGSIZE;
list_for_each_entry(remote, &mdb_entry->remotes, list) {
if (remote_idx < s_remote_idx)
goto skip;
err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote);
if (err)
break;
skip:
remote_idx++;
}
ctx->remote_idx = err ? remote_idx : 0;
nla_nest_end(skb, nest);
return err;
}
static int vxlan_mdb_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb,
struct vxlan_mdb_dump_ctx *ctx)
{
int entry_idx = 0, s_entry_idx = ctx->entry_idx;
struct vxlan_mdb_entry *mdb_entry;
struct nlattr *nest;
int err = 0;
nest = nla_nest_start_noflag(skb, MDBA_MDB);
if (!nest)
return -EMSGSIZE;
hlist_for_each_entry(mdb_entry, &vxlan->mdb_list, mdb_node) {
if (entry_idx < s_entry_idx)
goto skip;
err = vxlan_mdb_entry_fill(vxlan, skb, ctx, mdb_entry);
if (err)
break;
skip:
entry_idx++;
}
ctx->entry_idx = err ? entry_idx : 0;
nla_nest_end(skb, nest);
return err;
}
/* Dump the MDB of VXLAN device 'dev' into 'skb' as an RTM_NEWMDB message.
 *
 * The resume position is kept in 'cb->ctx' (see struct vxlan_mdb_dump_ctx).
 * Returns -EMSGSIZE if the message header does not fit; otherwise the result
 * of filling the entries.
 */
int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb,
		   struct netlink_callback *cb)
{
	struct vxlan_mdb_dump_ctx *dump_ctx = (void *)cb->ctx;
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct br_port_msg *hdr;
	struct nlmsghdr *nlh;
	int ret;

	ASSERT_RTNL();

	NL_ASSERT_DUMP_CTX_FITS(struct vxlan_mdb_dump_ctx);

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RTM_NEWMDB, sizeof(*hdr), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	memset(hdr, 0, sizeof(*hdr));
	hdr->ifindex = dev->ifindex;
	hdr->family = AF_BRIDGE;

	ret = vxlan_mdb_fill(vxlan, skb, dump_ctx);

	/* The message is ended even if not all the entries could be filled. */
	nlmsg_end(skb, nlh);

	cb->seq = vxlan->mdb_seq;
	nl_dump_check_consistent(cb, nlh);

	return ret;
}
/* Policy for the attributes nested in MDBE_SRC_LIST_ENTRY: the address is
 * binary, between the size of an IPv4 address and that of an IPv6 address.
 */
static const struct nla_policy
vxlan_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = {
[MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY,
sizeof(struct in_addr),
sizeof(struct in6_addr)),
};
/* Policy for the attributes nested in MDBE_ATTR_SRC_LIST. */
static const struct nla_policy
vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = {
[MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol),
};
static struct netlink_range_validation vni_range = {
.max = VXLAN_N_VID - 1,
};
/* Policy for the attributes nested in MDBA_SET_ENTRY_ATTRS. Addresses are
 * binary, between the size of an IPv4 address and that of an IPv6 address;
 * the exact length is checked later against the protocol of the entry.
 */
static const struct nla_policy vxlan_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
sizeof(struct in_addr),
sizeof(struct in6_addr)),
[MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE,
MCAST_INCLUDE),
[MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_pol),
[MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
[MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY,
sizeof(struct in_addr),
sizeof(struct in6_addr)),
[MDBE_ATTR_DST_PORT] = { .type = NLA_U16 },
/* Both VNI attributes are limited to the range described by 'vni_range'. */
[MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
[MDBE_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
[MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
};
/* Check that the source address in 'attr' has the length expected for
 * 'proto' and that it is not a multicast address.
 *
 * Returns true if the address is valid; otherwise sets an error message in
 * 'extack' and returns false.
 */
static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto,
				      struct netlink_ext_ack *extack)
{
	if (proto == htons(ETH_P_IP)) {
		if (nla_len(attr) != sizeof(struct in_addr)) {
			NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length");
			return false;
		}
		if (ipv4_is_multicast(nla_get_in_addr(attr))) {
			NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed");
			return false;
		}
		return true;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (proto == htons(ETH_P_IPV6)) {
		struct in6_addr addr6;

		if (nla_len(attr) != sizeof(struct in6_addr)) {
			NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length");
			return false;
		}
		addr6 = nla_get_in6_addr(attr);
		if (ipv6_addr_is_multicast(&addr6)) {
			NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed");
			return false;
		}
		return true;
	}
#endif

	NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address");
	return false;
}
/* Set the group (destination) address of the configuration from 'entry' and,
 * if 'source_attr' is given, the source address from the attribute. The
 * result of parsing the source address is not checked here.
 */
static void vxlan_mdb_config_group_set(struct vxlan_mdb_config *cfg,
				       const struct br_mdb_entry *entry,
				       const struct nlattr *source_attr)
{
	struct vxlan_mdb_entry_key *key = &cfg->group;

	switch (entry->addr.proto) {
	case htons(ETH_P_IP):
		key->dst.sin.sin_addr.s_addr = entry->addr.u.ip4;
		key->dst.sa.sa_family = AF_INET;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6):
		key->dst.sin6.sin6_addr = entry->addr.u.ip6;
		key->dst.sa.sa_family = AF_INET6;
		break;
#endif
	default:
		/* Other protocols leave the group address untouched. */
		break;
	}

	if (!source_attr)
		return;

	vxlan_nla_get_addr(&key->src, source_attr);
}
static bool vxlan_mdb_is_star_g(const struct vxlan_mdb_entry_key *group)
{
return !vxlan_addr_any(&group->dst) && vxlan_addr_any(&group->src);
}
static bool vxlan_mdb_is_sg(const struct vxlan_mdb_entry_key *group)
{
return !vxlan_addr_any(&group->dst) && !vxlan_addr_any(&group->src);
}
/* Parse one MDBE_SRC_LIST_ENTRY attribute and append the resulting source
 * entry to the tail of the configuration's source list.
 *
 * Returns 0 on success or a negative errno. Nothing is added on failure.
 */
static int vxlan_mdb_config_src_entry_init(struct vxlan_mdb_config *cfg,
					   __be16 proto,
					   const struct nlattr *src_entry,
					   struct netlink_ext_ack *extack)
{
	struct nlattr *attrs[MDBE_SRCATTR_MAX + 1];
	struct vxlan_mdb_config_src_entry *new_src;
	int err;

	err = nla_parse_nested(attrs, MDBE_SRCATTR_MAX, src_entry,
			       vxlan_mdbe_src_list_entry_pol, extack);
	if (err)
		return err;

	/* The address is mandatory. */
	if (NL_REQ_ATTR_CHECK(extack, src_entry, attrs, MDBE_SRCATTR_ADDRESS))
		return -EINVAL;

	if (!vxlan_mdb_is_valid_source(attrs[MDBE_SRCATTR_ADDRESS], proto,
				       extack))
		return -EINVAL;

	new_src = kzalloc(sizeof(*new_src), GFP_KERNEL);
	if (!new_src)
		return -ENOMEM;

	err = vxlan_nla_get_addr(&new_src->addr, attrs[MDBE_SRCATTR_ADDRESS]);
	if (err) {
		kfree(new_src);
		return err;
	}

	list_add_tail(&new_src->node, &cfg->src_list);
	return 0;
}
/* Unlink a configuration source entry from its list and free it. */
static void
vxlan_mdb_config_src_entry_fini(struct vxlan_mdb_config_src_entry *src)
{
	list_del(&src->node);
	kfree(src);
}
/* Build the configuration's source list from the entries nested in
 * 'src_list'.
 *
 * Returns 0 on success. On failure, every entry on the configuration's source
 * list is freed and the error of the failing entry is returned.
 */
static int vxlan_mdb_config_src_list_init(struct vxlan_mdb_config *cfg,
					  __be16 proto,
					  const struct nlattr *src_list,
					  struct netlink_ext_ack *extack)
{
	struct vxlan_mdb_config_src_entry *cur, *next;
	struct nlattr *attr;
	int remaining, err;

	nla_for_each_nested(attr, src_list, remaining) {
		err = vxlan_mdb_config_src_entry_init(cfg, proto, attr,
						      extack);
		if (!err)
			continue;

		/* Roll back in reverse order of insertion. */
		list_for_each_entry_safe_reverse(cur, next, &cfg->src_list,
						 node)
			vxlan_mdb_config_src_entry_fini(cur);
		return err;
	}

	return 0;
}
/* Free all the entries on the configuration's source list, last to first. */
static void vxlan_mdb_config_src_list_fini(struct vxlan_mdb_config *cfg)
{
	struct vxlan_mdb_config_src_entry *cur, *next;

	list_for_each_entry_safe_reverse(cur, next, &cfg->src_list, node)
		vxlan_mdb_config_src_entry_fini(cur);
}
/* Parse the attributes nested in MDBA_SET_ENTRY_ATTRS ('set_attrs') against
 * 'vxlan_mdbe_attrs_pol' and fill 'cfg' accordingly: group key, filter mode,
 * source list, routing protocol and remote destination parameters.
 *
 * Returns 0 on success or a negative errno, with an error message set in
 * 'extack' by the checks performed here. On failure, the configuration's
 * source list is left empty.
 */
static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg,
const struct br_mdb_entry *entry,
const struct nlattr *set_attrs,
struct netlink_ext_ack *extack)
{
struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
int err;
err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, set_attrs,
vxlan_mdbe_attrs_pol, extack);
if (err)
return err;
/* The remote destination IP is the only mandatory attribute. */
if (NL_REQ_ATTR_CHECK(extack, set_attrs, mdbe_attrs, MDBE_ATTR_DST)) {
NL_SET_ERR_MSG_MOD(extack, "Missing remote destination IP address");
return -EINVAL;
}
if (mdbe_attrs[MDBE_ATTR_SOURCE] &&
!vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE],
entry->addr.proto, extack))
return -EINVAL;
vxlan_mdb_config_group_set(cfg, entry, mdbe_attrs[MDBE_ATTR_SOURCE]);
/* rtnetlink code only validates that IPv4 group address is
 * multicast.
 */
if (!vxlan_addr_is_multicast(&cfg->group.dst) &&
!vxlan_addr_any(&cfg->group.dst)) {
NL_SET_ERR_MSG_MOD(extack, "Group address is not multicast");
return -EINVAL;
}
if (vxlan_addr_any(&cfg->group.dst) &&
mdbe_attrs[MDBE_ATTR_SOURCE]) {
NL_SET_ERR_MSG_MOD(extack, "Source cannot be specified for the all-zeros entry");
return -EINVAL;
}
/* (S, G) entries are always in INCLUDE mode. */
if (vxlan_mdb_is_sg(&cfg->group))
cfg->filter_mode = MCAST_INCLUDE;
if (mdbe_attrs[MDBE_ATTR_GROUP_MODE]) {
if (!vxlan_mdb_is_star_g(&cfg->group)) {
NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries");
return -EINVAL;
}
cfg->filter_mode = nla_get_u8(mdbe_attrs[MDBE_ATTR_GROUP_MODE]);
}
if (mdbe_attrs[MDBE_ATTR_SRC_LIST]) {
if (!vxlan_mdb_is_star_g(&cfg->group)) {
NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries");
return -EINVAL;
}
if (!mdbe_attrs[MDBE_ATTR_GROUP_MODE]) {
NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode");
return -EINVAL;
}
/* On failure, the source entries added so far are freed by the
 * callee, so a plain return is sufficient.
 */
err = vxlan_mdb_config_src_list_init(cfg, entry->addr.proto,
mdbe_attrs[MDBE_ATTR_SRC_LIST],
extack);
if (err)
return err;
}
/* This check only fails when the source list is empty, so there is
 * nothing to free before returning.
 */
if (vxlan_mdb_is_star_g(&cfg->group) && list_empty(&cfg->src_list) &&
cfg->filter_mode == MCAST_INCLUDE) {
NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list");
return -EINVAL;
}
if (mdbe_attrs[MDBE_ATTR_RTPROT])
cfg->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);
/* From this point on, the source list may be populated and must be
 * freed on error.
 */
err = vxlan_nla_get_addr(&cfg->remote_ip, mdbe_attrs[MDBE_ATTR_DST]);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Invalid remote destination address");
goto err_src_list_fini;
}
/* Port and VNIs are passed in host byte order and stored in network
 * byte order.
 */
if (mdbe_attrs[MDBE_ATTR_DST_PORT])
cfg->remote_port =
cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT]));
if (mdbe_attrs[MDBE_ATTR_VNI])
cfg->remote_vni =
cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI]));
if (mdbe_attrs[MDBE_ATTR_IFINDEX]) {
cfg->remote_ifindex =
nla_get_s32(mdbe_attrs[MDBE_ATTR_IFINDEX]);
/* The outgoing interface must exist in the device's 'net'. */
if (!__dev_get_by_index(cfg->vxlan->net, cfg->remote_ifindex)) {
NL_SET_ERR_MSG_MOD(extack, "Outgoing interface not found");
err = -EINVAL;
goto err_src_list_fini;
}
}
if (mdbe_attrs[MDBE_ATTR_SRC_VNI])
cfg->group.vni =
cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI]));
return 0;
err_src_list_fini:
vxlan_mdb_config_src_list_fini(cfg);
return err;
}
/* Initialize 'cfg' from a netlink MDB request for VXLAN device 'dev'.
 *
 * The configuration starts with the defaults of the device and is then
 * overridden by the attributes of the request. Returns 0 on success or a
 * negative errno with an error message set in 'extack'.
 */
static int vxlan_mdb_config_init(struct vxlan_mdb_config *cfg,
				 struct net_device *dev, struct nlattr *tb[],
				 u16 nlmsg_flags,
				 struct netlink_ext_ack *extack)
{
	struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
	struct vxlan_dev *vxlan = netdev_priv(dev);

	/* The group key is hashed over its full size (see
	 * vxlan_mdb_rht_params), so start from an all-zeros structure.
	 */
	memset(cfg, 0, sizeof(*cfg));
	INIT_LIST_HEAD(&cfg->src_list);
	cfg->vxlan = vxlan;
	cfg->nlflags = nlmsg_flags;
	cfg->rt_protocol = RTPROT_STATIC;
	cfg->filter_mode = MCAST_EXCLUDE;
	cfg->group.vni = vxlan->default_dst.remote_vni;
	cfg->remote_vni = vxlan->default_dst.remote_vni;
	cfg->remote_port = vxlan->cfg.dst_port;

	if (entry->ifindex != dev->ifindex) {
		NL_SET_ERR_MSG_MOD(extack, "Port net device must be the VXLAN net device");
		return -EINVAL;
	}

	/* Deletion requests carry neither flag, so the state - which is not
	 * part of the entry key - is not checked for them.
	 */
	if (entry->state != MDB_PERMANENT &&
	    (nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE))) {
		NL_SET_ERR_MSG_MOD(extack, "MDB entry must be permanent");
		return -EINVAL;
	}

	if (entry->flags) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid MDB entry flags");
		return -EINVAL;
	}

	if (entry->vid) {
		NL_SET_ERR_MSG_MOD(extack, "VID must not be specified");
		return -EINVAL;
	}

	switch (entry->addr.proto) {
	case htons(ETH_P_IP):
	case htons(ETH_P_IPV6):
		break;
	default:
		NL_SET_ERR_MSG_MOD(extack, "Group address must be an IPv4 / IPv6 address");
		return -EINVAL;
	}

	if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY_ATTRS)) {
		NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY_ATTRS attribute");
		return -EINVAL;
	}

	return vxlan_mdb_config_attrs_init(cfg, entry,
					   tb[MDBA_SET_ENTRY_ATTRS], extack);
}
/* Release the resources held by 'cfg', which is its source list. */
static void vxlan_mdb_config_fini(struct vxlan_mdb_config *cfg)
{
	vxlan_mdb_config_src_list_fini(cfg);
}
/* Look up the MDB entry keyed by 'group' in the device's MDB hash table.
 * Returns NULL if there is no such entry.
 */
static struct vxlan_mdb_entry *
vxlan_mdb_entry_lookup(struct vxlan_dev *vxlan,
		       const struct vxlan_mdb_entry_key *group)
{
	struct rhashtable *tbl = &vxlan->mdb_tbl;

	return rhashtable_lookup_fast(tbl, group, vxlan_mdb_rht_params);
}
/* Find the remote of 'mdb_entry' whose destination IP equals 'addr'.
 * Returns NULL if there is no such remote.
 */
static struct vxlan_mdb_remote *
vxlan_mdb_remote_lookup(const struct vxlan_mdb_entry *mdb_entry,
			const union vxlan_addr *addr)
{
	struct vxlan_mdb_remote *cur;

	list_for_each_entry(cur, &mdb_entry->remotes, list) {
		struct vxlan_rdst *rdst = rtnl_dereference(cur->rd);

		if (!vxlan_addr_equal(addr, &rdst->remote_ip))
			continue;

		return cur;
	}

	return NULL;
}
/* RCU callback: destroy the dst cache of a remote destination and free it. */
static void vxlan_mdb_rdst_free(struct rcu_head *head)
{
	struct vxlan_rdst *rdst;

	rdst = container_of(head, struct vxlan_rdst, rcu);
	dst_cache_destroy(&rdst->dst_cache);
	kfree(rdst);
}
/* Allocate a remote destination from the parameters in 'cfg' and publish it
 * in 'remote->rd'.
 *
 * Returns 0 on success or a negative errno, in which case 'remote->rd' is
 * left untouched.
 */
static int vxlan_mdb_remote_rdst_init(const struct vxlan_mdb_config *cfg,
				      struct vxlan_mdb_remote *remote)
{
	struct vxlan_rdst *rdst;
	int err;

	rdst = kzalloc(sizeof(*rdst), GFP_KERNEL);
	if (!rdst)
		return -ENOMEM;

	err = dst_cache_init(&rdst->dst_cache, GFP_KERNEL);
	if (err) {
		kfree(rdst);
		return err;
	}

	rdst->remote_ifindex = cfg->remote_ifindex;
	rdst->remote_vni = cfg->remote_vni;
	rdst->remote_port = cfg->remote_port;
	rdst->remote_ip = cfg->remote_ip;

	/* Publish only after the destination is fully initialized. */
	rcu_assign_pointer(remote->rd, rdst);

	return 0;
}
/* Schedule 'rd' to be freed by vxlan_mdb_rdst_free() via call_rcu(). */
static void vxlan_mdb_remote_rdst_fini(struct vxlan_rdst *rd)
{
	call_rcu(&rd->rcu, vxlan_mdb_rdst_free);
}
/* Initialize a newly allocated remote from 'cfg': create its remote
 * destination, copy its attributes and start with an empty source list.
 *
 * Returns 0 on success or the error of the remote destination setup.
 */
static int vxlan_mdb_remote_init(const struct vxlan_mdb_config *cfg,
				 struct vxlan_mdb_remote *remote)
{
	int err = vxlan_mdb_remote_rdst_init(cfg, remote);

	if (err)
		return err;

	INIT_HLIST_HEAD(&remote->src_list);
	remote->rt_protocol = cfg->rt_protocol;
	remote->filter_mode = cfg->filter_mode;
	remote->flags = cfg->flags;

	return 0;
}
/* Release the remote destination of 'remote'. The source list is expected to
 * be empty by now; a warning is emitted once if it is not.
 */
static void vxlan_mdb_remote_fini(struct vxlan_dev *vxlan,
				  struct vxlan_mdb_remote *remote)
{
	struct vxlan_rdst *rdst;

	WARN_ON_ONCE(!hlist_empty(&remote->src_list));

	rdst = rtnl_dereference(remote->rd);
	vxlan_mdb_remote_rdst_fini(rdst);
}
/* Find the entry for 'addr' in the source list of 'remote'.
 * Returns NULL if there is no such entry.
 */
static struct vxlan_mdb_src_entry *
vxlan_mdb_remote_src_entry_lookup(const struct vxlan_mdb_remote *remote,
				  const union vxlan_addr *addr)
{
	struct vxlan_mdb_src_entry *cur;

	hlist_for_each_entry(cur, &remote->src_list, node) {
		if (!vxlan_addr_equal(&cur->addr, addr))
			continue;

		return cur;
	}

	return NULL;
}
/* Allocate a source entry for 'addr' and add it at the head of the source
 * list of 'remote'. Returns NULL if the allocation fails.
 */
static struct vxlan_mdb_src_entry *
vxlan_mdb_remote_src_entry_add(struct vxlan_mdb_remote *remote,
			       const union vxlan_addr *addr)
{
	struct vxlan_mdb_src_entry *new_ent;

	new_ent = kzalloc(sizeof(*new_ent), GFP_KERNEL);
	if (new_ent) {
		new_ent->addr = *addr;
		hlist_add_head(&new_ent->node, &remote->src_list);
	}

	return new_ent;
}
/* Unlink a source entry from the source list of its remote and free it. */
static void
vxlan_mdb_remote_src_entry_del(struct vxlan_mdb_src_entry *ent)
{
	hlist_del(&ent->node);
	kfree(ent);
}
static int
vxlan_mdb_remote_src_fwd_add(const struct vxlan_mdb_config *cfg,
const union vxlan_addr *addr,
struct netlink_ext_ack *extack)
{
struct vxlan_mdb_config sg_cfg;
memset(&sg_cfg, 0, sizeof(sg_cfg));
sg_cfg.vxlan = cfg->vxlan;
sg_cfg.group.src = *addr;
sg_cfg.group.dst = cfg->group.dst;
sg_cfg.group.vni = cfg->group.vni;
INIT_LIST_HEAD(&sg_cfg.src_list);
sg_cfg.remote_ip = cfg->remote_ip;
sg_cfg.remote_ifindex = cfg->remote_ifindex;
sg_cfg.remote_vni = cfg->remote_vni;
sg_cfg.remote_port = cfg->remote_port;
sg_cfg.nlflags = cfg->nlflags;
sg_cfg.filter_mode = MCAST_INCLUDE;
if (cfg->filter_mode == MCAST_EXCLUDE)
sg_cfg.flags = VXLAN_MDB_REMOTE_F_BLOCKED;
sg_cfg.rt_protocol = cfg->rt_protocol;
return __vxlan_mdb_add(&sg_cfg, extack);
}
static void
vxlan_mdb_remote_src_fwd_del(struct vxlan_dev *vxlan,
const struct vxlan_mdb_entry_key *group,
const struct vxlan_mdb_remote *remote,
const union vxlan_addr *addr)
{
struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
struct vxlan_mdb_config sg_cfg;
memset(&sg_cfg, 0, sizeof(sg_cfg));
sg_cfg.vxlan = vxlan;
sg_cfg.group.src = *addr;
sg_cfg.group.dst = group->dst;
sg_cfg.group.vni = group->vni;
INIT_LIST_HEAD(&sg_cfg.src_list);
sg_cfg.remote_ip = rd->remote_ip;
__vxlan_mdb_del(&sg_cfg, NULL);
}
/* Add source 'src' to the source list of 'remote' together with its (S, G)
 * forwarding entry.
 *
 * An already existing source entry is only accepted when NLM_F_REPLACE is
 * set; otherwise -EEXIST is returned. If the forwarding entry cannot be
 * added, the source entry is removed and the error is returned.
 */
static int
vxlan_mdb_remote_src_add(const struct vxlan_mdb_config *cfg,
			 struct vxlan_mdb_remote *remote,
			 const struct vxlan_mdb_config_src_entry *src,
			 struct netlink_ext_ack *extack)
{
	struct vxlan_mdb_src_entry *src_ent;
	int err;

	src_ent = vxlan_mdb_remote_src_entry_lookup(remote, &src->addr);
	if (src_ent && !(cfg->nlflags & NLM_F_REPLACE)) {
		NL_SET_ERR_MSG_MOD(extack, "Source entry already exists");
		return -EEXIST;
	}

	if (!src_ent) {
		src_ent = vxlan_mdb_remote_src_entry_add(remote, &src->addr);
		if (!src_ent)
			return -ENOMEM;
	}

	err = vxlan_mdb_remote_src_fwd_add(cfg, &src_ent->addr, extack);
	if (err) {
		vxlan_mdb_remote_src_entry_del(src_ent);
		return err;
	}

	/* The entry might have been marked for deletion by the replace flow;
	 * it is in use again, so drop the mark.
	 */
	src_ent->flags = 0;

	return 0;
}
/* Remove source entry 'ent' of 'remote': first its (S, G) forwarding entry,
 * then the source entry itself.
 */
static void vxlan_mdb_remote_src_del(struct vxlan_dev *vxlan,
				     const struct vxlan_mdb_entry_key *group,
				     const struct vxlan_mdb_remote *remote,
				     struct vxlan_mdb_src_entry *ent)
{
	const union vxlan_addr *src_addr = &ent->addr;

	vxlan_mdb_remote_src_fwd_del(vxlan, group, remote, src_addr);
	vxlan_mdb_remote_src_entry_del(ent);
}
/* Add every source of the configuration's source list to 'remote'.
 *
 * Returns 0 on success. On failure, all the entries currently on the source
 * list of 'remote' are deleted and the error is returned.
 */
static int vxlan_mdb_remote_srcs_add(const struct vxlan_mdb_config *cfg,
				     struct vxlan_mdb_remote *remote,
				     struct netlink_ext_ack *extack)
{
	struct vxlan_mdb_config_src_entry *cfg_src;
	struct vxlan_mdb_src_entry *cur;
	struct hlist_node *next;
	int err;

	list_for_each_entry(cfg_src, &cfg->src_list, node) {
		err = vxlan_mdb_remote_src_add(cfg, remote, cfg_src, extack);
		if (!err)
			continue;

		hlist_for_each_entry_safe(cur, next, &remote->src_list, node)
			vxlan_mdb_remote_src_del(cfg->vxlan, &cfg->group,
						 remote, cur);
		return err;
	}

	return 0;
}
/* Delete all the source entries of 'remote' together with their (S, G)
 * forwarding entries.
 */
static void vxlan_mdb_remote_srcs_del(struct vxlan_dev *vxlan,
				      const struct vxlan_mdb_entry_key *group,
				      struct vxlan_mdb_remote *remote)
{
	struct vxlan_mdb_src_entry *cur;
	struct hlist_node *next;

	hlist_for_each_entry_safe(cur, next, &remote->src_list, node)
		vxlan_mdb_remote_src_del(vxlan, group, remote, cur);
}
/* Compute the netlink message space needed for the source list of 'remote',
 * as emitted by vxlan_mdb_entry_info_fill_srcs(). Returns 0 when the source
 * list is empty, as no attribute is emitted in that case.
 *
 * Source addresses are sized according to the group address.
 */
static size_t
vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group,
			      const struct vxlan_mdb_remote *remote)
{
	struct vxlan_mdb_src_entry *ent;
	size_t nlmsg_size;

	if (hlist_empty(&remote->src_list))
		return 0;

	/* MDBA_MDB_EATTR_SRC_LIST */
	nlmsg_size = nla_total_size(0);

	hlist_for_each_entry(ent, &remote->src_list, node) {
			      /* MDBA_MDB_SRCLIST_ENTRY */
		nlmsg_size += nla_total_size(0) +
			      /* MDBA_MDB_SRCATTR_ADDRESS */
			      nla_total_size(vxlan_addr_size(&group->dst)) +
			      /* MDBA_MDB_SRCATTR_TIMER; emitted as a u32, so
			       * size it as one.
			       */
			      nla_total_size(sizeof(u32));
	}

	return nlmsg_size;
}
/* Compute the size of the netlink notification describing 'remote' of
 * 'mdb_entry'. Optional attributes are accounted for under the same
 * conditions that are used when the message is filled.
 */
static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan,
				   const struct vxlan_mdb_entry *mdb_entry,
				   const struct vxlan_mdb_remote *remote)
{
	const struct vxlan_mdb_entry_key *key = &mdb_entry->key;
	struct vxlan_rdst *rdst = rtnl_dereference(remote->rd);
	size_t size;

	/* Attributes that are always present. */
	size = NLMSG_ALIGN(sizeof(struct br_port_msg));
	/* MDBA_MDB */
	size += nla_total_size(0);
	/* MDBA_MDB_ENTRY */
	size += nla_total_size(0);
	/* MDBA_MDB_ENTRY_INFO */
	size += nla_total_size(sizeof(struct br_mdb_entry));
	/* MDBA_MDB_EATTR_TIMER */
	size += nla_total_size(sizeof(u32));
	/* MDBA_MDB_EATTR_RTPROT */
	size += nla_total_size(sizeof(u8));
	/* MDBA_MDB_EATTR_GROUP_MODE */
	size += nla_total_size(sizeof(u8));
	/* MDBA_MDB_EATTR_DST */
	size += nla_total_size(vxlan_addr_size(&rdst->remote_ip));

	/* Attributes that are conditionally present. */
	/* MDBA_MDB_EATTR_SOURCE */
	if (vxlan_mdb_is_sg(key))
		size += nla_total_size(vxlan_addr_size(&key->dst));
	/* MDBA_MDB_EATTR_SRC_LIST */
	size += vxlan_mdb_nlmsg_src_list_size(key, remote);
	/* MDBA_MDB_EATTR_DST_PORT */
	if (rdst->remote_port && rdst->remote_port != vxlan->cfg.dst_port)
		size += nla_total_size(sizeof(u16));
	/* MDBA_MDB_EATTR_VNI */
	if (rdst->remote_vni != vxlan->default_dst.remote_vni)
		size += nla_total_size(sizeof(u32));
	/* MDBA_MDB_EATTR_IFINDEX */
	if (rdst->remote_ifindex)
		size += nla_total_size(sizeof(u32));
	/* MDBA_MDB_EATTR_SRC_VNI */
	if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && key->vni)
		size += nla_total_size(sizeof(u32));

	return size;
}
/* Fill 'skb' with a notification of type 'type' describing 'remote' of
 * 'mdb_entry'.
 *
 * Returns 0 on success. If the message does not fit, it is cancelled and
 * -EMSGSIZE is returned.
 */
static int vxlan_mdb_nlmsg_fill(const struct vxlan_dev *vxlan,
				struct sk_buff *skb,
				const struct vxlan_mdb_entry *mdb_entry,
				const struct vxlan_mdb_remote *remote,
				int type)
{
	struct nlattr *mdb, *entry;
	struct br_port_msg *hdr;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, 0, 0, type, sizeof(*hdr), 0);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	memset(hdr, 0, sizeof(*hdr));
	hdr->ifindex = vxlan->dev->ifindex;
	hdr->family = AF_BRIDGE;

	mdb = nla_nest_start_noflag(skb, MDBA_MDB);
	if (!mdb)
		goto out_cancel;

	entry = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
	if (!entry)
		goto out_cancel;

	if (vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote))
		goto out_cancel;

	nla_nest_end(skb, entry);
	nla_nest_end(skb, mdb);
	nlmsg_end(skb, nlh);

	return 0;

out_cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
/* Send a notification of type 'type' about 'remote' of 'mdb_entry' to the
 * RTNLGRP_MDB multicast group. If the message cannot be allocated or filled,
 * the error is reported to the group's listeners instead.
 */
static void vxlan_mdb_remote_notify(const struct vxlan_dev *vxlan,
				    const struct vxlan_mdb_entry *mdb_entry,
				    const struct vxlan_mdb_remote *remote,
				    int type)
{
	struct net *net = dev_net(vxlan->dev);
	struct sk_buff *msg;
	int err;

	msg = nlmsg_new(vxlan_mdb_nlmsg_size(vxlan, mdb_entry, remote),
			GFP_KERNEL);
	if (!msg) {
		rtnl_set_sk_err(net, RTNLGRP_MDB, -ENOBUFS);
		return;
	}

	err = vxlan_mdb_nlmsg_fill(vxlan, msg, mdb_entry, remote, type);
	if (err) {
		kfree_skb(msg);
		rtnl_set_sk_err(net, RTNLGRP_MDB, err);
		return;
	}

	rtnl_notify(msg, net, 0, RTNLGRP_MDB, NULL, GFP_KERNEL);
}
static int
vxlan_mdb_remote_srcs_replace(const struct vxlan_mdb_config *cfg,
const struct vxlan_mdb_entry *mdb_entry,
struct vxlan_mdb_remote *remote,
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = cfg->vxlan;
struct vxlan_mdb_src_entry *ent;
struct hlist_node *tmp;
int err;
hlist_for_each_entry(ent, &remote->src_list, node)
ent->flags |= VXLAN_SGRP_F_DELETE;
err = vxlan_mdb_remote_srcs_add(cfg, remote, extack);
if (err)
goto err_clear_delete;
hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) {
if (ent->flags & VXLAN_SGRP_F_DELETE)
vxlan_mdb_remote_src_del(vxlan, &mdb_entry->key, remote,
ent);
}
return 0;
err_clear_delete:
hlist_for_each_entry(ent, &remote->src_list, node)
ent->flags &= ~VXLAN_SGRP_F_DELETE;
return err;
}
/* Replace the attributes of an existing 'remote' of 'mdb_entry' with those in
 * 'cfg': remote destination, source list, flags, filter mode and routing
 * protocol. An RTM_NEWMDB notification is sent on success.
 *
 * Returns 0 on success or a negative errno, in which case the previous remote
 * destination is restored.
 */
static int vxlan_mdb_remote_replace(const struct vxlan_mdb_config *cfg,
const struct vxlan_mdb_entry *mdb_entry,
struct vxlan_mdb_remote *remote,
struct netlink_ext_ack *extack)
{
struct vxlan_rdst *new_rd, *old_rd = rtnl_dereference(remote->rd);
struct vxlan_dev *vxlan = cfg->vxlan;
int err;
/* Installs a new remote destination in 'remote->rd'; the old one was saved
 * above so that it can be restored or released later.
 */
err = vxlan_mdb_remote_rdst_init(cfg, remote);
if (err)
return err;
new_rd = rtnl_dereference(remote->rd);
err = vxlan_mdb_remote_srcs_replace(cfg, mdb_entry, remote, extack);
if (err)
goto err_rdst_reset;
WRITE_ONCE(remote->flags, cfg->flags);
WRITE_ONCE(remote->filter_mode, cfg->filter_mode);
remote->rt_protocol = cfg->rt_protocol;
vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_NEWMDB);
/* The old remote destination is freed via call_rcu(). */
vxlan_mdb_remote_rdst_fini(old_rd);
return 0;
err_rdst_reset:
/* Put the old remote destination back and release the new one. */
rcu_assign_pointer(remote->rd, old_rd);
vxlan_mdb_remote_rdst_fini(new_rd);
return err;
}
/* Add the remote described by 'cfg' to 'mdb_entry', or replace it if it
 * already exists and NLM_F_REPLACE is set. An RTM_NEWMDB notification is sent
 * for a newly added remote.
 *
 * Returns 0 on success, -EEXIST if the remote exists and replace was not
 * requested, -ENOENT if it does not exist and NLM_F_CREATE is not set, or
 * another negative errno.
 */
static int vxlan_mdb_remote_add(const struct vxlan_mdb_config *cfg,
				struct vxlan_mdb_entry *mdb_entry,
				struct netlink_ext_ack *extack)
{
	struct vxlan_mdb_remote *rmt;
	int err;

	rmt = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip);
	if (rmt) {
		if (cfg->nlflags & NLM_F_REPLACE)
			return vxlan_mdb_remote_replace(cfg, mdb_entry, rmt,
							extack);

		NL_SET_ERR_MSG_MOD(extack, "Replace not specified and MDB remote entry already exists");
		return -EEXIST;
	}

	if (!(cfg->nlflags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG_MOD(extack, "Create not specified and entry does not exist");
		return -ENOENT;
	}

	rmt = kzalloc(sizeof(*rmt), GFP_KERNEL);
	if (!rmt)
		return -ENOMEM;

	err = vxlan_mdb_remote_init(cfg, rmt);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize remote MDB entry");
		kfree(rmt);
		return err;
	}

	err = vxlan_mdb_remote_srcs_add(cfg, rmt, extack);
	if (err) {
		vxlan_mdb_remote_fini(cfg->vxlan, rmt);
		kfree(rmt);
		return err;
	}

	/* Link the remote only once it is fully set up. */
	list_add_rcu(&rmt->list, &mdb_entry->remotes);
	vxlan_mdb_remote_notify(cfg->vxlan, mdb_entry, rmt, RTM_NEWMDB);

	return 0;
}
/* Delete 'remote' from 'mdb_entry': send an RTM_DELMDB notification, unlink
 * the remote from the entry's list of remotes, delete its sources and release
 * it.
 */
static void vxlan_mdb_remote_del(struct vxlan_dev *vxlan,
struct vxlan_mdb_entry *mdb_entry,
struct vxlan_mdb_remote *remote)
{
/* Notify while the remote, its sources and its destination are intact. */
vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_DELMDB);
list_del_rcu(&remote->list);
/* vxlan_mdb_remote_fini() warns if the source list is not empty, so the
 * sources are deleted first.
 */
vxlan_mdb_remote_srcs_del(vxlan, &mdb_entry->key, remote);
vxlan_mdb_remote_fini(vxlan, remote);
/* The remote was unlinked with list_del_rcu(); free it via kfree_rcu(). */
kfree_rcu(remote, rcu);
}
/* Return the MDB entry keyed by 'group', creating it if it does not exist.
 *
 * A new entry is added to the device's MDB list and hash table. When it is
 * the only entry on the list, VXLAN_F_MDB is set on the device. Returns an
 * ERR_PTR() on failure.
 */
static struct vxlan_mdb_entry *
vxlan_mdb_entry_get(struct vxlan_dev *vxlan,
		    const struct vxlan_mdb_entry_key *group)
{
	struct vxlan_mdb_entry *ent;
	int err;

	ent = vxlan_mdb_entry_lookup(vxlan, group);
	if (ent)
		return ent;

	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ent->remotes);
	memcpy(&ent->key, group, sizeof(ent->key));
	hlist_add_head(&ent->mdb_node, &vxlan->mdb_list);

	err = rhashtable_lookup_insert_fast(&vxlan->mdb_tbl, &ent->rhnode,
					    vxlan_mdb_rht_params);
	if (err) {
		hlist_del(&ent->mdb_node);
		kfree(ent);
		return ERR_PTR(err);
	}

	/* First entry on the list: flag the device as having MDB entries. */
	if (hlist_is_singular_node(&ent->mdb_node, &vxlan->mdb_list))
		vxlan->cfg.flags |= VXLAN_F_MDB;

	return ent;
}
/* Destroy 'mdb_entry' if it has no remotes left; otherwise do nothing.
 *
 * The entry is removed from the device's MDB hash table and list and freed
 * via kfree_rcu(). VXLAN_F_MDB is cleared when the entry is the only one on
 * the device's MDB list.
 */
static void vxlan_mdb_entry_put(struct vxlan_dev *vxlan,
struct vxlan_mdb_entry *mdb_entry)
{
if (!list_empty(&mdb_entry->remotes))
return;
/* Must be checked while the entry is still linked in the MDB list. */
if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list))
vxlan->cfg.flags &= ~VXLAN_F_MDB;
rhashtable_remove_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode,
vxlan_mdb_rht_params);
hlist_del(&mdb_entry->mdb_node);
kfree_rcu(mdb_entry, rcu);
}
/* Add (or replace) the remote described by 'cfg', creating its MDB entry if
 * needed. The MDB sequence counter is bumped on success.
 *
 * If the remote cannot be added, the entry is destroyed again in case it was
 * left without remotes.
 */
static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg,
			   struct netlink_ext_ack *extack)
{
	struct vxlan_dev *vxlan = cfg->vxlan;
	struct vxlan_mdb_entry *ent;
	int err;

	ent = vxlan_mdb_entry_get(vxlan, &cfg->group);
	if (IS_ERR(ent))
		return PTR_ERR(ent);

	err = vxlan_mdb_remote_add(cfg, ent, extack);
	if (err) {
		vxlan_mdb_entry_put(vxlan, ent);
		return err;
	}

	vxlan->mdb_seq++;

	return 0;
}
/* Delete the remote 'cfg->remote_ip' from the MDB entry of 'cfg->group'.
 *
 * Returns -ENOENT, with a message in @extack, when either the entry or the
 * remote cannot be found. Otherwise the remote is deleted, the entry is put
 * (and thereby freed if it has no remotes left), 'vxlan->mdb_seq' is
 * incremented and 0 is returned.
 */
static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg,
			   struct netlink_ext_ack *extack)
{
	struct vxlan_dev *vxlan = cfg->vxlan;
	struct vxlan_mdb_entry *entry;
	struct vxlan_mdb_remote *rem;

	entry = vxlan_mdb_entry_lookup(vxlan, &cfg->group);
	if (!entry) {
		NL_SET_ERR_MSG_MOD(extack, "Did not find MDB entry");
		return -ENOENT;
	}

	rem = vxlan_mdb_remote_lookup(entry, &cfg->remote_ip);
	if (!rem) {
		NL_SET_ERR_MSG_MOD(extack, "Did not find MDB remote entry");
		return -ENOENT;
	}

	vxlan_mdb_remote_del(vxlan, entry, rem);
	vxlan_mdb_entry_put(vxlan, entry);
	vxlan->mdb_seq++;

	return 0;
}
/* Entry point for adding an MDB entry to VXLAN device @dev.
 *
 * Must be called with RTNL held. The attributes in @tb are parsed into a
 * configuration, the add is performed and the configuration is torn down
 * again. Returns 0 on success or a negative errno.
 */
int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
		  struct netlink_ext_ack *extack)
{
	struct vxlan_mdb_config cfg;
	int rc;

	ASSERT_RTNL();

	rc = vxlan_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack);
	if (!rc) {
		rc = __vxlan_mdb_add(&cfg, extack);
		/* The configuration is released regardless of the outcome. */
		vxlan_mdb_config_fini(&cfg);
	}

	return rc;
}
/* Entry point for deleting an MDB entry from VXLAN device @dev.
 *
 * Must be called with RTNL held. The attributes in @tb are parsed into a
 * configuration (with netlink flags of zero), the delete is performed and the
 * configuration is torn down again. Returns 0 on success or a negative errno.
 */
int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
		  struct netlink_ext_ack *extack)
{
	struct vxlan_mdb_config cfg;
	int rc;

	ASSERT_RTNL();

	rc = vxlan_mdb_config_init(&cfg, dev, tb, 0, extack);
	if (!rc) {
		rc = __vxlan_mdb_del(&cfg, extack);
		/* The configuration is released regardless of the outcome. */
		vxlan_mdb_config_fini(&cfg);
	}

	return rc;
}
/* Find the MDB entry that should be used to forward @skb, or NULL if none.
 *
 * Only frames with a multicast, non-broadcast destination MAC that carry IPv4
 * or (when enabled) IPv6 are considered. Up to three lookups are made, in this
 * order:
 *
 * 1. (S, G): {VNI, source IP, destination IP}.
 * 2. (*, G): {VNI, destination IP}, with the source zeroed.
 * 3. Catchall: {VNI, all-zeros destination}, skipped for link-local
 *    multicast destinations.
 *
 * NOTE(review): pskb_may_pull() is given only the size of the IP header while
 * the addresses are read through ip_hdr() / ipv6_hdr(). If 'skb->data' points
 * at the Ethernet header when this function runs, the pulled length would not
 * cover the whole IP header - confirm against the caller.
 */
struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
						struct sk_buff *skb,
						__be32 src_vni)
{
	const unsigned char *dmac = eth_hdr(skb)->h_dest;
	struct vxlan_mdb_entry *entry;
	struct vxlan_mdb_entry_key key;

	if (!is_multicast_ether_addr(dmac))
		return NULL;
	if (is_broadcast_ether_addr(dmac))
		return NULL;

	/* Outside of collect metadata mode 'src_vni' is zero, whereas MDB
	 * entries are keyed by the VNI of the VXLAN device itself.
	 */
	if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA))
		src_vni = vxlan->default_dst.remote_vni;

	/* Clear the whole key first so that unused bytes compare equal. */
	memset(&key, 0, sizeof(key));
	key.vni = src_vni;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return NULL;
		key.src.sa.sa_family = AF_INET;
		key.src.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
		key.dst.sa.sa_family = AF_INET;
		key.dst.sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6):
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return NULL;
		key.src.sa.sa_family = AF_INET6;
		key.src.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
		key.dst.sa.sa_family = AF_INET6;
		key.dst.sin6.sin6_addr = ipv6_hdr(skb)->daddr;
		break;
#endif
	default:
		return NULL;
	}

	/* (S, G) first; on a miss retry as (*, G) with the source wiped. */
	entry = vxlan_mdb_entry_lookup(vxlan, &key);
	if (!entry) {
		memset(&key.src, 0, sizeof(key.src));
		entry = vxlan_mdb_entry_lookup(vxlan, &key);
	}
	if (entry)
		return entry;

	/* Neither (S, G) nor (*, G) matched. Fall back to the all-zeros
	 * entry, except for link-local multicast destinations: that traffic
	 * is meant to be transmitted together with broadcast and unknown
	 * unicast traffic.
	 */
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (ipv4_is_local_multicast(key.dst.sin.sin_addr.s_addr))
			return NULL;
		key.dst.sin.sin_addr.s_addr = 0;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6):
		if (ipv6_addr_type(&key.dst.sin6.sin6_addr) &
		    IPV6_ADDR_LINKLOCAL)
			return NULL;
		memset(&key.dst.sin6.sin6_addr, 0,
		       sizeof(key.dst.sin6.sin6_addr));
		break;
#endif
	default:
		return NULL;
	}

	return vxlan_mdb_entry_lookup(vxlan, &key);
}
/* Transmit @skb to every eligible remote of @mdb_entry.
 *
 * A remote is skipped when the entry is a (*, G) entry and the remote is in
 * INCLUDE filter mode, or when the remote is flagged as blocked. The first
 * eligible remote is set aside and receives the original @skb after the walk;
 * every further eligible remote receives a clone. If a clone cannot be
 * allocated, that remote is silently skipped. When no remote is eligible the
 * skb is freed.
 *
 * Always returns NETDEV_TX_OK; @skb is consumed in all cases.
 */
netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan,
			   const struct vxlan_mdb_entry *mdb_entry,
			   struct sk_buff *skb)
{
	struct vxlan_mdb_remote *first = NULL;
	struct vxlan_mdb_remote *remote;
	__be32 vni = mdb_entry->key.vni;

	list_for_each_entry_rcu(remote, &mdb_entry->remotes, list) {
		struct sk_buff *clone;

		if (vxlan_mdb_is_star_g(&mdb_entry->key) &&
		    READ_ONCE(remote->filter_mode) == MCAST_INCLUDE)
			continue;

		if (READ_ONCE(remote->flags) & VXLAN_MDB_REMOTE_F_BLOCKED)
			continue;

		/* Hold back the first match; it gets the original skb. */
		if (!first) {
			first = remote;
			continue;
		}

		clone = skb_clone(skb, GFP_ATOMIC);
		if (!clone)
			continue;

		vxlan_xmit_one(clone, vxlan->dev, vni,
			       rcu_dereference(remote->rd), false);
	}

	if (!first) {
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	vxlan_xmit_one(skb, vxlan->dev, vni, rcu_dereference(first->rd),
		       false);

	return NETDEV_TX_OK;
}
/* Callback handed to rhashtable_free_and_destroy() by vxlan_mdb_fini().
 *
 * Both arguments are ignored; a warning is emitted (once) whenever this is
 * invoked. Presumably it is called for each element still present in the
 * table, so any invocation means an entry was left behind - confirm against
 * the rhashtable API.
 */
static void vxlan_mdb_check_empty(void *ptr, void *arg)
{
WARN_ON_ONCE(1);
}
static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
struct vxlan_mdb_entry *mdb_entry)
{
struct vxlan_mdb_remote *remote, *tmp;
list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list)
vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
}
/* Remove all MDB entries of @vxlan together with their remotes. */
static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan)
{
	struct vxlan_mdb_entry *entry;
	struct hlist_node *next;

	/* Entries are always inserted at the head of the list, so removing
	 * one entry cannot cause another entry to be removed.
	 */
	hlist_for_each_entry_safe(entry, next, &vxlan->mdb_list, mdb_node) {
		/* Drop the remotes first so that the put releases the entry. */
		vxlan_mdb_remotes_flush(vxlan, entry);
		vxlan_mdb_entry_put(vxlan, entry);
	}
}
/* Initialize the MDB state of @vxlan: the entry hash table and the entry list.
 *
 * Returns 0 on success or the error reported by rhashtable_init(), in which
 * case the list head is left untouched.
 */
int vxlan_mdb_init(struct vxlan_dev *vxlan)
{
	int rc;

	rc = rhashtable_init(&vxlan->mdb_tbl, &vxlan_mdb_rht_params);
	if (!rc)
		INIT_HLIST_HEAD(&vxlan->mdb_list);

	return rc;
}
/* Tear down the MDB state of @vxlan.
 *
 * All entries and their remotes are flushed, after which the hash table is
 * destroyed.
 */
void vxlan_mdb_fini(struct vxlan_dev *vxlan)
{
vxlan_mdb_entries_flush(vxlan);
/* Putting the last entry clears VXLAN_F_MDB, so the flag must be off
 * once the flush is done.
 */
WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB);
/* vxlan_mdb_check_empty() warns if it is ever invoked here. */
rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty,
NULL);
}