linux-stable/include/linux/if_bridge.h
Vladimir Oltean 4f2673b3a2 net: bridge: add helper to replay port and host-joined mdb entries
I have a system with DSA ports, and udhcpcd is configured to bring
interfaces up as soon as they are created.

I create a bridge as follows:

ip link add br0 type bridge

As soon as I create the bridge and udhcpcd brings it up, I also have
avahi which automatically starts sending IPv6 packets to advertise some
local services, and because of that, the br0 bridge joins the following
IPv6 groups due to the code path detailed below:

33:33:ff:6d:c1:9c vid 0
33:33:00:00:00:6a vid 0
33:33:00:00:00:fb vid 0

br_dev_xmit
-> br_multicast_rcv
   -> br_ip6_multicast_add_group
      -> __br_multicast_add_group
         -> br_multicast_host_join
            -> br_mdb_notify

This is all fine, but inside br_mdb_notify we have br_mdb_switchdev_host
hooked up, and switchdev will attempt to offload the host joined groups
to an empty list of ports. Of course nobody offloads them.

Then when we add a port to br0:

ip link set swp0 master br0

the bridge doesn't replay the host-joined MDB entries from br_add_if,
and eventually the host joined addresses expire, and a switchdev
notification for deleting it is emitted, but surprise, the original
addition was already completely missed.

The strategy to address this problem is to replay the MDB entries (both
the port ones and the host joined ones) when the new port joins the
bridge, similar to what vxlan_fdb_replay does (in that case, its FDB can
be populated and only then attached to a bridge that you offload).
However there are 2 possibilities: the addresses can be 'pushed' by the
bridge into the port, or the port can 'pull' them from the bridge.

Considering that in the general case, the new port can be really late to
the party, and there may have been many other switchdev ports that
already received the initial notification, we would like to avoid
delivering duplicate events to them, since they might misbehave. And
currently, the bridge calls the entire switchdev notifier chain, whereas
for replaying it should just call the notifier block of the new guy.
But the bridge doesn't know what is the new guy's notifier block, it
just knows where the switchdev notifier chain is. So for simplification,
we make this a driver-initiated pull for now, and the notifier block is
passed as an argument.

To emulate the calling context for mdb objects (deferred and put on the
blocking notifier chain), we must iterate under RCU protection through
the bridge's mdb entries, queue them, and only call them once we're out
of the RCU read-side critical section.

There was some opportunity for reuse between br_mdb_switchdev_host_port,
br_mdb_notify and the newly added br_mdb_queue_one in how the switchdev
mdb object is created, so a helper was created.

Suggested-by: Ido Schimmel <idosch@idosch.org>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-03-23 14:49:05 -07:00

180 lines
4.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Linux ethernet bridge
*
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*/
#ifndef _LINUX_IF_BRIDGE_H
#define _LINUX_IF_BRIDGE_H
#include <linux/netdevice.h>
#include <uapi/linux/if_bridge.h>
#include <linux/bitops.h>
struct br_ip {
union {
__be32 ip4;
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ip6;
#endif
} src;
union {
__be32 ip4;
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ip6;
#endif
unsigned char mac_addr[ETH_ALEN];
} dst;
__be16 proto;
__u16 vid;
};
struct br_ip_list {
struct list_head list;
struct br_ip addr;
};
#define BR_HAIRPIN_MODE BIT(0)
#define BR_BPDU_GUARD BIT(1)
#define BR_ROOT_BLOCK BIT(2)
#define BR_MULTICAST_FAST_LEAVE BIT(3)
#define BR_ADMIN_COST BIT(4)
#define BR_LEARNING BIT(5)
#define BR_FLOOD BIT(6)
#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING)
#define BR_PROMISC BIT(7)
#define BR_PROXYARP BIT(8)
#define BR_LEARNING_SYNC BIT(9)
#define BR_PROXYARP_WIFI BIT(10)
#define BR_MCAST_FLOOD BIT(11)
#define BR_MULTICAST_TO_UNICAST BIT(12)
#define BR_VLAN_TUNNEL BIT(13)
#define BR_BCAST_FLOOD BIT(14)
#define BR_NEIGH_SUPPRESS BIT(15)
#define BR_ISOLATED BIT(16)
#define BR_MRP_AWARE BIT(17)
#define BR_MRP_LOST_CONT BIT(18)
#define BR_MRP_LOST_IN_CONT BIT(19)
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list);
bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
bool br_multicast_enabled(const struct net_device *dev);
bool br_multicast_router(const struct net_device *dev);
int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
struct notifier_block *nb, struct netlink_ext_ack *extack);
#else
static inline int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list)
{
return 0;
}
static inline bool br_multicast_has_querier_anywhere(struct net_device *dev,
int proto)
{
return false;
}
static inline bool br_multicast_has_querier_adjacent(struct net_device *dev,
int proto)
{
return false;
}
static inline bool br_multicast_enabled(const struct net_device *dev)
{
return false;
}
static inline bool br_multicast_router(const struct net_device *dev)
{
return false;
}
static inline int br_mdb_replay(struct net_device *br_dev,
struct net_device *dev,
struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
#endif
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
bool br_vlan_enabled(const struct net_device *dev);
int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid);
int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid);
int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
int br_vlan_get_info(const struct net_device *dev, u16 vid,
struct bridge_vlan_info *p_vinfo);
#else
static inline bool br_vlan_enabled(const struct net_device *dev)
{
return false;
}
static inline int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
{
return -EINVAL;
}
static inline int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto)
{
return -EINVAL;
}
static inline int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
{
return -EINVAL;
}
static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
struct bridge_vlan_info *p_vinfo)
{
return -EINVAL;
}
#endif
#if IS_ENABLED(CONFIG_BRIDGE)
struct net_device *br_fdb_find_port(const struct net_device *br_dev,
const unsigned char *addr,
__u16 vid);
void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
u8 br_port_get_stp_state(const struct net_device *dev);
clock_t br_get_ageing_time(struct net_device *br_dev);
#else
static inline struct net_device *
br_fdb_find_port(const struct net_device *br_dev,
const unsigned char *addr,
__u16 vid)
{
return NULL;
}
static inline void br_fdb_clear_offload(const struct net_device *dev, u16 vid)
{
}
static inline bool
br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
{
return false;
}
static inline u8 br_port_get_stp_state(const struct net_device *dev)
{
return BR_STATE_DISABLED;
}
static inline clock_t br_get_ageing_time(struct net_device *br_dev)
{
return 0;
}
#endif
#endif