Merge branch 'rtnetlink-reduce-rtnl-pressure'

Eric Dumazet says:

====================
rtnetlink: reduce RTNL pressure for dumps

This series restarts the conversion of rtnl dump operations
to RCU protection, instead of requiring RTNL.

In this new attempt (prior one failed in 2011), I chose to
allow a gradual conversion of selected operations.

After this series, "ip -6 addr" and "ip -4 ro" no longer
need to acquire RTNL.

I refrained from changing inet_dump_ifaddr() and inet6_dump_addr()
to avoid merge conflicts because of two fixes in net tree.

I also started the work for "ip link" future conversion.

v2: rtnl_fill_link_ifmap() always emit IFLA_MAP (Jiri Pirko)
    Added "nexthop: allow nexthop_mpath_fill_node()
           to be called without RTNL" to avoid a lockdep splat (Ido Schimmel)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2024-02-26 11:46:13 +00:00
commit 5fc3903c46
32 changed files with 238 additions and 198 deletions

View file

@ -1272,10 +1272,10 @@ static int ipoib_get_iflink(const struct net_device *dev)
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
return dev->ifindex;
return READ_ONCE(dev->ifindex);
/* child/vlan interface */
return priv->parent->ifindex;
return READ_ONCE(priv->parent->ifindex);
}
static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)

View file

@ -119,7 +119,7 @@ static int vxcan_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(priv->peer);
iflink = peer ? peer->ifindex : 0;
iflink = peer ? READ_ONCE(peer->ifindex) : 0;
rcu_read_unlock();
return iflink;

View file

@ -98,7 +98,7 @@ static int rmnet_vnd_get_iflink(const struct net_device *dev)
{
struct rmnet_priv *priv = netdev_priv(dev);
return priv->real_dev->ifindex;
return READ_ONCE(priv->real_dev->ifindex);
}
static int rmnet_vnd_init(struct net_device *dev)

View file

@ -349,7 +349,7 @@ static int ipvlan_get_iflink(const struct net_device *dev)
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
return ipvlan->phy_dev->ifindex;
return READ_ONCE(ipvlan->phy_dev->ifindex);
}
static const struct net_device_ops ipvlan_netdev_ops = {

View file

@ -3753,7 +3753,7 @@ static void macsec_get_stats64(struct net_device *dev,
static int macsec_get_iflink(const struct net_device *dev)
{
return macsec_priv(dev)->real_dev->ifindex;
return READ_ONCE(macsec_priv(dev)->real_dev->ifindex);
}
static const struct net_device_ops macsec_netdev_ops = {

View file

@ -1158,7 +1158,7 @@ static int macvlan_dev_get_iflink(const struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
return vlan->lowerdev->ifindex;
return READ_ONCE(vlan->lowerdev->ifindex);
}
static const struct ethtool_ops macvlan_ethtool_ops = {

View file

@ -145,7 +145,7 @@ static int netkit_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(nk->peer);
if (peer)
iflink = peer->ifindex;
iflink = READ_ONCE(peer->ifindex);
rcu_read_unlock();
return iflink;
}

View file

@ -1461,7 +1461,7 @@ static int veth_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(priv->peer);
iflink = peer ? peer->ifindex : 0;
iflink = peer ? READ_ONCE(peer->ifindex) : 0;
rcu_read_unlock();
return iflink;

View file

@ -453,7 +453,7 @@ static int virt_wifi_net_device_get_iflink(const struct net_device *dev)
{
struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
return priv->lowerdev->ifindex;
return READ_ONCE(priv->lowerdev->ifindex);
}
static const struct net_device_ops virt_wifi_ops = {

View file

@ -4354,8 +4354,10 @@ static inline bool netif_testing(const struct net_device *dev)
*/
static inline bool netif_oper_up(const struct net_device *dev)
{
return (dev->operstate == IF_OPER_UP ||
dev->operstate == IF_OPER_UNKNOWN /* backward compat */);
unsigned int operstate = READ_ONCE(dev->operstate);
return operstate == IF_OPER_UP ||
operstate == IF_OPER_UNKNOWN /* backward compat */;
}
/**

View file

@ -291,6 +291,7 @@ struct netlink_callback {
u16 answer_flags;
u32 min_dump_alloc;
unsigned int prev_seq, seq;
int flags;
bool strict_check;
union {
u8 ctx[48];
@ -323,6 +324,7 @@ struct netlink_dump_control {
void *data;
struct module *module;
u32 min_dump_alloc;
int flags;
};
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,

View file

@ -264,6 +264,7 @@ struct fib_dump_filter {
bool filter_set;
bool dump_routes;
bool dump_exceptions;
bool rtnl_held;
unsigned char protocol;
unsigned char rt_type;
unsigned int flags;

View file

@ -317,7 +317,7 @@ static inline
int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
u8 rt_family)
{
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
int i;
for (i = 0; i < nhg->num_nh; i++) {

View file

@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
};
enum rtnl_kinds {

View file

@ -762,9 +762,9 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
static int vlan_dev_get_iflink(const struct net_device *dev)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
return real_dev->ifindex;
return READ_ONCE(real_dev->ifindex);
}
static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,

View file

@ -641,7 +641,7 @@ int dev_get_iflink(const struct net_device *dev)
if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
return dev->netdev_ops->ndo_get_iflink(dev);
return dev->ifindex;
return READ_ONCE(dev->ifindex);
}
EXPORT_SYMBOL(dev_get_iflink);
@ -8632,12 +8632,12 @@ unsigned int dev_get_flags(const struct net_device *dev)
{
unsigned int flags;
flags = (dev->flags & ~(IFF_PROMISC |
flags = (READ_ONCE(dev->flags) & ~(IFF_PROMISC |
IFF_ALLMULTI |
IFF_RUNNING |
IFF_LOWER_UP |
IFF_DORMANT)) |
(dev->gflags & (IFF_PROMISC |
(READ_ONCE(dev->gflags) & (IFF_PROMISC |
IFF_ALLMULTI));
if (netif_running(dev)) {

View file

@ -1455,17 +1455,18 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return 0;
}
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
static int rtnl_fill_link_ifmap(struct sk_buff *skb,
const struct net_device *dev)
{
struct rtnl_link_ifmap map;
memset(&map, 0, sizeof(map));
map.mem_start = dev->mem_start;
map.mem_end = dev->mem_end;
map.base_addr = dev->base_addr;
map.irq = dev->irq;
map.dma = dev->dma;
map.port = dev->if_port;
map.mem_start = READ_ONCE(dev->mem_start);
map.mem_end = READ_ONCE(dev->mem_end);
map.base_addr = READ_ONCE(dev->base_addr);
map.irq = READ_ONCE(dev->irq);
map.dma = READ_ONCE(dev->dma);
map.port = READ_ONCE(dev->if_port);
if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD))
return -EMSGSIZE;
@ -1611,10 +1612,10 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev,
bool force)
{
int ifindex = dev_get_iflink(dev);
int iflink = dev_get_iflink(dev);
if (force || dev->ifindex != ifindex)
return nla_put_u32(skb, IFLA_LINK, ifindex);
if (force || READ_ONCE(dev->ifindex) != iflink)
return nla_put_u32(skb, IFLA_LINK, iflink);
return 0;
}
@ -1698,7 +1699,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
struct netdev_name_node *name_node;
int count = 0;
list_for_each_entry(name_node, &dev->name_node->list, list) {
list_for_each_entry_rcu(name_node, &dev->name_node->list, list) {
if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name))
return -EMSGSIZE;
count++;
@ -1706,6 +1707,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
return count;
}
/* RCU protected. */
static int rtnl_fill_prop_list(struct sk_buff *skb,
const struct net_device *dev)
{
@ -1875,9 +1877,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
}
if (rtnl_fill_link_ifmap(skb, dev))
goto nla_put_failure;
if (dev->addr_len) {
if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast))
@ -1927,10 +1926,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
rcu_read_lock();
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
goto nla_put_failure_rcu;
rcu_read_unlock();
if (rtnl_fill_link_ifmap(skb, dev))
goto nla_put_failure_rcu;
if (rtnl_fill_prop_list(skb, dev))
goto nla_put_failure;
goto nla_put_failure_rcu;
rcu_read_unlock();
if (dev->dev.parent &&
nla_put_string(skb, IFLA_PARENT_DEV_NAME,
@ -6532,6 +6532,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
owner = link->owner;
dumpit = link->dumpit;
flags = link->flags;
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
@ -6549,6 +6550,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
.module = owner,
.flags = flags,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
/* netlink_dump_start() will keep a reference on

View file

@ -352,7 +352,7 @@ void dsa_user_mii_bus_init(struct dsa_switch *ds)
/* user device handling ****************************************************/
static int dsa_user_get_iflink(const struct net_device *dev)
{
return dsa_user_to_conduit(dev)->ifindex;
return READ_ONCE(dsa_user_to_conduit(dev)->ifindex);
}
static int dsa_user_open(struct net_device *dev)

View file

@ -93,7 +93,7 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
static int lowpan_get_iflink(const struct net_device *dev)
{
return lowpan_802154_dev(dev)->wdev->ifindex;
return READ_ONCE(lowpan_802154_dev(dev)->wdev->ifindex);
}
static const struct net_device_ops lowpan_netdev_ops = {

View file

@ -916,7 +916,8 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct rtmsg *rtm;
int err, i;
ASSERT_RTNL();
if (filter->rtnl_held)
ASSERT_RTNL();
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
@ -961,7 +962,10 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
break;
case RTA_OIF:
ifindex = nla_get_u32(tb[i]);
filter->dev = __dev_get_by_index(net, ifindex);
if (filter->rtnl_held)
filter->dev = __dev_get_by_index(net, ifindex);
else
filter->dev = dev_get_by_index_rcu(net, ifindex);
if (!filter->dev)
return -ENODEV;
break;
@ -983,20 +987,24 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
struct fib_dump_filter filter = { .dump_routes = true,
.dump_exceptions = true };
struct fib_dump_filter filter = {
.dump_routes = true,
.dump_exceptions = true,
.rtnl_held = false,
};
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
int dumped = 0, err;
int dumped = 0, err = 0;
rcu_read_lock();
if (cb->strict_check) {
err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
if (err < 0)
return err;
goto unlock;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
@ -1005,29 +1013,28 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
/* ipv4 does not use prefix flag */
if (filter.flags & RTM_F_PREFIX)
return skb->len;
goto unlock;
if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
if (rtnl_msg_family(cb->nlh) != PF_INET)
return skb->len;
goto unlock;
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
return -ENOENT;
err = -ENOENT;
goto unlock;
}
rcu_read_lock();
err = fib_table_dump(tb, skb, cb, &filter);
rcu_read_unlock();
return skb->len ? : err;
if (err < 0 && skb->len)
err = skb->len;
goto unlock;
}
s_h = cb->args[0];
s_e = cb->args[1];
rcu_read_lock();
err = 0;
for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv4.fib_table_hash[h];
@ -1040,9 +1047,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
err = fib_table_dump(tb, skb, cb, &filter);
if (err < 0) {
if (likely(skb->len))
goto out;
goto out_err;
err = skb->len;
goto out;
}
dumped = 1;
next:
@ -1050,13 +1056,12 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
}
}
out:
err = skb->len;
out_err:
rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
unlock:
rcu_read_unlock();
return err;
}
@ -1659,5 +1664,6 @@ void __init ip_fib_init(void)
rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
RTNL_FLAG_DUMP_UNLOCKED);
}

View file

@ -2368,7 +2368,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
* and key == 0 means the dump has wrapped around and we are done.
*/
if (count && !key)
return skb->len;
return 0;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
int err;
@ -2394,7 +2394,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
cb->args[3] = key;
cb->args[2] = count;
return skb->len;
return 0;
}
void __init fib_trie_init(void)

View file

@ -2587,7 +2587,9 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
struct fib_dump_filter filter = {};
struct fib_dump_filter filter = {
.rtnl_held = true,
};
int err;
if (cb->strict_check) {

View file

@ -3492,7 +3492,8 @@ static void addrconf_dev_config(struct net_device *dev)
/* this device type has no EUI support */
if (dev->type == ARPHRD_NONE &&
idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)
idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM;
WRITE_ONCE(idev->cnf.addr_gen_mode,
IN6_ADDR_GEN_MODE_RANDOM);
addrconf_addr_gen(idev, false);
}
@ -3764,7 +3765,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
rt6_mtu_change(dev, dev->mtu);
idev->cnf.mtu6 = dev->mtu;
}
idev->tstamp = jiffies;
WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
/*
@ -4006,7 +4007,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
ipv6_mc_down(idev);
}
idev->tstamp = jiffies;
WRITE_ONCE(idev->tstamp, jiffies);
idev->ra_mtu = 0;
/* Last: Shot the device (if unregistered) */
@ -5634,87 +5635,97 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
__s32 *array, int bytes)
static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
__s32 *array, int bytes)
{
BUG_ON(bytes < (DEVCONF_MAX * 4));
memset(array, 0, bytes);
array[DEVCONF_FORWARDING] = cnf->forwarding;
array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
array[DEVCONF_MTU6] = cnf->mtu6;
array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
array[DEVCONF_AUTOCONF] = cnf->autoconf;
array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
array[DEVCONF_FORWARDING] = READ_ONCE(cnf->forwarding);
array[DEVCONF_HOPLIMIT] = READ_ONCE(cnf->hop_limit);
array[DEVCONF_MTU6] = READ_ONCE(cnf->mtu6);
array[DEVCONF_ACCEPT_RA] = READ_ONCE(cnf->accept_ra);
array[DEVCONF_ACCEPT_REDIRECTS] = READ_ONCE(cnf->accept_redirects);
array[DEVCONF_AUTOCONF] = READ_ONCE(cnf->autoconf);
array[DEVCONF_DAD_TRANSMITS] = READ_ONCE(cnf->dad_transmits);
array[DEVCONF_RTR_SOLICITS] = READ_ONCE(cnf->rtr_solicits);
array[DEVCONF_RTR_SOLICIT_INTERVAL] =
jiffies_to_msecs(cnf->rtr_solicit_interval);
jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_interval));
array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
jiffies_to_msecs(cnf->rtr_solicit_max_interval);
jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_max_interval));
array[DEVCONF_RTR_SOLICIT_DELAY] =
jiffies_to_msecs(cnf->rtr_solicit_delay);
array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_delay));
array[DEVCONF_FORCE_MLD_VERSION] = READ_ONCE(cnf->force_mld_version);
array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
jiffies_to_msecs(READ_ONCE(cnf->mldv1_unsolicited_report_interval));
array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric;
array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
jiffies_to_msecs(READ_ONCE(cnf->mldv2_unsolicited_report_interval));
array[DEVCONF_USE_TEMPADDR] = READ_ONCE(cnf->use_tempaddr);
array[DEVCONF_TEMP_VALID_LFT] = READ_ONCE(cnf->temp_valid_lft);
array[DEVCONF_TEMP_PREFERED_LFT] = READ_ONCE(cnf->temp_prefered_lft);
array[DEVCONF_REGEN_MAX_RETRY] = READ_ONCE(cnf->regen_max_retry);
array[DEVCONF_MAX_DESYNC_FACTOR] = READ_ONCE(cnf->max_desync_factor);
array[DEVCONF_MAX_ADDRESSES] = READ_ONCE(cnf->max_addresses);
array[DEVCONF_ACCEPT_RA_DEFRTR] = READ_ONCE(cnf->accept_ra_defrtr);
array[DEVCONF_RA_DEFRTR_METRIC] = READ_ONCE(cnf->ra_defrtr_metric);
array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] =
READ_ONCE(cnf->accept_ra_min_hop_limit);
array[DEVCONF_ACCEPT_RA_PINFO] = READ_ONCE(cnf->accept_ra_pinfo);
#ifdef CONFIG_IPV6_ROUTER_PREF
array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
array[DEVCONF_ACCEPT_RA_RTR_PREF] = READ_ONCE(cnf->accept_ra_rtr_pref);
array[DEVCONF_RTR_PROBE_INTERVAL] =
jiffies_to_msecs(cnf->rtr_probe_interval);
jiffies_to_msecs(READ_ONCE(cnf->rtr_probe_interval));
#ifdef CONFIG_IPV6_ROUTE_INFO
array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] =
READ_ONCE(cnf->accept_ra_rt_info_min_plen);
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] =
READ_ONCE(cnf->accept_ra_rt_info_max_plen);
#endif
#endif
array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
array[DEVCONF_PROXY_NDP] = READ_ONCE(cnf->proxy_ndp);
array[DEVCONF_ACCEPT_SOURCE_ROUTE] =
READ_ONCE(cnf->accept_source_route);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
array[DEVCONF_OPTIMISTIC_DAD] = READ_ONCE(cnf->optimistic_dad);
array[DEVCONF_USE_OPTIMISTIC] = READ_ONCE(cnf->use_optimistic);
#endif
#ifdef CONFIG_IPV6_MROUTE
array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
array[DEVCONF_DISABLE_IPV6] = READ_ONCE(cnf->disable_ipv6);
array[DEVCONF_ACCEPT_DAD] = READ_ONCE(cnf->accept_dad);
array[DEVCONF_FORCE_TLLAO] = READ_ONCE(cnf->force_tllao);
array[DEVCONF_NDISC_NOTIFY] = READ_ONCE(cnf->ndisc_notify);
array[DEVCONF_SUPPRESS_FRAG_NDISC] =
READ_ONCE(cnf->suppress_frag_ndisc);
array[DEVCONF_ACCEPT_RA_FROM_LOCAL] =
READ_ONCE(cnf->accept_ra_from_local);
array[DEVCONF_ACCEPT_RA_MTU] = READ_ONCE(cnf->accept_ra_mtu);
array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] =
READ_ONCE(cnf->ignore_routes_with_linkdown);
/* we omit DEVCONF_STABLE_SECRET for now */
array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
array[DEVCONF_USE_OIF_ADDRS_ONLY] = READ_ONCE(cnf->use_oif_addrs_only);
array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] =
READ_ONCE(cnf->drop_unicast_in_l2_multicast);
array[DEVCONF_DROP_UNSOLICITED_NA] = READ_ONCE(cnf->drop_unsolicited_na);
array[DEVCONF_KEEP_ADDR_ON_DOWN] = READ_ONCE(cnf->keep_addr_on_down);
array[DEVCONF_SEG6_ENABLED] = READ_ONCE(cnf->seg6_enabled);
#ifdef CONFIG_IPV6_SEG6_HMAC
array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
array[DEVCONF_SEG6_REQUIRE_HMAC] = READ_ONCE(cnf->seg6_require_hmac);
#endif
array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
array[DEVCONF_ENHANCED_DAD] = READ_ONCE(cnf->enhanced_dad);
array[DEVCONF_ADDR_GEN_MODE] = READ_ONCE(cnf->addr_gen_mode);
array[DEVCONF_DISABLE_POLICY] = READ_ONCE(cnf->disable_policy);
array[DEVCONF_NDISC_TCLASS] = READ_ONCE(cnf->ndisc_tclass);
array[DEVCONF_RPL_SEG_ENABLED] = READ_ONCE(cnf->rpl_seg_enabled);
array[DEVCONF_IOAM6_ENABLED] = READ_ONCE(cnf->ioam6_enabled);
array[DEVCONF_IOAM6_ID] = READ_ONCE(cnf->ioam6_id);
array[DEVCONF_IOAM6_ID_WIDE] = READ_ONCE(cnf->ioam6_id_wide);
array[DEVCONF_NDISC_EVICT_NOCARRIER] =
READ_ONCE(cnf->ndisc_evict_nocarrier);
array[DEVCONF_ACCEPT_UNTRACKED_NA] =
READ_ONCE(cnf->accept_untracked_na);
array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft);
}
static inline size_t inet6_ifla6_size(void)
@ -5794,13 +5805,14 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
struct nlattr *nla;
struct ifla_cacheinfo ci;
struct nlattr *nla;
u32 ra_mtu;
if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags))
if (nla_put_u32(skb, IFLA_INET6_FLAGS, READ_ONCE(idev->if_flags)))
goto nla_put_failure;
ci.max_reasm_len = IPV6_MAXPLEN;
ci.tstamp = cstamp_delta(idev->tstamp);
ci.tstamp = cstamp_delta(READ_ONCE(idev->tstamp));
ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));
if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))
@ -5832,11 +5844,12 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE,
READ_ONCE(idev->cnf.addr_gen_mode)))
goto nla_put_failure;
if (idev->ra_mtu &&
nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
ra_mtu = READ_ONCE(idev->ra_mtu);
if (ra_mtu && nla_put_u32(skb, IFLA_INET6_RA_MTU, ra_mtu))
goto nla_put_failure;
return 0;
@ -6037,7 +6050,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
idev->cnf.addr_gen_mode = mode;
WRITE_ONCE(idev->cnf.addr_gen_mode, mode);
}
return 0;
@ -6049,6 +6062,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
struct net_device *dev = idev->dev;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
int ifindex, iflink;
void *protoinfo;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
@ -6059,16 +6073,18 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
hdr->ifi_family = AF_INET6;
hdr->__ifi_pad = 0;
hdr->ifi_type = dev->type;
hdr->ifi_index = dev->ifindex;
ifindex = READ_ONCE(dev->ifindex);
hdr->ifi_index = ifindex;
hdr->ifi_flags = dev_get_flags(dev);
hdr->ifi_change = 0;
iflink = dev_get_iflink(dev);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
(dev->ifindex != dev_get_iflink(dev) &&
nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
(ifindex != iflink &&
nla_put_u32(skb, IFLA_LINK, iflink)) ||
nla_put_u8(skb, IFLA_OPERSTATE,
netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN))
goto nla_put_failure;
@ -6116,50 +6132,42 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
int h, s_h;
int idx = 0, s_idx;
struct {
unsigned long ifindex;
} *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
struct hlist_head *head;
int err;
/* only requests using strict checking can pass data to
* influence the dump
*/
if (cb->strict_check) {
int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
if (err < 0)
return err;
}
s_h = cb->args[0];
s_idx = cb->args[1];
err = 0;
rcu_read_lock();
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
idev = __in6_dev_get(dev);
if (!idev)
goto cont;
if (inet6_fill_ifinfo(skb, idev,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWLINK, NLM_F_MULTI) < 0)
goto out;
cont:
idx++;
for_each_netdev_dump(net, dev, ctx->ifindex) {
idev = __in6_dev_get(dev);
if (!idev)
continue;
err = inet6_fill_ifinfo(skb, idev,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWLINK, NLM_F_MULTI);
if (err < 0) {
if (likely(skb->len))
err = skb->len;
break;
}
}
out:
rcu_read_unlock();
cb->args[1] = idx;
cb->args[0] = h;
return skb->len;
return err;
}
void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
@ -6516,7 +6524,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
}
if (idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
@ -6527,7 +6535,8 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
idev = __in6_dev_get(dev);
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
WRITE_ONCE(idev->cnf.addr_gen_mode,
new_val);
addrconf_init_auto_addrs(idev->dev);
}
}
@ -6592,14 +6601,15 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
struct inet6_dev *idev = __in6_dev_get(dev);
if (idev) {
idev->cnf.addr_gen_mode =
IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
WRITE_ONCE(idev->cnf.addr_gen_mode,
IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
}
} else {
struct inet6_dev *idev = ctl->extra1;
idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
WRITE_ONCE(idev->cnf.addr_gen_mode,
IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
out:
@ -7452,7 +7462,7 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
NULL, inet6_dump_ifinfo, 0);
NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;

View file

@ -620,8 +620,11 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
.filter.dump_routes = true };
struct rt6_rtnl_dump_arg arg = {
.filter.dump_exceptions = true,
.filter.dump_routes = true,
.filter.rtnl_held = true,
};
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;

View file

@ -1756,7 +1756,7 @@ int ip6_tnl_get_iflink(const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
return t->parms.link;
return READ_ONCE(t->parms.link);
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);

View file

@ -2592,7 +2592,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct fib_dump_filter filter = {};
struct fib_dump_filter filter = {
.rtnl_held = true,
};
int err;
if (cb->strict_check) {

View file

@ -1975,7 +1975,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
idev->nd_parms->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
idev->tstamp = jiffies;
WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
in6_dev_put(idev);
}

View file

@ -2179,7 +2179,9 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct mpls_route __rcu **platform_label;
struct fib_dump_filter filter = {};
struct fib_dump_filter filter = {
.rtnl_held = true,
};
unsigned int flags = NLM_F_MULTI;
size_t platform_labels;
unsigned int index;

View file

@ -130,7 +130,7 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
"nlk_cb_mutex-MAX_LINKS"
};
static int netlink_dump(struct sock *sk);
static int netlink_dump(struct sock *sk, bool lock_taken);
/* nl_table locking explained:
* Lookup and traversal are protected with an RCU read-side lock. Insertion
@ -636,7 +636,7 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
struct mutex *cb_mutex, int protocol,
struct mutex *dump_cb_mutex, int protocol,
int kern)
{
struct sock *sk;
@ -651,15 +651,11 @@ static int __netlink_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
if (cb_mutex) {
nlk->cb_mutex = cb_mutex;
} else {
nlk->cb_mutex = &nlk->cb_def_mutex;
mutex_init(nlk->cb_mutex);
lockdep_set_class_and_name(nlk->cb_mutex,
mutex_init(&nlk->nl_cb_mutex);
lockdep_set_class_and_name(&nlk->nl_cb_mutex,
nlk_cb_mutex_keys + protocol,
nlk_cb_mutex_key_strings[protocol]);
}
nlk->dump_cb_mutex = dump_cb_mutex;
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@ -1987,7 +1983,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (READ_ONCE(nlk->cb_running) &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
ret = netlink_dump(sk, false);
if (ret) {
WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
@ -2196,7 +2192,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
return 0;
}
static int netlink_dump(struct sock *sk)
static int netlink_dump(struct sock *sk, bool lock_taken)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ext_ack extack = {};
@ -2208,7 +2204,8 @@ static int netlink_dump(struct sock *sk)
int alloc_min_size;
int alloc_size;
mutex_lock(nlk->cb_mutex);
if (!lock_taken)
mutex_lock(&nlk->nl_cb_mutex);
if (!nlk->cb_running) {
err = -EINVAL;
goto errout_skb;
@ -2260,14 +2257,24 @@ static int netlink_dump(struct sock *sk)
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
struct mutex *extra_mutex = nlk->dump_cb_mutex;
cb->extack = &extack;
if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED)
extra_mutex = NULL;
if (extra_mutex)
mutex_lock(extra_mutex);
nlk->dump_done_errno = cb->dump(skb, cb);
if (extra_mutex)
mutex_unlock(extra_mutex);
cb->extack = NULL;
}
if (nlk->dump_done_errno > 0 ||
skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
mutex_unlock(nlk->cb_mutex);
mutex_unlock(&nlk->nl_cb_mutex);
if (sk_filter(sk, skb))
kfree_skb(skb);
@ -2301,13 +2308,13 @@ static int netlink_dump(struct sock *sk)
WRITE_ONCE(nlk->cb_running, false);
module = cb->module;
skb = cb->skb;
mutex_unlock(nlk->cb_mutex);
mutex_unlock(&nlk->nl_cb_mutex);
module_put(module);
consume_skb(skb);
return 0;
errout_skb:
mutex_unlock(nlk->cb_mutex);
mutex_unlock(&nlk->nl_cb_mutex);
kfree_skb(skb);
return err;
}
@ -2330,7 +2337,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk = nlk_sk(sk);
mutex_lock(nlk->cb_mutex);
mutex_lock(&nlk->nl_cb_mutex);
/* A dump is in progress... */
if (nlk->cb_running) {
ret = -EBUSY;
@ -2350,6 +2357,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->data = control->data;
cb->module = control->module;
cb->min_dump_alloc = control->min_dump_alloc;
cb->flags = control->flags;
cb->skb = skb;
cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
@ -2365,9 +2373,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
WRITE_ONCE(nlk->cb_running, true);
nlk->dump_done_errno = INT_MAX;
mutex_unlock(nlk->cb_mutex);
ret = netlink_dump(sk);
ret = netlink_dump(sk, true);
sock_put(sk);
@ -2383,7 +2389,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
module_put(control->module);
error_unlock:
sock_put(sk);
mutex_unlock(nlk->cb_mutex);
mutex_unlock(&nlk->nl_cb_mutex);
error_free:
kfree_skb(skb);
return ret;

View file

@ -39,8 +39,9 @@ struct netlink_sock {
bool cb_running;
int dump_done_errno;
struct netlink_callback cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
struct mutex nl_cb_mutex;
struct mutex *dump_cb_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);

View file

@ -207,7 +207,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
}
return err < 0 ? err : skb->len;
return err <= 0 ? err : skb->len;
}
static int netlink_diag_dump_done(struct netlink_callback *cb)

View file

@ -727,7 +727,7 @@ static int xfrmi_get_iflink(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
return xi->p.link;
return READ_ONCE(xi->p.link);
}
static const struct net_device_ops xfrmi_netdev_ops = {