drop_monitor: remove quadratic behavior

drop_monitor is using an unique list on which all netdevices in
the host have an element, regardless of their netns.

This scales poorly, not only at device unregister time (what I
caught during my netns dismantle stress tests), but also at packet
processing time whenever trace_napi_poll_hit() is called.

If the intent was to avoid adding one pointer in 'struct net_device'
then surely we prefer O(1) behavior.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2022-02-22 14:04:50 -08:00 committed by David S. Miller
parent 503310a5d4
commit b26ef81c46
2 changed files with 27 additions and 56 deletions

View file

@ -2236,7 +2236,9 @@ struct net_device {
#if IS_ENABLED(CONFIG_MRP)
struct mrp_port __rcu *mrp_port;
#endif
#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
struct dm_hw_stat_delta __rcu *dm_private;
#endif
struct device dev;
const struct attribute_group *sysfs_groups[4];
const struct attribute_group *sysfs_rx_queue_group;

View file

@ -64,7 +64,6 @@ static const char * const drop_reasons[] = {
/* net_dm_mutex
*
* An overall lock guarding every operation coming from userspace.
* It also guards the global 'hw_stats_list' list.
*/
static DEFINE_MUTEX(net_dm_mutex);
@ -100,11 +99,9 @@ struct per_cpu_dm_data {
};
struct dm_hw_stat_delta {
struct net_device *dev;
unsigned long last_rx;
struct list_head list;
struct rcu_head rcu;
unsigned long last_drop_val;
struct rcu_head rcu;
};
static struct genl_family net_drop_monitor_family;
@ -115,7 +112,6 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);
static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
static u32 net_dm_trunc_len;
@ -287,33 +283,27 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
int work, int budget)
{
struct dm_hw_stat_delta *new_stat;
struct net_device *dev = napi->dev;
struct dm_hw_stat_delta *stat;
/*
* Don't check napi structures with no associated device
*/
if (!napi->dev)
if (!dev)
return;
rcu_read_lock();
list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
struct net_device *dev;
stat = rcu_dereference(dev->dm_private);
if (stat) {
/*
* only add a note to our monitor buffer if:
* 1) this is the dev we received on
* 2) its after the last_rx delta
* 3) our rx_dropped count has gone up
* 1) its after the last_rx delta
* 2) our rx_dropped count has gone up
*/
/* Paired with WRITE_ONCE() in dropmon_net_event() */
dev = READ_ONCE(new_stat->dev);
if ((dev == napi->dev) &&
(time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
(napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
(dev->stats.rx_dropped != stat->last_drop_val)) {
trace_drop_common(NULL, NULL);
new_stat->last_drop_val = napi->dev->stats.rx_dropped;
new_stat->last_rx = jiffies;
break;
stat->last_drop_val = dev->stats.rx_dropped;
stat->last_rx = jiffies;
}
}
rcu_read_unlock();
@ -1198,7 +1188,6 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
static void net_dm_trace_off_set(void)
{
struct dm_hw_stat_delta *new_stat, *temp;
const struct net_dm_alert_ops *ops;
int cpu;
@ -1222,13 +1211,6 @@ static void net_dm_trace_off_set(void)
consume_skb(skb);
}
list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
if (new_stat->dev == NULL) {
list_del_rcu(&new_stat->list);
kfree_rcu(new_stat, rcu);
}
}
module_put(THIS_MODULE);
}
@ -1589,41 +1571,28 @@ static int dropmon_net_event(struct notifier_block *ev_block,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dm_hw_stat_delta *new_stat = NULL;
struct dm_hw_stat_delta *tmp;
struct dm_hw_stat_delta *stat;
switch (event) {
case NETDEV_REGISTER:
new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
break;
stat = kzalloc(sizeof(*stat), GFP_KERNEL);
if (!stat)
break;
if (!new_stat)
goto out;
stat->last_rx = jiffies;
rcu_assign_pointer(dev->dm_private, stat);
new_stat->dev = dev;
new_stat->last_rx = jiffies;
mutex_lock(&net_dm_mutex);
list_add_rcu(&new_stat->list, &hw_stats_list);
mutex_unlock(&net_dm_mutex);
break;
case NETDEV_UNREGISTER:
mutex_lock(&net_dm_mutex);
list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
if (new_stat->dev == dev) {
/* Paired with READ_ONCE() in trace_napi_poll_hit() */
WRITE_ONCE(new_stat->dev, NULL);
if (trace_state == TRACE_OFF) {
list_del_rcu(&new_stat->list);
kfree_rcu(new_stat, rcu);
break;
}
}
stat = rtnl_dereference(dev->dm_private);
if (stat) {
rcu_assign_pointer(dev->dm_private, NULL);
kfree_rcu(stat, rcu);
}
mutex_unlock(&net_dm_mutex);
break;
}
out:
return NOTIFY_DONE;
}