IB/ipoib: Use cancel_delayed_work_sync for neigh-clean task

The neigh_reap_task is self restarting, but so long as we call
cancel_delayed_work_sync() it will be guaranteed to not be running and
never start again. Thus we don't need to have the racy
IPOIB_STOP_NEIGH_GC bit, or the confusing mismatch of places sometimes
calling flush_workqueue after the cancel.

This fixes a situation where the GC work could have been left running
in some rare situations.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Erez Shitrit 2018-07-29 11:34:52 +03:00 committed by Jason Gunthorpe
parent 577e07ffba
commit cda8daf179
2 changed files with 10 additions and 24 deletions

View file

@ -91,7 +91,6 @@ enum {
IPOIB_STOP_REAPER = 7,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
IPOIB_STOP_NEIGH_GC = 11,
IPOIB_NEIGH_TBL_FLUSH = 12,
IPOIB_FLAG_DEV_ADDR_SET = 13,
IPOIB_FLAG_DEV_ADDR_CTRL = 14,

View file

@ -1310,9 +1310,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
int i;
LIST_HEAD(remove_list);
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
spin_lock_irqsave(&priv->lock, flags);
htbl = rcu_dereference_protected(ntbl->htbl,
@ -1324,9 +1321,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
/* neigh is obsolete if it was idle for two GC periods */
dt = 2 * arp_tbl.gc_interval;
neigh_obsolete = jiffies - dt;
/* handle possible race condition */
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
goto out_unlock;
for (i = 0; i < htbl->size; i++) {
struct ipoib_neigh *neigh;
@ -1364,9 +1358,8 @@ static void ipoib_reap_neigh(struct work_struct *work)
__ipoib_reap_neigh(priv);
if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
}
@ -1528,7 +1521,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
if (!htbl)
return -ENOMEM;
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
size = roundup_pow_of_two(arp_tbl.gc_thresh3);
buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL);
if (!buckets) {
@ -1543,7 +1535,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
atomic_set(&ntbl->entries, 0);
/* start garbage collection */
clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
@ -1653,15 +1644,11 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
int stopped;
ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
init_completion(&priv->ntbl.deleted);
/* Stop GC if called at init fail need to cancel work */
stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
if (!stopped)
cancel_delayed_work(&priv->neigh_reap_task);
cancel_delayed_work_sync(&priv->neigh_reap_task);
ipoib_flush_neighs(priv);
@ -1801,12 +1788,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
if (ipoib_ib_dev_open(dev)) {
pr_warn("%s failed to open device\n", dev->name);
ret = -ENODEV;
goto out_dev_uninit;
goto out_hash_uninit;
}
}
return 0;
out_hash_uninit:
ipoib_neigh_hash_uninit(dev);
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
@ -1836,8 +1826,7 @@ void ipoib_dev_cleanup(struct net_device *dev)
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
/* Stop GC on child */
set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags);
cancel_delayed_work(&cpriv->neigh_reap_task);
cancel_delayed_work_sync(&cpriv->neigh_reap_task);
unregister_netdevice_queue(cpriv->dev, &head);
}
unregister_netdevice_many(&head);
@ -2345,8 +2334,7 @@ static struct net_device *ipoib_add_port(const char *format,
ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
/* Stop GC if started before flush */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
cancel_delayed_work_sync(&priv->neigh_reap_task);
flush_workqueue(priv->wq);
ipoib_dev_cleanup(priv->dev);
@ -2411,8 +2399,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
rtnl_unlock();
/* Stop GC */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
cancel_delayed_work_sync(&priv->neigh_reap_task);
flush_workqueue(priv->wq);
/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */