IPoIB: Fix AB-BA deadlock when deleting neighbours

Lockdep points out a circular locking dependency betwwen the ipoib
device priv spinlock (priv->lock) and the neighbour table rwlock
(ntbl->rwlock).

In the normal path, ie neigbour garbage collection task, the neigh
table rwlock is taken first and then if the neighbour needs to be
deleted, priv->lock is taken.

However in some error paths, such as in ipoib_cm_handle_tx_wc(),
priv->lock is taken first and then ipoib_neigh_free routine is called
which in turn takes the neighbour table ntbl->rwlock.

The solution is to get rid the neigh table rwlock completely and use
only priv->lock.

Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
Shlomo Pongratz 2012-08-29 15:14:34 +00:00 committed by Roland Dreier
parent 66172c0993
commit b5120a6e11
3 changed files with 27 additions and 46 deletions

View file

@ -274,7 +274,6 @@ struct ipoib_neigh_hash {
struct ipoib_neigh_table { struct ipoib_neigh_table {
struct ipoib_neigh_hash __rcu *htbl; struct ipoib_neigh_hash __rcu *htbl;
rwlock_t rwlock;
atomic_t entries; atomic_t entries;
struct completion flushed; struct completion flushed;
struct completion deleted; struct completion deleted;

View file

@ -546,15 +546,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
struct ipoib_neigh *neigh; struct ipoib_neigh *neigh;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&priv->lock, flags);
neigh = ipoib_neigh_alloc(daddr, dev); neigh = ipoib_neigh_alloc(daddr, dev);
if (!neigh) { if (!neigh) {
spin_unlock_irqrestore(&priv->lock, flags);
++dev->stats.tx_dropped; ++dev->stats.tx_dropped;
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
return; return;
} }
spin_lock_irqsave(&priv->lock, flags);
path = __path_find(dev, daddr + 4); path = __path_find(dev, daddr + 4);
if (!path) { if (!path) {
path = path_rec_create(dev, daddr + 4); path = path_rec_create(dev, daddr + 4);
@ -863,10 +863,10 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return; return;
write_lock_bh(&ntbl->rwlock); spin_lock_irqsave(&priv->lock, flags);
htbl = rcu_dereference_protected(ntbl->htbl, htbl = rcu_dereference_protected(ntbl->htbl,
lockdep_is_held(&ntbl->rwlock)); lockdep_is_held(&priv->lock));
if (!htbl) if (!htbl)
goto out_unlock; goto out_unlock;
@ -883,16 +883,14 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
struct ipoib_neigh __rcu **np = &htbl->buckets[i]; struct ipoib_neigh __rcu **np = &htbl->buckets[i];
while ((neigh = rcu_dereference_protected(*np, while ((neigh = rcu_dereference_protected(*np,
lockdep_is_held(&ntbl->rwlock))) != NULL) { lockdep_is_held(&priv->lock))) != NULL) {
/* was the neigh idle for two GC periods */ /* was the neigh idle for two GC periods */
if (time_after(neigh_obsolete, neigh->alive)) { if (time_after(neigh_obsolete, neigh->alive)) {
rcu_assign_pointer(*np, rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext, rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&ntbl->rwlock))); lockdep_is_held(&priv->lock)));
/* remove from path/mc list */ /* remove from path/mc list */
spin_lock_irqsave(&priv->lock, flags);
list_del(&neigh->list); list_del(&neigh->list);
spin_unlock_irqrestore(&priv->lock, flags);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim); call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} else { } else {
np = &neigh->hnext; np = &neigh->hnext;
@ -902,7 +900,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
} }
out_unlock: out_unlock:
write_unlock_bh(&ntbl->rwlock); spin_unlock_irqrestore(&priv->lock, flags);
} }
static void ipoib_reap_neigh(struct work_struct *work) static void ipoib_reap_neigh(struct work_struct *work)
@ -947,10 +945,8 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
struct ipoib_neigh *neigh; struct ipoib_neigh *neigh;
u32 hash_val; u32 hash_val;
write_lock_bh(&ntbl->rwlock);
htbl = rcu_dereference_protected(ntbl->htbl, htbl = rcu_dereference_protected(ntbl->htbl,
lockdep_is_held(&ntbl->rwlock)); lockdep_is_held(&priv->lock));
if (!htbl) { if (!htbl) {
neigh = NULL; neigh = NULL;
goto out_unlock; goto out_unlock;
@ -961,10 +957,10 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
*/ */
hash_val = ipoib_addr_hash(htbl, daddr); hash_val = ipoib_addr_hash(htbl, daddr);
for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], for (neigh = rcu_dereference_protected(htbl->buckets[hash_val],
lockdep_is_held(&ntbl->rwlock)); lockdep_is_held(&priv->lock));
neigh != NULL; neigh != NULL;
neigh = rcu_dereference_protected(neigh->hnext, neigh = rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&ntbl->rwlock))) { lockdep_is_held(&priv->lock))) {
if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
/* found, take one ref on behalf of the caller */ /* found, take one ref on behalf of the caller */
if (!atomic_inc_not_zero(&neigh->refcnt)) { if (!atomic_inc_not_zero(&neigh->refcnt)) {
@ -987,12 +983,11 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
/* put in hash */ /* put in hash */
rcu_assign_pointer(neigh->hnext, rcu_assign_pointer(neigh->hnext,
rcu_dereference_protected(htbl->buckets[hash_val], rcu_dereference_protected(htbl->buckets[hash_val],
lockdep_is_held(&ntbl->rwlock))); lockdep_is_held(&priv->lock)));
rcu_assign_pointer(htbl->buckets[hash_val], neigh); rcu_assign_pointer(htbl->buckets[hash_val], neigh);
atomic_inc(&ntbl->entries); atomic_inc(&ntbl->entries);
out_unlock: out_unlock:
write_unlock_bh(&ntbl->rwlock);
return neigh; return neigh;
} }
@ -1040,35 +1035,29 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh)
struct ipoib_neigh *n; struct ipoib_neigh *n;
u32 hash_val; u32 hash_val;
write_lock_bh(&ntbl->rwlock);
htbl = rcu_dereference_protected(ntbl->htbl, htbl = rcu_dereference_protected(ntbl->htbl,
lockdep_is_held(&ntbl->rwlock)); lockdep_is_held(&priv->lock));
if (!htbl) if (!htbl)
goto out_unlock; return;
hash_val = ipoib_addr_hash(htbl, neigh->daddr); hash_val = ipoib_addr_hash(htbl, neigh->daddr);
np = &htbl->buckets[hash_val]; np = &htbl->buckets[hash_val];
for (n = rcu_dereference_protected(*np, for (n = rcu_dereference_protected(*np,
lockdep_is_held(&ntbl->rwlock)); lockdep_is_held(&priv->lock));
n != NULL; n != NULL;
n = rcu_dereference_protected(*np, n = rcu_dereference_protected(*np,
lockdep_is_held(&ntbl->rwlock))) { lockdep_is_held(&priv->lock))) {
if (n == neigh) { if (n == neigh) {
/* found */ /* found */
rcu_assign_pointer(*np, rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext, rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&ntbl->rwlock))); lockdep_is_held(&priv->lock)));
call_rcu(&neigh->rcu, ipoib_neigh_reclaim); call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
goto out_unlock; return;
} else { } else {
np = &n->hnext; np = &n->hnext;
} }
} }
out_unlock:
write_unlock_bh(&ntbl->rwlock);
} }
static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
@ -1080,7 +1069,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
ntbl->htbl = NULL; ntbl->htbl = NULL;
rwlock_init(&ntbl->rwlock);
htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
if (!htbl) if (!htbl)
return -ENOMEM; return -ENOMEM;
@ -1128,10 +1116,10 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
int i; int i;
/* remove all neigh connected to a given path or mcast */ /* remove all neigh connected to a given path or mcast */
write_lock_bh(&ntbl->rwlock); spin_lock_irqsave(&priv->lock, flags);
htbl = rcu_dereference_protected(ntbl->htbl, htbl = rcu_dereference_protected(ntbl->htbl,
lockdep_is_held(&ntbl->rwlock)); lockdep_is_held(&priv->lock));
if (!htbl) if (!htbl)
goto out_unlock; goto out_unlock;
@ -1141,16 +1129,14 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
struct ipoib_neigh __rcu **np = &htbl->buckets[i]; struct ipoib_neigh __rcu **np = &htbl->buckets[i];
while ((neigh = rcu_dereference_protected(*np, while ((neigh = rcu_dereference_protected(*np,
lockdep_is_held(&ntbl->rwlock))) != NULL) { lockdep_is_held(&priv->lock))) != NULL) {
/* delete neighs belong to this parent */ /* delete neighs belong to this parent */
if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) {
rcu_assign_pointer(*np, rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext, rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&ntbl->rwlock))); lockdep_is_held(&priv->lock)));
/* remove from parent list */ /* remove from parent list */
spin_lock_irqsave(&priv->lock, flags);
list_del(&neigh->list); list_del(&neigh->list);
spin_unlock_irqrestore(&priv->lock, flags);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim); call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} else { } else {
np = &neigh->hnext; np = &neigh->hnext;
@ -1159,7 +1145,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
} }
} }
out_unlock: out_unlock:
write_unlock_bh(&ntbl->rwlock); spin_unlock_irqrestore(&priv->lock, flags);
} }
static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
@ -1171,10 +1157,10 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
init_completion(&priv->ntbl.flushed); init_completion(&priv->ntbl.flushed);
write_lock_bh(&ntbl->rwlock); spin_lock_irqsave(&priv->lock, flags);
htbl = rcu_dereference_protected(ntbl->htbl, htbl = rcu_dereference_protected(ntbl->htbl,
lockdep_is_held(&ntbl->rwlock)); lockdep_is_held(&priv->lock));
if (!htbl) if (!htbl)
goto out_unlock; goto out_unlock;
@ -1187,14 +1173,12 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
struct ipoib_neigh __rcu **np = &htbl->buckets[i]; struct ipoib_neigh __rcu **np = &htbl->buckets[i];
while ((neigh = rcu_dereference_protected(*np, while ((neigh = rcu_dereference_protected(*np,
lockdep_is_held(&ntbl->rwlock))) != NULL) { lockdep_is_held(&priv->lock))) != NULL) {
rcu_assign_pointer(*np, rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext, rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&ntbl->rwlock))); lockdep_is_held(&priv->lock)));
/* remove from path/mc list */ /* remove from path/mc list */
spin_lock_irqsave(&priv->lock, flags);
list_del(&neigh->list); list_del(&neigh->list);
spin_unlock_irqrestore(&priv->lock, flags);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim); call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} }
} }
@ -1204,7 +1188,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
call_rcu(&htbl->rcu, neigh_hash_free_rcu); call_rcu(&htbl->rcu, neigh_hash_free_rcu);
out_unlock: out_unlock:
write_unlock_bh(&ntbl->rwlock); spin_unlock_irqrestore(&priv->lock, flags);
if (wait_flushed) if (wait_flushed)
wait_for_completion(&priv->ntbl.flushed); wait_for_completion(&priv->ntbl.flushed);
} }

View file

@ -707,9 +707,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
neigh = ipoib_neigh_get(dev, daddr); neigh = ipoib_neigh_get(dev, daddr);
spin_lock_irqsave(&priv->lock, flags); spin_lock_irqsave(&priv->lock, flags);
if (!neigh) { if (!neigh) {
spin_unlock_irqrestore(&priv->lock, flags);
neigh = ipoib_neigh_alloc(daddr, dev); neigh = ipoib_neigh_alloc(daddr, dev);
spin_lock_irqsave(&priv->lock, flags);
if (neigh) { if (neigh) {
kref_get(&mcast->ah->ref); kref_get(&mcast->ah->ref);
neigh->ah = mcast->ah; neigh->ah = mcast->ah;