RDMA/mlx5: Track netdev to avoid deadlock during netdev notifier unregister

When removing a network namespace with mlx5 devlink instance being in
it, following callchain is performed:

cleanup_net (takes down_read(&pernet_ops_rwsem)
devlink_pernet_pre_exit()
devlink_reload()
mlx5_devlink_reload_down()
mlx5_unload_one_devl_locked()
mlx5_detach_device()
del_adev()
mlx5r_remove()
__mlx5_ib_remove()
mlx5_ib_roce_cleanup()
mlx5_remove_netdev_notifier()
unregister_netdevice_notifier (takes down_write(&pernet_ops_rwsem)

This deadlocks.

Resolve this by converting to register_netdevice_notifier_dev_net()
which does not take pernet_ops_rwsem and moves the notifier block around
according to netdev it takes as arg.

Use previously introduced netdev added/removed events to track uplink
netdev to be used for register_netdevice_notifier_dev_net() purposes.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
This commit is contained in:
Jiri Pirko 2022-11-01 15:36:01 +01:00 committed by Saeed Mahameed
parent c7d4e6ab31
commit dca55da0a1
3 changed files with 60 additions and 25 deletions

View file

@ -3012,26 +3012,63 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
}
}
static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
static void mlx5_netdev_notifier_register(struct mlx5_roce *roce,
struct net_device *netdev)
{
int err;
dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event;
err = register_netdevice_notifier(&dev->port[port_num].roce.nb);
if (err) {
dev->port[port_num].roce.nb.notifier_call = NULL;
return err;
}
return 0;
if (roce->tracking_netdev)
return;
roce->tracking_netdev = netdev;
roce->nb.notifier_call = mlx5_netdev_event;
err = register_netdevice_notifier_dev_net(netdev, &roce->nb, &roce->nn);
WARN_ON(err);
}
static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
static void mlx5_netdev_notifier_unregister(struct mlx5_roce *roce)
{
if (dev->port[port_num].roce.nb.notifier_call) {
unregister_netdevice_notifier(&dev->port[port_num].roce.nb);
dev->port[port_num].roce.nb.notifier_call = NULL;
if (!roce->tracking_netdev)
return;
unregister_netdevice_notifier_dev_net(roce->tracking_netdev, &roce->nb,
&roce->nn);
roce->tracking_netdev = NULL;
}
static int mlx5e_mdev_notifier_event(struct notifier_block *nb,
unsigned long event, void *data)
{
struct mlx5_roce *roce = container_of(nb, struct mlx5_roce, mdev_nb);
struct net_device *netdev = data;
switch (event) {
case MLX5_DRIVER_EVENT_UPLINK_NETDEV:
if (netdev)
mlx5_netdev_notifier_register(roce, netdev);
else
mlx5_netdev_notifier_unregister(roce);
break;
default:
return NOTIFY_DONE;
}
return NOTIFY_OK;
}
static void mlx5_mdev_netdev_track(struct mlx5_ib_dev *dev, u32 port_num)
{
struct mlx5_roce *roce = &dev->port[port_num].roce;
roce->mdev_nb.notifier_call = mlx5e_mdev_notifier_event;
mlx5_blocking_notifier_register(dev->mdev, &roce->mdev_nb);
mlx5_core_uplink_netdev_event_replay(dev->mdev);
}
static void mlx5_mdev_netdev_untrack(struct mlx5_ib_dev *dev, u32 port_num)
{
struct mlx5_roce *roce = &dev->port[port_num].roce;
mlx5_blocking_notifier_unregister(dev->mdev, &roce->mdev_nb);
mlx5_netdev_notifier_unregister(roce);
}
static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
@ -3138,7 +3175,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
if (mpi->mdev_events.notifier_call)
mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
mpi->mdev_events.notifier_call = NULL;
mlx5_remove_netdev_notifier(ibdev, port_num);
mlx5_mdev_netdev_untrack(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
comps = mpi->mdev_refcnt;
@ -3196,12 +3233,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
if (err)
goto unbind;
err = mlx5_add_netdev_notifier(ibdev, port_num);
if (err) {
mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
port_num + 1);
goto unbind;
}
mlx5_mdev_netdev_track(ibdev, port_num);
mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
@ -3909,9 +3941,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
/* Register only for native ports */
err = mlx5_add_netdev_notifier(dev, port_num);
if (err)
return err;
mlx5_mdev_netdev_track(dev, port_num);
err = mlx5_enable_eth(dev);
if (err)
@ -3920,7 +3950,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
return 0;
cleanup:
mlx5_remove_netdev_notifier(dev, port_num);
mlx5_mdev_netdev_untrack(dev, port_num);
return err;
}
@ -3938,7 +3968,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
mlx5_disable_eth(dev);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
mlx5_remove_netdev_notifier(dev, port_num);
mlx5_mdev_netdev_untrack(dev, port_num);
}
}

View file

@ -832,6 +832,9 @@ struct mlx5_roce {
rwlock_t netdev_lock;
struct net_device *netdev;
struct notifier_block nb;
struct netdev_net_notifier nn;
struct notifier_block mdev_nb;
struct net_device *tracking_netdev;
atomic_t tx_port_affinity;
enum ib_port_state last_port_state;
struct mlx5_ib_dev *dev;

View file

@ -424,6 +424,7 @@ int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_b
return blocking_notifier_chain_register(&events->sw_nh, nb);
}
EXPORT_SYMBOL(mlx5_blocking_notifier_register);
int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
@ -431,6 +432,7 @@ int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier
return blocking_notifier_chain_unregister(&events->sw_nh, nb);
}
EXPORT_SYMBOL(mlx5_blocking_notifier_unregister);
int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
void *data)