mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-27 12:57:53 +00:00
Merge branch 'mlx5-misc-fixes'
Tariq Toukan says:
====================
mlx5 misc fixes
This patchset provides bug fixes to the mlx5 driver.
Patch 1 by Shay fixes the error flow in mlx5e_suspend().
Patch 2 by Shay aligns the peer devlink set logic with the register devlink flow.
Patch 3 by Maher solves a deadlock in lag enable/disable.
Patches 4 and 5 by Akiva address issues in command interface corner cases.
Series generated against:
commit 393ceeb921
("Merge branch 'there-are-some-bugfix-for-the-hns3-ethernet-driver'")
====================
Link: https://lore.kernel.org/r/20240509112951.590184-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
1164057b3c
9 changed files with 80 additions and 52 deletions
|
@ -969,19 +969,32 @@ static void cmd_work_handler(struct work_struct *work)
|
|||
bool poll_cmd = ent->polling;
|
||||
struct mlx5_cmd_layout *lay;
|
||||
struct mlx5_core_dev *dev;
|
||||
unsigned long cb_timeout;
|
||||
struct semaphore *sem;
|
||||
unsigned long timeout;
|
||||
unsigned long flags;
|
||||
int alloc_ret;
|
||||
int cmd_mode;
|
||||
|
||||
dev = container_of(cmd, struct mlx5_core_dev, cmd);
|
||||
cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
|
||||
|
||||
complete(&ent->handling);
|
||||
sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem;
|
||||
down(sem);
|
||||
|
||||
dev = container_of(cmd, struct mlx5_core_dev, cmd);
|
||||
timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
|
||||
|
||||
if (!ent->page_queue) {
|
||||
if (down_timeout(&cmd->vars.sem, timeout)) {
|
||||
mlx5_core_warn(dev, "%s(0x%x) timed out while waiting for a slot.\n",
|
||||
mlx5_command_str(ent->op), ent->op);
|
||||
if (ent->callback) {
|
||||
ent->callback(-EBUSY, ent->context);
|
||||
mlx5_free_cmd_msg(dev, ent->out);
|
||||
free_msg(dev, ent->in);
|
||||
cmd_ent_put(ent);
|
||||
} else {
|
||||
ent->ret = -EBUSY;
|
||||
complete(&ent->done);
|
||||
}
|
||||
complete(&ent->slotted);
|
||||
return;
|
||||
}
|
||||
alloc_ret = cmd_alloc_index(cmd, ent);
|
||||
if (alloc_ret < 0) {
|
||||
mlx5_core_err_rl(dev, "failed to allocate command entry\n");
|
||||
|
@ -994,10 +1007,11 @@ static void cmd_work_handler(struct work_struct *work)
|
|||
ent->ret = -EAGAIN;
|
||||
complete(&ent->done);
|
||||
}
|
||||
up(sem);
|
||||
up(&cmd->vars.sem);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
down(&cmd->vars.pages_sem);
|
||||
ent->idx = cmd->vars.max_reg_cmds;
|
||||
spin_lock_irqsave(&cmd->alloc_lock, flags);
|
||||
clear_bit(ent->idx, &cmd->vars.bitmask);
|
||||
|
@ -1005,6 +1019,8 @@ static void cmd_work_handler(struct work_struct *work)
|
|||
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
|
||||
}
|
||||
|
||||
complete(&ent->slotted);
|
||||
|
||||
lay = get_inst(cmd, ent->idx);
|
||||
ent->lay = lay;
|
||||
memset(lay, 0, sizeof(*lay));
|
||||
|
@ -1023,7 +1039,7 @@ static void cmd_work_handler(struct work_struct *work)
|
|||
ent->ts1 = ktime_get_ns();
|
||||
cmd_mode = cmd->mode;
|
||||
|
||||
if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
|
||||
if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, timeout))
|
||||
cmd_ent_get(ent);
|
||||
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
|
||||
|
||||
|
@ -1143,6 +1159,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
|
|||
ent->ret = -ECANCELED;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
wait_for_completion(&ent->slotted);
|
||||
|
||||
if (cmd->mode == CMD_MODE_POLLING || ent->polling)
|
||||
wait_for_completion(&ent->done);
|
||||
else if (!wait_for_completion_timeout(&ent->done, timeout))
|
||||
|
@ -1157,6 +1176,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
|
|||
} else if (err == -ECANCELED) {
|
||||
mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
|
||||
mlx5_command_str(ent->op), ent->op);
|
||||
} else if (err == -EBUSY) {
|
||||
mlx5_core_warn(dev, "%s(0x%x) timeout while waiting for command semaphore.\n",
|
||||
mlx5_command_str(ent->op), ent->op);
|
||||
}
|
||||
mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
|
||||
err, deliv_status_to_str(ent->status), ent->status);
|
||||
|
@ -1208,6 +1230,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
|
|||
ent->polling = force_polling;
|
||||
|
||||
init_completion(&ent->handling);
|
||||
init_completion(&ent->slotted);
|
||||
if (!callback)
|
||||
init_completion(&ent->done);
|
||||
|
||||
|
@ -1225,7 +1248,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
|
|||
return 0; /* mlx5_cmd_comp_handler() will put(ent) */
|
||||
|
||||
err = wait_func(dev, ent);
|
||||
if (err == -ETIMEDOUT || err == -ECANCELED)
|
||||
if (err == -ETIMEDOUT || err == -ECANCELED || err == -EBUSY)
|
||||
goto out_free;
|
||||
|
||||
ds = ent->ts2 - ent->ts1;
|
||||
|
@ -1611,6 +1634,9 @@ static int cmd_comp_notifier(struct notifier_block *nb,
|
|||
dev = container_of(cmd, struct mlx5_core_dev, cmd);
|
||||
eqe = data;
|
||||
|
||||
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
|
||||
|
||||
return NOTIFY_OK;
|
||||
|
|
|
@ -6058,7 +6058,7 @@ static int mlx5e_resume(struct auxiliary_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int _mlx5e_suspend(struct auxiliary_device *adev)
|
||||
static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg)
|
||||
{
|
||||
struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
|
||||
struct mlx5e_priv *priv = mlx5e_dev->priv;
|
||||
|
@ -6067,7 +6067,7 @@ static int _mlx5e_suspend(struct auxiliary_device *adev)
|
|||
struct mlx5_core_dev *pos;
|
||||
int i;
|
||||
|
||||
if (!netif_device_present(netdev)) {
|
||||
if (!pre_netdev_reg && !netif_device_present(netdev)) {
|
||||
if (test_bit(MLX5E_STATE_DESTROYING, &priv->state))
|
||||
mlx5_sd_for_each_dev(i, mdev, pos)
|
||||
mlx5e_destroy_mdev_resources(pos);
|
||||
|
@ -6090,7 +6090,7 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
|
|||
|
||||
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
|
||||
if (actual_adev)
|
||||
err = _mlx5e_suspend(actual_adev);
|
||||
err = _mlx5e_suspend(actual_adev, false);
|
||||
|
||||
mlx5_sd_cleanup(mdev);
|
||||
return err;
|
||||
|
@ -6157,7 +6157,7 @@ static int _mlx5e_probe(struct auxiliary_device *adev)
|
|||
return 0;
|
||||
|
||||
err_resume:
|
||||
_mlx5e_suspend(adev);
|
||||
_mlx5e_suspend(adev, true);
|
||||
err_profile_cleanup:
|
||||
profile->cleanup(priv);
|
||||
err_destroy_netdev:
|
||||
|
@ -6197,7 +6197,7 @@ static void _mlx5e_remove(struct auxiliary_device *adev)
|
|||
mlx5_core_uplink_netdev_set(mdev, NULL);
|
||||
mlx5e_dcbnl_delete_app(priv);
|
||||
unregister_netdev(priv->netdev);
|
||||
_mlx5e_suspend(adev);
|
||||
_mlx5e_suspend(adev, false);
|
||||
priv->profile->cleanup(priv);
|
||||
mlx5e_destroy_netdev(priv);
|
||||
mlx5e_devlink_port_unregister(mlx5e_dev);
|
||||
|
|
|
@ -833,7 +833,7 @@ int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
|
|||
struct mlx5_eswitch *slave_esw, int max_slaves);
|
||||
void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
|
||||
struct mlx5_eswitch *slave_esw);
|
||||
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
|
||||
int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw);
|
||||
|
||||
bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
|
||||
void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
|
||||
|
@ -925,7 +925,7 @@ mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
|
|||
static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) { return 0; }
|
||||
|
||||
static inline int
|
||||
mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
|
||||
mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -2502,6 +2502,16 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw)
|
|||
esw_offloads_cleanup_reps(esw);
|
||||
}
|
||||
|
||||
/*
 * Load a single representor type (@rep_type) for @rep.
 *
 * The per-type state is moved from REP_REGISTERED to REP_LOADED with an
 * atomic compare-and-exchange. The load() callback registered for
 * @rep_type is invoked only when that transition actually took place.
 *
 * Returns the value returned by load() when the callback was invoked, or
 * 0 when the state was not REP_REGISTERED and nothing was done.
 *
 * Note: the state is switched to REP_LOADED before load() runs, and this
 * helper does not switch it back if load() fails.
 */
static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
|
||||
struct mlx5_eswitch_rep *rep, u8 rep_type)
|
||||
{
|
||||
if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
|
||||
REP_REGISTERED, REP_LOADED) == REP_REGISTERED)
|
||||
return esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
|
||||
struct mlx5_eswitch_rep *rep, u8 rep_type)
|
||||
{
|
||||
|
@ -2526,13 +2536,11 @@ static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num)
|
|||
int err;
|
||||
|
||||
rep = mlx5_eswitch_get_rep(esw, vport_num);
|
||||
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
|
||||
if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
|
||||
REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
|
||||
err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
|
||||
if (err)
|
||||
goto err_reps;
|
||||
}
|
||||
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
|
||||
err = __esw_offloads_load_rep(esw, rep, rep_type);
|
||||
if (err)
|
||||
goto err_reps;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -3277,7 +3285,7 @@ static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
|
|||
esw_vport_destroy_offloads_acl_tables(esw, vport);
|
||||
}
|
||||
|
||||
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
|
||||
int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw)
|
||||
{
|
||||
struct mlx5_eswitch_rep *rep;
|
||||
unsigned long i;
|
||||
|
@ -3290,13 +3298,13 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
|
|||
if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
|
||||
return 0;
|
||||
|
||||
ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
|
||||
ret = __esw_offloads_load_rep(esw, rep, REP_IB);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mlx5_esw_for_each_rep(esw, i, rep) {
|
||||
if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
|
||||
mlx5_esw_offloads_rep_load(esw, rep->vport);
|
||||
__esw_offloads_load_rep(esw, rep, REP_IB);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -814,7 +814,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
|
|||
if (shared_fdb)
|
||||
for (i = 0; i < ldev->ports; i++)
|
||||
if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
|
||||
mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
}
|
||||
|
||||
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
|
||||
|
@ -922,7 +922,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
|
|||
mlx5_rescan_drivers_locked(dev0);
|
||||
|
||||
for (i = 0; i < ldev->ports; i++) {
|
||||
err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
@ -933,7 +933,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
|
|||
mlx5_deactivate_lag(ldev);
|
||||
mlx5_lag_add_devices(ldev);
|
||||
for (i = 0; i < ldev->ports; i++)
|
||||
mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
mlx5_core_err(dev0, "Failed to enable lag\n");
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -99,7 +99,7 @@ static int enable_mpesw(struct mlx5_lag *ldev)
|
|||
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
|
||||
mlx5_rescan_drivers_locked(dev0);
|
||||
for (i = 0; i < ldev->ports; i++) {
|
||||
err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
if (err)
|
||||
goto err_rescan_drivers;
|
||||
}
|
||||
|
@ -113,7 +113,7 @@ static int enable_mpesw(struct mlx5_lag *ldev)
|
|||
err_add_devices:
|
||||
mlx5_lag_add_devices(ldev);
|
||||
for (i = 0; i < ldev->ports; i++)
|
||||
mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
|
||||
mlx5_mpesw_metadata_cleanup(ldev);
|
||||
return err;
|
||||
}
|
||||
|
|
|
@ -1680,6 +1680,8 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
|
|||
struct devlink *devlink = priv_to_devlink(dev);
|
||||
int err;
|
||||
|
||||
devl_lock(devlink);
|
||||
devl_register(devlink);
|
||||
dev->state = MLX5_DEVICE_STATE_UP;
|
||||
err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
|
||||
if (err) {
|
||||
|
@ -1693,27 +1695,21 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
|
|||
goto query_hca_caps_err;
|
||||
}
|
||||
|
||||
devl_lock(devlink);
|
||||
devl_register(devlink);
|
||||
|
||||
err = mlx5_devlink_params_register(priv_to_devlink(dev));
|
||||
if (err) {
|
||||
mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
|
||||
goto params_reg_err;
|
||||
goto query_hca_caps_err;
|
||||
}
|
||||
|
||||
devl_unlock(devlink);
|
||||
return 0;
|
||||
|
||||
params_reg_err:
|
||||
devl_unregister(devlink);
|
||||
devl_unlock(devlink);
|
||||
query_hca_caps_err:
|
||||
devl_unregister(devlink);
|
||||
devl_unlock(devlink);
|
||||
mlx5_function_disable(dev, true);
|
||||
out:
|
||||
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
|
||||
devl_unregister(devlink);
|
||||
devl_unlock(devlink);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
@ -60,6 +60,13 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
|
|||
goto remap_err;
|
||||
}
|
||||
|
||||
/* Peer devlink logic expects to work on unregistered devlink instance. */
|
||||
err = mlx5_core_peer_devlink_set(sf_dev, devlink);
|
||||
if (err) {
|
||||
mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err);
|
||||
goto peer_devlink_set_err;
|
||||
}
|
||||
|
||||
if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev))
|
||||
err = mlx5_init_one_light(mdev);
|
||||
else
|
||||
|
@ -69,20 +76,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
|
|||
goto init_one_err;
|
||||
}
|
||||
|
||||
err = mlx5_core_peer_devlink_set(sf_dev, devlink);
|
||||
if (err) {
|
||||
mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err);
|
||||
goto peer_devlink_set_err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
peer_devlink_set_err:
|
||||
if (mlx5_dev_is_lightweight(sf_dev->mdev))
|
||||
mlx5_uninit_one_light(sf_dev->mdev);
|
||||
else
|
||||
mlx5_uninit_one(sf_dev->mdev);
|
||||
init_one_err:
|
||||
peer_devlink_set_err:
|
||||
iounmap(mdev->iseg);
|
||||
remap_err:
|
||||
mlx5_mdev_uninit(mdev);
|
||||
|
|
|
@ -862,6 +862,7 @@ struct mlx5_cmd_work_ent {
|
|||
void *context;
|
||||
int idx;
|
||||
struct completion handling;
|
||||
struct completion slotted;
|
||||
struct completion done;
|
||||
struct mlx5_cmd *cmd;
|
||||
struct work_struct work;
|
||||
|
|
Loading…
Reference in a new issue