net/mlx5e: Disable softirq in mlx5e_activate_rq to avoid race condition

[ Upstream commit 2e642afb61 ]

When the driver activates the channels, it assumes NAPI isn't running
yet. mlx5e_activate_rq posts a NOP WQE to ICOSQ to trigger a hardware
interrupt and start NAPI, which will run mlx5e_alloc_rx_mpwqe and post
UMR WQEs to ICOSQ to be able to receive packets with striding RQ.
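
For context, the NOP is posted by mlx5e_trigger_irq(), which at the time
of this commit looks roughly like the following (paraphrased from
en/txrx.h, trimmed for brevity). Note that it fills the wqe_info slot at
the current producer counter with num_wqebbs = 1 before ringing the
doorbell:

	static inline void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
	{
		struct mlx5_wq_cyc *wq = &sq->wq;
		struct mlx5e_tx_wqe *nopwqe;
		u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);

		/* Describe the NOP for the completion handler... */
		sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
			.wqe_type   = MLX5E_ICOSQ_WQE_NOP,
			.num_wqebbs = 1,
		};
		/* ...then post it and ring the doorbell to raise an IRQ. */
		nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
	}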

Unfortunately, a race condition is possible if NAPI is triggered by
something else (for example, TX) at an unfortunate moment, before
mlx5e_activate_rq finishes. In that case, mlx5e_alloc_rx_mpwqe may post
UMR WQEs to ICOSQ, and with such timing, the wqe_info of the first UMR
may be overwritten by the wqe_info of the NOP posted by
mlx5e_activate_rq.
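
One interleaving that produces the corruption (illustrative; the two
writers race because neither the producer counter nor wqe_info[] of the
regular ICOSQ is protected by a lock):

	mlx5e_activate_rq (process context)   NAPI poll (kicked by TX)
	-----------------------------------   ------------------------------
	set_bit(MLX5E_RQ_STATE_ENABLED)
	reads sq->pc (== 0) for the NOP
	                                      mlx5e_alloc_rx_mpwqe() posts a
	                                      UMR WQE at wqe_info[0] with
	                                      num_wqebbs = MLX5E_UMR_WQEBBS
	writes wqe_info[0] for the NOP,
	overwriting the UMR's entry with
	num_wqebbs = 1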

The consequence is that icosq->db.wqe_info[0].num_wqebbs is changed from
MLX5E_UMR_WQEBBS to 1, breaking the integrity of the array-based linked
list in wqe_info[]. mlx5e_poll_ico_cq then hangs in an infinite loop
after processing wqe_info[0]: the next item to be processed is
wqe_info[1], which is filled with zeros, so `sqcc += wi->num_wqebbs`
adds zero on every iteration and sqcc never advances.
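
A simplified sketch of the completion loop in mlx5e_poll_ico_cq
(paraphrased; error handling and WQE-type dispatch omitted) shows why
the zeroed entry stalls it:

	wqe_counter = be16_to_cpu(cqe->wqe_counter);

	do {
		struct mlx5e_icosq_wqe_info *wi;
		u16 ci;

		last_wqe = (sqcc == wqe_counter);

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
		wi = &sq->db.wqe_info[ci];

		/* After the corruption, wqe_info[0].num_wqebbs is 1 instead
		 * of MLX5E_UMR_WQEBBS, so the next iteration lands on the
		 * zeroed wqe_info[1]. sqcc then advances by 0 on every pass
		 * and last_wqe can never become true.
		 */
		sqcc += wi->num_wqebbs;
	} while (!last_wqe);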

This commit fixes this race condition by using async_icosq to post the
NOP and trigger the interrupt. async_icosq is always protected with a
spinlock, eliminating the race condition.
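
The locked posting pattern, already used for async_icosq by the code
being removed from mlx5e_activate_xsk below and now wrapped by the new
mlx5e_trigger_napi_icosq() helper, is:

	spin_lock_bh(&c->async_icosq_lock);
	mlx5e_trigger_irq(&c->async_icosq);
	spin_unlock_bh(&c->async_icosq_lock);

Since every producer of async_icosq posts under async_icosq_lock, the
NOP cannot collide with the UMR WQEs that NAPI posts to the regular
ICOSQ. RQs that have no channel (the PTP and trap RQs) fall back to
scheduling NAPI directly via the new mlx5e_trigger_napi_sched() helper.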

Fixes: bc77b240b3 ("net/mlx5e: Add fragmented memory support for RX multi packet WQE")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reported-by: Karsten Nielsen <karsten@foo-bar.dk>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>

--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -730,6 +730,7 @@ struct mlx5e_rq {
 	u8 wq_type;
 	u32 rqn;
 	struct mlx5_core_dev *mdev;
+	struct mlx5e_channel *channel;
 	u32 umr_mkey;
 	struct mlx5e_dma_info wqe_overflow;
@@ -1044,6 +1045,9 @@ void mlx5e_close_cq(struct mlx5e_cq *cq);
 int mlx5e_open_locked(struct net_device *netdev);
 int mlx5e_close_locked(struct net_device *netdev);
 
+void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c);
+void mlx5e_trigger_napi_sched(struct napi_struct *napi);
+
 int mlx5e_open_channels(struct mlx5e_priv *priv,
 			struct mlx5e_channels *chs);
 void mlx5e_close_channels(struct mlx5e_channels *chs);

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -737,6 +737,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
 	if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
 		mlx5e_ptp_rx_set_fs(c->priv);
 		mlx5e_activate_rq(&c->rq);
+		mlx5e_trigger_napi_sched(&c->napi);
 	}
 }

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -123,6 +123,8 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
 		xskrq->stats->recover++;
 	}
 
+	mlx5e_trigger_napi_icosq(icosq->channel);
+
 	mutex_unlock(&icosq->channel->icosq_recovery_lock);
 	return 0;
@@ -166,6 +168,10 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
 	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
 	mlx5e_activate_rq(rq);
 	rq->stats->recover++;
+	if (rq->channel)
+		mlx5e_trigger_napi_icosq(rq->channel);
+	else
+		mlx5e_trigger_napi_sched(rq->cq.napi);
 	return 0;
 out:
 	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -172,6 +172,7 @@ static void mlx5e_activate_trap(struct mlx5e_trap *trap)
 {
 	napi_enable(&trap->napi);
 	mlx5e_activate_rq(&trap->rq);
+	mlx5e_trigger_napi_sched(&trap->napi);
 }
 
 void mlx5e_deactivate_trap(struct mlx5e_priv *priv)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -117,6 +117,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
 		goto err_remove_pool;
 
 	mlx5e_activate_xsk(c);
+	mlx5e_trigger_napi_icosq(c);
 
 	/* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
 	 * any Fill Ring entries at the setup stage.

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -64,6 +64,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
 	rq->clock = &mdev->clock;
 	rq->icosq = &c->icosq;
 	rq->ix = c->ix;
+	rq->channel = c;
 	rq->mdev = mdev;
 	rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	rq->xdpsq = &c->rq_xdpsq;
@@ -179,10 +180,6 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c)
 	mlx5e_reporter_icosq_resume_recovery(c);
 
 	/* TX queue is created active. */
-
-	spin_lock_bh(&c->async_icosq_lock);
-	mlx5e_trigger_irq(&c->async_icosq);
-	spin_unlock_bh(&c->async_icosq_lock);
 }
 
 void mlx5e_deactivate_xsk(struct mlx5e_channel *c)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -477,6 +477,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
 	rq->clock = &mdev->clock;
 	rq->icosq = &c->icosq;
 	rq->ix = c->ix;
+	rq->channel = c;
 	rq->mdev = mdev;
 	rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	rq->xdpsq = &c->rq_xdpsq;
@@ -1070,13 +1071,6 @@ err_free_rq:
 void mlx5e_activate_rq(struct mlx5e_rq *rq)
 {
 	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-	if (rq->icosq) {
-		mlx5e_trigger_irq(rq->icosq);
-	} else {
-		local_bh_disable();
-		napi_schedule(rq->cq.napi);
-		local_bh_enable();
-	}
 }
 
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
@@ -2218,6 +2212,20 @@ static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)
 	return 0;
 }
 
+void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
+{
+	spin_lock_bh(&c->async_icosq_lock);
+	mlx5e_trigger_irq(&c->async_icosq);
+	spin_unlock_bh(&c->async_icosq_lock);
+}
+
+void mlx5e_trigger_napi_sched(struct napi_struct *napi)
+{
+	local_bh_disable();
+	napi_schedule(napi);
+	local_bh_enable();
+}
+
 static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 			      struct mlx5e_params *params,
 			      struct mlx5e_channel_param *cparam,
@@ -2299,6 +2307,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
 		mlx5e_activate_xsk(c);
+
+	mlx5e_trigger_napi_icosq(c);
 }
 
 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)