net/mlx5e: xsk: Fix crash on regular rq reactivation

[ Upstream commit 39646d9bcd ]

When the regular rq is reactivated after the XSK socket is closed,
it could read stale cqes, which eventually corrupts the rq. This
leads to no more traffic being received on the regular rq and a
crash on the next close or deactivation of the rq.

Kal Cutter Conley reported this issue as a crash on the release
path when the xdpsock sample program is stopped (killed) and restarted
in sequence while traffic is running.

This patch flushes all cqes during the rq flush. The cqe flushing
is done in the reset state of the rq. The mlx5e_rq_to_ready code is
moved into the flush function to allow for this.
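
At a high level, the reworked flow looks like the condensed sketch
below (a paraphrase of the diff that follows, with error logging
elided; not the verbatim upstream code):

	int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
	{
		int err;

		/* Park the RQ in RST so the HW stops producing new CQEs. */
		err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
		if (err)
			return err;

		mlx5e_free_rx_descs(rq);	/* reclaim the posted RX WQEs */
		mlx5e_flush_rq_cq(rq);		/* new: drain any stale CQEs */

		/* Only now return to RDY, with a clean CQ behind the RQ. */
		return mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST,
					     MLX5_RQC_STATE_RDY);
	}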

Fixes: 082a9edf12 ("net/mlx5e: xsk: Flush RQ on XSK activation to save memory")
Reported-by: Kal Cutter Conley <kal.conley@dectris.com>
Closes: https://lore.kernel.org/xdp-newbies/CAHApi-nUAs4TeFWUDV915CZJo07XVg2Vp63-no7UDfj6wur9nQ@mail.gmail.com
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s
 	return err;
 }
 
-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq)
+{
+	struct mlx5_cqwq *cqwq = &rq->cq.wq;
+	struct mlx5_cqe64 *cqe;
+
+	if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) {
+		while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq)))
+			mlx5_cqwq_pop(cqwq);
+	} else {
+		while ((cqe = mlx5_cqwq_get_cqe(cqwq)))
+			mlx5_cqwq_pop(cqwq);
+	}
+
+	mlx5_cqwq_update_db_record(cqwq);
+}
+
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
 {
 	struct net_device *dev = rq->netdev;
 	int err;
@@ -1046,7 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
 		netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
 		return err;
 	}
 
+	mlx5e_free_rx_descs(rq);
+	mlx5e_flush_rq_cq(rq);
+
 	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
 	if (err) {
 		netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
@@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
 	return 0;
 }
 
-int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
-{
-	mlx5e_free_rx_descs(rq);
-
-	return mlx5e_rq_to_ready(rq, curr_state);
-}
-
 static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
 {
 	struct mlx5_core_dev *mdev = rq->mdev;
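
As background on why stale cqes are harmful: mlx5 completion queues
track entry validity with an ownership bit that flips on every pass
around the ring, so a consumer that rewinds its cursor without draining
leftover valid entries will treat old completions as fresh ones. The
standalone program below (illustration only: the ring layout, names and
sizes are simplified inventions, not the driver's real structures)
demonstrates the failure mode and how draining, as this patch does,
avoids it:

	#include <stdio.h>
	#include <stdint.h>

	#define RING_SIZE 8
	#define LOG_RING  3	/* log2(RING_SIZE) */

	struct cqe {
		uint8_t owner;	/* validity bit written by the "hardware" */
		uint8_t data;
	};

	static struct cqe ring[RING_SIZE];
	static uint32_t cc;	/* software consumer counter */

	/* An entry is valid when its owner bit matches the software phase,
	 * which flips each time cc wraps around the ring. */
	static struct cqe *get_valid_cqe(void)
	{
		struct cqe *c = &ring[cc & (RING_SIZE - 1)];

		return (c->owner == ((cc >> LOG_RING) & 1)) ? c : NULL;
	}

	static void drain(void)
	{
		while (get_valid_cqe())
			cc++;	/* pop the entry */
	}

	int main(void)
	{
		/* A fresh CQ is initialized to the "invalid" phase. */
		for (int i = 0; i < RING_SIZE; i++)
			ring[i].owner = 1;

		/* The "hardware" posts four completions on pass 0 ... */
		for (uint8_t i = 0; i < 4; i++)
			ring[i] = (struct cqe){ .owner = 0, .data = i };

		/* ... and software consumes two of them. */
		cc = 2;

		/* Reactivation without draining: rewinding the cursor makes
		 * the leftover entries look valid again (the bug). */
		cc = 0;
		printf("without drain: stale cqe visible: %s\n",
		       get_valid_cqe() ? "yes (bug)" : "no");

		/* What the fix does instead: drain every valid entry so the
		 * reactivated queue starts from a clean CQ. */
		drain();
		printf("with drain:    stale cqe visible: %s\n",
		       get_valid_cqe() ? "yes" : "no");
		return 0;
	}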