net/mlx5: Expose SF firmware pages counter

Currently, each core device has VF pages counter which stores number of
fw pages used by its VFs and SFs.

The current design led to a hang when performing firmware reset on DPU,
where the DPU PFs stalled in sriov unload flow due to waiting on release
of SFs pages instead of waiting on only VFs pages.

Thus, Add a separate counter for SF firmware pages, which will prevent
the stall scenario described above.

Fixes: 1958fc2f07 ("net/mlx5: SF, Add auxiliary device driver")
Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
This commit is contained in:
Maher Sanalla 2023-01-22 23:24:56 +02:00 committed by Saeed Mahameed
parent c3bdbaea65
commit 9965bbebae
3 changed files with 3 additions and 1 deletions

View File

@ -246,6 +246,7 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev)
debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages);
debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]);
debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]);
debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]);
debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed);
debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped);

View File

@ -79,7 +79,7 @@ static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct
if (!func_id)
return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF;
return MLX5_VF;
return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF;
}
static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function)

View File

@ -576,6 +576,7 @@ struct mlx5_debugfs_entries {
enum mlx5_func_type {
MLX5_PF,
MLX5_VF,
MLX5_SF,
MLX5_HOST_PF,
MLX5_FUNC_TYPE_NUM,
};