RDMA/hns: Support hns HW stats

Support querying hns HW stats for rdma-tool to help with debugging.

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://lore.kernel.org/r/20230816091812.2899366-3-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
Chengchang Tang 2023-08-16 17:18:11 +08:00 committed by Leon Romanovsky
parent c4bb187379
commit 5a87279591
4 changed files with 159 additions and 0 deletions

View File

@ -840,6 +840,32 @@ enum hns_roce_device_state {
HNS_ROCE_DEVICE_STATE_UNINIT,
};
/*
 * Index of each hardware counter.
 *
 * The values are consecutive and start at zero, so an enumerator can be used
 * directly as an array index and HNS_ROCE_HW_CNT_TOTAL is the number of
 * counters. Do not reorder: consumers index per-counter arrays by position.
 */
enum hns_roce_hw_pkt_stat_index {
	/* RX_* entries */
	HNS_ROCE_HW_RX_RC_PKT_CNT = 0,
	HNS_ROCE_HW_RX_UC_PKT_CNT,
	HNS_ROCE_HW_RX_UD_PKT_CNT,
	HNS_ROCE_HW_RX_XRC_PKT_CNT,
	HNS_ROCE_HW_RX_PKT_CNT,
	HNS_ROCE_HW_RX_ERR_PKT_CNT,
	HNS_ROCE_HW_RX_CNP_PKT_CNT,

	/* TX_* entries */
	HNS_ROCE_HW_TX_RC_PKT_CNT,
	HNS_ROCE_HW_TX_UC_PKT_CNT,
	HNS_ROCE_HW_TX_UD_PKT_CNT,
	HNS_ROCE_HW_TX_XRC_PKT_CNT,
	HNS_ROCE_HW_TX_PKT_CNT,
	HNS_ROCE_HW_TX_ERR_PKT_CNT,
	HNS_ROCE_HW_TX_CNP_PKT_CNT,

	/* remaining entries */
	HNS_ROCE_HW_TRP_GET_MPT_ERR_PKT_CNT,
	HNS_ROCE_HW_TRP_GET_IRRL_ERR_PKT_CNT,
	HNS_ROCE_HW_ECN_DB_CNT,
	HNS_ROCE_HW_RX_BUF_CNT,
	HNS_ROCE_HW_TRP_RX_SOF_CNT,
	HNS_ROCE_HW_CQ_CQE_CNT,
	HNS_ROCE_HW_CQ_POE_CNT,
	HNS_ROCE_HW_CQ_NOTIFY_CNT,

	/* number of counters; must stay last */
	HNS_ROCE_HW_CNT_TOTAL
};
struct hns_roce_hw {
int (*cmq_init)(struct hns_roce_dev *hr_dev);
void (*cmq_exit)(struct hns_roce_dev *hr_dev);
@ -882,6 +908,8 @@ struct hns_roce_hw {
int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer);
int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
u64 *stats, u32 port, int *hw_counters);
const struct ib_device_ops *hns_roce_dev_ops;
const struct ib_device_ops *hns_roce_dev_srq_ops;
};

View File

@ -1613,6 +1613,56 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
return 0;
}
/*
 * hns_roce_hw_v2_query_counter - read the hardware counters via the command
 * queue.
 * @hr_dev: device to query
 * @stats: output array, filled starting at index 0
 * @port: port number; values above caps.num_ports are rejected
 * @num_counters: in: maximum number of entries to write to @stats;
 *                out (on success only): number of entries actually written
 *
 * A chain of descriptors is sent with HNS_ROCE_OPC_QUERY_COUNTER; each
 * descriptor carries CNT_PER_DESC little-endian 64-bit counters in its data
 * area. Copying stops early if a non-final descriptor comes back without the
 * NEXT flag.
 *
 * Return: 0 on success, -EINVAL for an invalid @port, -ENOMEM if the
 * descriptor array cannot be allocated, otherwise the error returned by
 * hns_roce_cmq_send().
 */
static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev,
					u64 *stats, u32 port, int *num_counters)
{
#define CNT_PER_DESC 3
	struct hns_roce_cmq_desc *desc;
	int bd_idx, cnt_idx;
	__le64 *cnt_data;
	int desc_num;
	int ret;
	int i;

	if (port > hr_dev->caps.num_ports)
		return -EINVAL;

	desc_num = DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC);
	desc = kcalloc(desc_num, sizeof(*desc), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	/* Chain the descriptors: every one except the last is flagged NEXT. */
	for (i = 0; i < desc_num; i++) {
		hns_roce_cmq_setup_basic_desc(&desc[i],
					      HNS_ROCE_OPC_QUERY_COUNTER, true);
		if (i != desc_num - 1)
			desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
	}

	ret = hns_roce_cmq_send(hr_dev, desc, desc_num);
	if (ret) {
		ibdev_err(&hr_dev->ib_dev,
			  "failed to get counter, ret = %d.\n", ret);
		goto err_out;
	}

	for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) {
		bd_idx = i / CNT_PER_DESC;

		/*
		 * desc->flag is little-endian (it is set with cpu_to_le16()
		 * above), so the mask must be converted before testing it.
		 * The last descriptor is identified by desc_num - 1 so the
		 * check stays correct even when HNS_ROCE_HW_CNT_TOTAL is a
		 * multiple of CNT_PER_DESC.
		 */
		if (bd_idx != desc_num - 1 &&
		    !(desc[bd_idx].flag & cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT)))
			break;

		cnt_data = (__le64 *)&desc[bd_idx].data[0];
		cnt_idx = i % CNT_PER_DESC;
		stats[i] = le64_to_cpu(cnt_data[cnt_idx]);
	}
	*num_counters = i;

err_out:
	kfree(desc);
	return ret;
}
static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc;
@ -6582,6 +6632,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.query_cqc = hns_roce_v2_query_cqc,
.query_qpc = hns_roce_v2_query_qpc,
.query_mpt = hns_roce_v2_query_mpt,
.query_hw_counter = hns_roce_hw_v2_query_counter,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
};

View File

@ -198,6 +198,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
HNS_ROCE_OPC_QUERY_COUNTER = 0x8206,
HNS_ROCE_OPC_QUERY_PF_RES = 0x8400,
HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,

View File

@ -515,6 +515,83 @@ static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
sub_minor);
}
/*
 * Build one table entry: place @label in the .name field of the slot whose
 * index is HNS_ROCE_HW_<idx>_CNT.
 */
#define HNS_ROCE_HW_CNT(idx, label) \
	[HNS_ROCE_HW_##idx##_CNT].name = label

/* Names of the port counters, indexed by enum hns_roce_hw_pkt_stat_index. */
static const struct rdma_stat_desc hns_roce_port_stats_descs[] = {
	HNS_ROCE_HW_CNT(RX_RC_PKT,            "rx_rc_pkt"),
	HNS_ROCE_HW_CNT(RX_UC_PKT,            "rx_uc_pkt"),
	HNS_ROCE_HW_CNT(RX_UD_PKT,            "rx_ud_pkt"),
	HNS_ROCE_HW_CNT(RX_XRC_PKT,           "rx_xrc_pkt"),
	HNS_ROCE_HW_CNT(RX_PKT,               "rx_pkt"),
	HNS_ROCE_HW_CNT(RX_ERR_PKT,           "rx_err_pkt"),
	HNS_ROCE_HW_CNT(RX_CNP_PKT,           "rx_cnp_pkt"),

	HNS_ROCE_HW_CNT(TX_RC_PKT,            "tx_rc_pkt"),
	HNS_ROCE_HW_CNT(TX_UC_PKT,            "tx_uc_pkt"),
	HNS_ROCE_HW_CNT(TX_UD_PKT,            "tx_ud_pkt"),
	HNS_ROCE_HW_CNT(TX_XRC_PKT,           "tx_xrc_pkt"),
	HNS_ROCE_HW_CNT(TX_PKT,               "tx_pkt"),
	HNS_ROCE_HW_CNT(TX_ERR_PKT,           "tx_err_pkt"),
	HNS_ROCE_HW_CNT(TX_CNP_PKT,           "tx_cnp_pkt"),

	HNS_ROCE_HW_CNT(TRP_GET_MPT_ERR_PKT,  "trp_get_mpt_err_pkt"),
	HNS_ROCE_HW_CNT(TRP_GET_IRRL_ERR_PKT, "trp_get_irrl_err_pkt"),
	HNS_ROCE_HW_CNT(ECN_DB,               "ecn_doorbell"),
	HNS_ROCE_HW_CNT(RX_BUF,               "rx_buffer"),
	HNS_ROCE_HW_CNT(TRP_RX_SOF,           "trp_rx_sof"),
	HNS_ROCE_HW_CNT(CQ_CQE,               "cq_cqe"),
	HNS_ROCE_HW_CNT(CQ_POE,               "cq_poe"),
	HNS_ROCE_HW_CNT(CQ_NOTIFY,            "cq_notify"),
};
/*
 * hns_roce_alloc_hw_port_stats - allocate the per-port hw stats structure.
 * @device: IB device
 * @port_num: 1-based port number
 *
 * Return: a stats structure describing hns_roce_port_stats_descs, or NULL
 * when @port_num is out of range, when the PCI revision is
 * PCI_REVISION_ID_HIP08 or older, or when the device is a VF.
 */
static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats(
				struct ib_device *device, u32 port_num)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(device);
	u32 port = port_num - 1;

	/*
	 * port is 0-based here, so the valid range is [0, num_ports - 1].
	 * port_num == 0 wraps to U32_MAX and is rejected by the same test.
	 */
	if (port >= hr_dev->caps.num_ports) {
		ibdev_err(device, "invalid port num.\n");
		return NULL;
	}

	if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 ||
	    hr_dev->is_vf)
		return NULL;

	return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
					  ARRAY_SIZE(hns_roce_port_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
/*
 * hns_roce_get_hw_stats - fill @stats->value with the hardware counters.
 * @device: IB device
 * @stats: stats structure whose value array receives the counters
 * @port: port number; 0 is answered with "no counters"
 * @index: not used by this implementation
 *
 * Return: number of counters written (0 when @port is 0), -EINVAL when @port
 * exceeds caps.num_ports, -EOPNOTSUPP when the PCI revision is
 * PCI_REVISION_ID_HIP08 or older or the device is a VF, otherwise the error
 * returned by the query_hw_counter hook.
 */
static int hns_roce_get_hw_stats(struct ib_device *device,
				 struct rdma_hw_stats *stats,
				 u32 port, int index)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(device);
	int nr_cnt = HNS_ROCE_HW_CNT_TOTAL;
	int err;

	if (!port)
		return 0;

	if (port > hr_dev->caps.num_ports)
		return -EINVAL;

	if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 ||
	    hr_dev->is_vf)
		return -EOPNOTSUPP;

	/* nr_cnt is the capacity on entry and the count filled in on return. */
	err = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
					   &nr_cnt);
	if (!err)
		return nr_cnt;

	ibdev_err(device, "failed to query hw counter, ret = %d\n",
		  err);
	return err;
}
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
{
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
@ -557,6 +634,8 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.query_pkey = hns_roce_query_pkey,
.query_port = hns_roce_query_port,
.reg_user_mr = hns_roce_reg_user_mr,
.alloc_hw_port_stats = hns_roce_alloc_hw_port_stats,
.get_hw_stats = hns_roce_get_hw_stats,
INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq),