RDMA/cm: Add tracepoints to track MAD send operations

Surface the operation of MAD exchanges during connection
establishment. Some samples:

[root@klimt ~]# trace-cmd report -F ib_cma
cpus=4
     kworker/0:4-123   [000]    60.677388: icm_send_rep:         local_id=1965336542 remote_id=1096195961 state=REQ_RCVD lap_state=LAP_UNINIT
   kworker/u8:11-391   [002]    60.678808: icm_send_req:         local_id=1982113758 remote_id=0 state=IDLE lap_state=LAP_UNINIT
     kworker/0:4-123   [000]    60.679652: icm_send_rtu:         local_id=1982113758 remote_id=1079418745 state=REP_RCVD lap_state=LAP_UNINIT
            nfsd-1954  [001]    60.691350: icm_send_rep:         local_id=1998890974 remote_id=1129750393 state=MRA_REQ_SENT lap_state=LAP_UNINIT
            nfsd-1954  [003]    62.017931: icm_send_drep:        local_id=1998890974 remote_id=1129750393 state=TIMEWAIT lap_state=LAP_UNINIT

Link: https://lore.kernel.org/r/159767240197.2968.12048458026453596018.stgit@klimt.1015granger.net
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Chuck Lever 2020-08-17 09:53:22 -04:00 committed by Jason Gunthorpe
parent 75874b3d50
commit 8dc105befe
2 changed files with 125 additions and 2 deletions

View File

@ -1563,6 +1563,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
trace_icm_send_req(&cm_id_priv->id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
ret = ib_post_send_mad(cm_id_priv->msg, NULL);
if (ret) {
@ -1610,6 +1611,9 @@ static int cm_issue_rej(struct cm_port *port,
IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
trace_icm_issue_rej(
IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@ -1961,6 +1965,7 @@ static void cm_dup_req_handler(struct cm_work *work,
}
spin_unlock_irq(&cm_id_priv->lock);
trace_icm_send_dup_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@ -2287,6 +2292,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@ -2358,6 +2364,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
trace_icm_send_rtu(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@ -2439,6 +2446,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto unlock;
spin_unlock_irq(&cm_id_priv->lock);
trace_icm_send_dup_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@ -2660,6 +2668,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
@ -2730,6 +2739,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
trace_icm_send_drep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
@ -2779,6 +2789,9 @@ static int cm_issue_drep(struct cm_port *port,
IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
trace_icm_issue_drep(
IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@ -2936,6 +2949,7 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
return -EINVAL;
}
trace_icm_send_rej(&cm_id_priv->id, reason);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
@ -3114,6 +3128,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
msg_response, service_timeout,
private_data, private_data_len);
trace_icm_send_mra(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
@ -3484,10 +3499,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state == IB_CM_IDLE)
if (cm_id->state == IB_CM_IDLE) {
trace_icm_send_sidr_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
else
} else {
ret = -EINVAL;
}
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@ -3649,6 +3666,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
trace_icm_send_sidr_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);

View File

@ -80,6 +80,59 @@ IB_CM_LAP_STATE_LIST
#define show_ib_cm_lap_state(x) \
__print_symbolic(x, IB_CM_LAP_STATE_LIST)
/*
* enum ib_cm_rej_reason, from include/rdma/ib_cm.h
*
* The list below is written once and expanded twice by redefining the
* ib_cm_rej_reason() / ib_cm_rej_reason_end() helper macros around it.
* Each entry names an enumerator without its IB_CM_ prefix; the prefix
* is pasted back on by the helpers. The final entry uses the _end
* variant so that the second expansion can omit the trailing comma.
*/
#define IB_CM_REJ_REASON_LIST \
ib_cm_rej_reason(REJ_NO_QP) \
ib_cm_rej_reason(REJ_NO_EEC) \
ib_cm_rej_reason(REJ_NO_RESOURCES) \
ib_cm_rej_reason(REJ_TIMEOUT) \
ib_cm_rej_reason(REJ_UNSUPPORTED) \
ib_cm_rej_reason(REJ_INVALID_COMM_ID) \
ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \
ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \
ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \
ib_cm_rej_reason(REJ_STALE_CONN) \
ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \
ib_cm_rej_reason(REJ_INVALID_GID) \
ib_cm_rej_reason(REJ_INVALID_LID) \
ib_cm_rej_reason(REJ_INVALID_SL) \
ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \
ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \
ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \
ib_cm_rej_reason(REJ_INVALID_ALT_GID) \
ib_cm_rej_reason(REJ_INVALID_ALT_LID) \
ib_cm_rej_reason(REJ_INVALID_ALT_SL) \
ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \
ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \
ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \
ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \
ib_cm_rej_reason(REJ_PORT_REDIRECT) \
ib_cm_rej_reason(REJ_INVALID_MTU) \
ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \
ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \
ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \
ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \
ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \
ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \
ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \
ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
/*
* First expansion: emit one TRACE_DEFINE_ENUM(IB_CM_<name>); per entry.
* Both helpers expand identically here. Any earlier definitions of the
* helpers are dropped first.
*/
#undef ib_cm_rej_reason
#undef ib_cm_rej_reason_end
#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x);
#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
IB_CM_REJ_REASON_LIST
/*
* Second expansion: turn each entry into a { value, "name" } pair for
* __print_symbolic(). The string is the entry as written in the list,
* i.e. without the IB_CM_ prefix. Only the _end helper omits the comma.
*/
#undef ib_cm_rej_reason
#undef ib_cm_rej_reason_end
#define ib_cm_rej_reason(x) { IB_CM_##x, #x },
#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x }
/* Map a reject-reason value to its symbolic name in TP_printk() output. */
#define show_ib_cm_rej_reason(x) \
__print_symbolic(x, IB_CM_REJ_REASON_LIST)
DECLARE_EVENT_CLASS(icm_id_class,
TP_PROTO(
@ -111,6 +164,56 @@ DECLARE_EVENT_CLASS(icm_id_class,
)
);
/*
* Define a tracepoint named icm_send_<name> as an instance of the
* icm_id_class event class. Every such event takes a single
* const struct ib_cm_id * argument.
*/
#define DEFINE_CM_SEND_EVENT(name) \
DEFINE_EVENT(icm_id_class, \
icm_send_##name, \
TP_PROTO( \
const struct ib_cm_id *cm_id \
), \
TP_ARGS(cm_id))
/*
* One event per MAD type. The callers in cm.c invoke the matching
* trace_icm_send_<name>() immediately before ib_post_send_mad().
*/
DEFINE_CM_SEND_EVENT(req);
DEFINE_CM_SEND_EVENT(rep);
DEFINE_CM_SEND_EVENT(dup_req);
DEFINE_CM_SEND_EVENT(dup_rep);
DEFINE_CM_SEND_EVENT(rtu);
DEFINE_CM_SEND_EVENT(mra);
DEFINE_CM_SEND_EVENT(sidr_req);
DEFINE_CM_SEND_EVENT(sidr_rep);
DEFINE_CM_SEND_EVENT(dreq);
DEFINE_CM_SEND_EVENT(drep);
/*
* icm_send_rej - trace the sending of a REJ message.
*
* Unlike the DEFINE_CM_SEND_EVENT() events this one takes a second
* argument, the reject reason, so it is a standalone TRACE_EVENT
* rather than an instance of icm_id_class.
*
* Recorded fields: the cm_id pointer, the local and remote IDs
* (converted with be32_to_cpu()), the cm_id state and the reason.
* The printed line shows the IDs plus the state and reason rendered
* symbolically via show_ib_cm_state() and show_ib_cm_rej_reason().
* The cm_id pointer is stored in the record but is not part of the
* TP_printk() output.
*/
TRACE_EVENT(icm_send_rej,
TP_PROTO(
const struct ib_cm_id *cm_id,
enum ib_cm_rej_reason reason
),
TP_ARGS(cm_id, reason),
TP_STRUCT__entry(
__field(const void *, cm_id)
__field(u32, local_id)
__field(u32, remote_id)
__field(unsigned long, state)
__field(unsigned long, reason)
),
TP_fast_assign(
__entry->cm_id = cm_id;
__entry->local_id = be32_to_cpu(cm_id->local_id);
__entry->remote_id = be32_to_cpu(cm_id->remote_id);
__entry->state = cm_id->state;
__entry->reason = reason;
),
TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
__entry->local_id, __entry->remote_id,
show_ib_cm_state(__entry->state),
show_ib_cm_rej_reason(__entry->reason)
)
);
#define DEFINE_CM_ERR_EVENT(name) \
DEFINE_EVENT(icm_id_class, \
icm_##name##_err, \
@ -172,6 +275,8 @@ DECLARE_EVENT_CLASS(icm_local_class,
), \
TP_ARGS(local_id, remote_id))
/*
* Events that carry only a (local_id, remote_id) pair.
* issue_rej and issue_drep correspond to the trace_icm_issue_rej() and
* trace_icm_issue_drep() calls in cm_issue_rej() and cm_issue_drep(),
* which pass comm IDs read from the received message with IBA_GET().
* NOTE(review): the icm_ name prefix is presumably added by
* DEFINE_CM_LOCAL_EVENT(), whose full body is not visible here.
*/
DEFINE_CM_LOCAL_EVENT(issue_rej);
DEFINE_CM_LOCAL_EVENT(issue_drep);
DEFINE_CM_LOCAL_EVENT(staleconn_err);
DEFINE_CM_LOCAL_EVENT(no_priv_err);