RDMA/rxe: Add error messages

This patch adds error and debug messages so that every failure reported
to rdma-core, whether through a verbs API call or a completion error
return, will generate at least one error message backed up by debug
messages with more detail.

With dynamic debugging one can follow up after seeing an error message
by turning on the appropriate debug messages.

Link: https://lore.kernel.org/r/20230303221623.8053-5-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Bob Pearson 2023-03-03 16:16:24 -06:00 committed by Jason Gunthorpe
parent 9ac01f434a
commit 5bf944f241
5 changed files with 613 additions and 245 deletions

View file

@ -428,6 +428,10 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
uwc->wc_flags = IB_WC_WITH_IMM;
uwc->byte_len = wqe->dma.length;
}
} else {
if (wqe->status != IB_WC_WR_FLUSH_ERR)
rxe_err_qp(qp, "non-flush error status = %d",
wqe->status);
}
}

View file

@ -80,7 +80,6 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);
/* rxe_mw.c */

View file

@ -722,19 +722,6 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
return 0;
}
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct rxe_mr *mr = to_rmr(ibmr);
/* See IBA 10.6.7.2.6 */
if (atomic_read(&mr->num_mw) > 0)
return -EINVAL;
rxe_cleanup(mr);
kfree_rcu(mr);
return 0;
}
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);

View file

@ -1151,6 +1151,10 @@ static enum resp_states do_complete(struct rxe_qp *qp,
wc->port_num = qp->attr.port_num;
}
} else {
if (wc->status != IB_WC_WR_FLUSH_ERR)
rxe_err_qp(qp, "non-flush error status = %d",
wc->status);
}
/* have copy for srq and reference for !srq */

File diff suppressed because it is too large Load diff