v6.2 merge window pull request

Usual size of updates, a new driver, and most of the bulk focusing on rxe:
 
 - Usual typos, style, and language updates
 
 - Driver updates for mlx5, irdma, siw, rtrs, srp, hfi1, hns, erdma, mlx4
 
 - Lots of RXE updates
   * Improve reply error handling for bad MR operations
   * Code tidying
   * Debug printing uses common loggers
   * Remove half implemented RD related stuff
   * Support IBA's recently defined Atomic Write and Flush operations
 
 - erdma support for atomic operations
 
 - New driver "mana" for Ethernet HW available in Azure VMs. This driver
   only supports DPDK
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCY5eIggAKCRCFwuHvBreF
 YeX7AP9+l5Y9J48OmK7y/YgADNo9g05agXp3E8EuUDmBU+PREgEAigdWaJVf2oea
 IctVja0ApLW5W+wsFt8Qh+V4PMiYTAM=
 =Q5V+
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "Usual size of updates, a new driver, and most of the bulk focusing on
  rxe:

   - Usual typos, style, and language updates

   - Driver updates for mlx5, irdma, siw, rtrs, srp, hfi1, hns, erdma,
     mlx4

   - Lots of RXE updates:
      * Improve reply error handling for bad MR operations
      * Code tidying
      * Debug printing uses common loggers
      * Remove half implemented RD related stuff
      * Support IBA's recently defined Atomic Write and Flush operations

   - erdma support for atomic operations

   - New driver 'mana' for Ethernet HW available in Azure VMs. This
     driver only supports DPDK"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (122 commits)
  IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces
  RDMA: Add missed netdev_put() for the netdevice_tracker
  RDMA/rxe: Enable RDMA FLUSH capability for rxe device
  RDMA/cm: Make QP FLUSHABLE for supported device
  RDMA/rxe: Implement flush completion
  RDMA/rxe: Implement flush execution in responder side
  RDMA/rxe: Implement RC RDMA FLUSH service in requester side
  RDMA/rxe: Extend rxe packet format to support flush
  RDMA/rxe: Allow registering persistent flag for pmem MR only
  RDMA/rxe: Extend rxe user ABI to support flush
  RDMA: Extend RDMA kernel verbs ABI to support flush
  RDMA: Extend RDMA user ABI to support flush
  RDMA/rxe: Fix incorrect responder length checking
  RDMA/rxe: Fix oops with zero length reads
  RDMA/mlx5: Remove not-used IB_FLOW_SPEC_IB define
  RDMA/hns: Fix XRC caps on HIP08
  RDMA/hns: Fix error code of CMD
  RDMA/hns: Fix page size cap from firmware
  RDMA/hns: Fix PBL page MTR find
  RDMA/hns: Fix AH attr queried by query_qp
  ...
Linus Torvalds, 2022-12-14 09:27:13 -08:00 (commit ab425febda)
99 changed files with 3622 additions and 898 deletions


@ -13806,6 +13806,15 @@ F: drivers/scsi/smartpqi/smartpqi*.[ch]
F: include/linux/cciss*.h
F: include/uapi/linux/cciss*.h
MICROSOFT MANA RDMA DRIVER
M: Long Li <longli@microsoft.com>
M: Ajay Sharma <sharmaajay@microsoft.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/mana/
F: include/net/mana
F: include/uapi/rdma/mana-abi.h
MICROSOFT SURFACE AGGREGATOR TABLET-MODE SWITCH
M: Maximilian Luz <luzmaximilian@gmail.com>
L: platform-driver-x86@vger.kernel.org


@ -78,6 +78,7 @@ config INFINIBAND_VIRT_DMA
def_bool !HIGHMEM
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
if !UML
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
@ -85,6 +86,7 @@ source "drivers/infiniband/hw/erdma/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/irdma/Kconfig"
source "drivers/infiniband/hw/mana/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/mthca/Kconfig"
@ -94,6 +96,7 @@ source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
endif # !UML
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/sw/siw/Kconfig"
endif


@ -1422,7 +1422,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
*vlan_id = vlan_dev_vlan_id(ndev);
} else {
/* If the netdev is upper device and if it's lower
* device is vlan device, consider vlan id of the
* device is vlan device, consider vlan id of
* the lower vlan device for this gid entry.
*/
netdev_walk_all_lower_dev_rcu(attr->ndev,


@ -4094,9 +4094,18 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX | IB_QP_PORT;
qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
if (cm_id_priv->responder_resources)
if (cm_id_priv->responder_resources) {
struct ib_device *ib_dev = cm_id_priv->id.device;
u64 support_flush = ib_dev->attrs.device_cap_flags &
(IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT);
u32 flushable = support_flush ?
(IB_ACCESS_FLUSH_GLOBAL |
IB_ACCESS_FLUSH_PERSISTENT) : 0;
qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_ATOMIC;
IB_ACCESS_REMOTE_ATOMIC |
flushable;
}
qp_attr->pkey_index = cm_id_priv->av.pkey_index;
if (cm_id_priv->av.port)
qp_attr->port_num = cm_id_priv->av.port->port_num;
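The cm.c hunk above only grants the new FLUSH access rights when the device advertises the matching capability bits. A minimal sketch of the same check, using only the IB_DEVICE_FLUSH_* and IB_ACCESS_FLUSH_* flags visible in the hunk (the helper name is hypothetical, and unlike cm.c it maps each capability to its own access bit):

#include <rdma/ib_verbs.h>

/* Hypothetical helper: which FLUSH access rights can this device honor? */
static int rdma_flush_access_flags(struct ib_device *dev)
{
	int access = 0;

	if (dev->attrs.device_cap_flags & IB_DEVICE_FLUSH_GLOBAL)
		access |= IB_ACCESS_FLUSH_GLOBAL;
	if (dev->attrs.device_cap_flags & IB_DEVICE_FLUSH_PERSISTENT)
		access |= IB_ACCESS_FLUSH_PERSISTENT;

	return access;
}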


@ -47,7 +47,7 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
#define CMA_IBOE_PACKET_LIFETIME 16
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
static const char * const cma_events[] = {


@ -2159,14 +2159,16 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
return 0;
}
if (old_ndev)
netdev_tracker_free(ndev, &pdata->netdev_tracker);
if (ndev)
dev_hold(ndev);
netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
rcu_assign_pointer(pdata->netdev, ndev);
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
add_ndev_hash(pdata);
if (old_ndev)
dev_put(old_ndev);
__dev_put(old_ndev);
return 0;
}
@ -2199,7 +2201,7 @@ static void free_netdevs(struct ib_device *ib_dev)
* comparisons after the put
*/
rcu_assign_pointer(pdata->netdev, NULL);
dev_put(ndev);
netdev_put(ndev, &pdata->netdev_tracker);
}
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
}
@ -2851,8 +2853,8 @@ err:
static void __exit ib_core_cleanup(void)
{
roce_gid_mgmt_cleanup();
nldev_exit();
rdma_nl_unregister(RDMA_NL_LS);
nldev_exit();
unregister_pernet_device(&rdma_dev_net_ops);
unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
ib_sa_cleanup();
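The device.c hunks above (together with the "RDMA: Add missed netdev_put() for the netdevice_tracker" entry in the shortlog) move ib_device_set_netdev() from bare dev_hold()/dev_put() to the tracked reference API, so leaked references can be attributed to their owner. A stripped-down sketch of that pattern with illustrative names, mirroring the ordering used in the hunk:

#include <linux/netdevice.h>

struct example_pdata {
	struct net_device *netdev;
	netdevice_tracker netdev_tracker;	/* follows the current reference */
};

static void example_set_netdev(struct example_pdata *p, struct net_device *ndev)
{
	struct net_device *old = p->netdev;

	/* Detach the tracker from the old reference... */
	if (old)
		netdev_tracker_free(old, &p->netdev_tracker);
	/* ...arm it on the new reference... */
	if (ndev)
		netdev_hold(ndev, &p->netdev_tracker, GFP_ATOMIC);
	p->netdev = ndev;
	/* ...then drop the now-untracked old refcount. */
	if (old)
		dev_put(old);
}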


@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_qp_info *qp_info,
struct trace_event_raw_ib_mad_send_template *entry)
{
u16 pkey;
struct ib_device *dev = qp_info->port_priv->device;
u32 pnum = qp_info->port_priv->port_num;
struct ib_ud_wr *wr = &mad_send_wr->send_wr;
struct rdma_ah_attr attr = {};
@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
/* These are common */
entry->sl = attr.sl;
ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
entry->pkey = pkey;
entry->rqpn = wr->remote_qpn;
entry->rqkey = wr->remote_qkey;
entry->dlid = rdma_ah_get_dlid(&attr);


@ -513,7 +513,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
/* In create_qp() port is not set yet */
if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
return -EINVAL;
return -EMSGSIZE;
ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
if (ret)
@ -552,7 +552,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_cm_id *cm_id = &id_priv->id;
if (port && port != cm_id->port_num)
return 0;
return -EAGAIN;
if (cm_id->port_num &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
@ -894,6 +894,8 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
int ret = 0;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
if (!table_attr)
return -EMSGSIZE;
rt = &counter->device->res[RDMA_RESTRACK_QP];
xa_lock(&rt->xa);
@ -1041,6 +1043,10 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
if (!nlh) {
err = -EMSGSIZE;
goto err_free;
}
err = fill_dev_info(msg, device);
if (err)
@ -1126,7 +1132,7 @@ static int _nldev_get_dumpit(struct ib_device *device,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, NLM_F_MULTI);
if (fill_dev_info(skb, device)) {
if (!nlh || fill_dev_info(skb, device)) {
nlmsg_cancel(skb, nlh);
goto out;
}
@ -1185,6 +1191,10 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
if (!nlh) {
err = -EMSGSIZE;
goto err_free;
}
err = fill_port_info(msg, device, port, sock_net(skb->sk));
if (err)
@ -1246,7 +1256,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
RDMA_NLDEV_CMD_PORT_GET),
0, NLM_F_MULTI);
if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
nlmsg_cancel(skb, nlh);
goto out;
}
@ -1288,6 +1298,10 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
0, 0);
if (!nlh) {
ret = -EMSGSIZE;
goto err_free;
}
ret = fill_res_info(msg, device);
if (ret)
@ -1319,7 +1333,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
0, NLM_F_MULTI);
if (fill_res_info(skb, device)) {
if (!nlh || fill_res_info(skb, device)) {
nlmsg_cancel(skb, nlh);
goto out;
}
@ -1454,7 +1468,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_OP(nlh->nlmsg_type)),
0, 0);
if (fill_nldev_handle(msg, device)) {
if (!nlh || fill_nldev_handle(msg, device)) {
ret = -EMSGSIZE;
goto err_free;
}
@ -1533,7 +1547,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
0, NLM_F_MULTI);
if (fill_nldev_handle(skb, device)) {
if (!nlh || fill_nldev_handle(skb, device)) {
ret = -EMSGSIZE;
goto err;
}
@ -1795,6 +1809,10 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_GET_CHARDEV),
0, 0);
if (!nlh) {
err = -EMSGSIZE;
goto out_nlmsg;
}
data.nl_msg = msg;
err = ib_get_client_nl_info(ibdev, client_name, &data);
@ -1852,6 +1870,10 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_SYS_GET),
0, 0);
if (!nlh) {
nlmsg_free(msg);
return -EMSGSIZE;
}
err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
(u8)ib_devices_shared_netns);
@ -2032,7 +2054,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_STAT_SET),
0, 0);
if (fill_nldev_handle(msg, device) ||
if (!nlh || fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
ret = -EMSGSIZE;
goto err_free_msg;
@ -2101,6 +2123,10 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_STAT_SET),
0, 0);
if (!nlh) {
ret = -EMSGSIZE;
goto err_fill;
}
cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
@ -2171,7 +2197,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
RDMA_NLDEV_CMD_STAT_GET),
0, 0);
if (fill_nldev_handle(msg, device) ||
if (!nlh || fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
ret = -EMSGSIZE;
goto err_msg;
@ -2259,6 +2285,10 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_STAT_GET),
0, 0);
if (!nlh) {
ret = -EMSGSIZE;
goto err_msg;
}
ret = rdma_counter_get_mode(device, port, &mode, &mask);
if (ret)
@ -2391,7 +2421,7 @@ static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
0, 0);
ret = -EMSGSIZE;
if (fill_nldev_handle(msg, device) ||
if (!nlh || fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
goto err_msg;
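Every nldev.c change above is the same defensive fix repeated: nlmsg_put() returns NULL when the skb has no room for the header, so each response builder now fails with -EMSGSIZE instead of handing a NULL nlmsghdr to the fill helpers. The pattern in isolation (handler and attribute chosen for illustration only):

#include <net/netlink.h>
#include <rdma/rdma_netlink.h>

static int example_fill(struct sk_buff *msg, u32 portid, u32 seq, u32 dev_index)
{
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(msg, portid, seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);
	if (!nlh)				/* no tailroom left in the skb */
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, dev_index)) {
		nlmsg_cancel(msg, nlh);		/* roll back the partial message */
		return -EMSGSIZE;
	}

	nlmsg_end(msg, nlh);
	return 0;
}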


@ -343,8 +343,6 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
rt = &dev->res[res->type];
old = xa_erase(&rt->xa, res->id);
if (res->type == RDMA_RESTRACK_MR)
return;
WARN_ON(old != res);
out:


@ -1213,6 +1213,9 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
p->port_num = port_num;
kobject_init(&p->kobj, &port_type);
if (device->port_data && is_full_dev)
device->port_data[port_num].sysfs = p;
cur_group = p->groups_list;
ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list,
attr->gid_tbl_len, show_port_gid);
@ -1258,9 +1261,6 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
}
list_add_tail(&p->kobj.entry, &coredev->port_list);
if (device->port_data && is_full_dev)
device->port_data[port_num].sysfs = p;
return p;
err_groups:
@ -1268,6 +1268,8 @@ err_groups:
err_del:
kobject_del(&p->kobj);
err_put:
if (device->port_data && is_full_dev)
device->port_data[port_num].sysfs = NULL;
kobject_put(&p->kobj);
return ERR_PTR(ret);
}
@ -1276,14 +1278,17 @@ static void destroy_port(struct ib_core_device *coredev, struct ib_port *port)
{
bool is_full_dev = &port->ibdev->coredev == coredev;
if (port->ibdev->port_data &&
port->ibdev->port_data[port->port_num].sysfs == port)
port->ibdev->port_data[port->port_num].sysfs = NULL;
list_del(&port->kobj.entry);
if (is_full_dev)
sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups);
sysfs_remove_groups(&port->kobj, port->groups_list);
kobject_del(&port->kobj);
if (port->ibdev->port_data &&
port->ibdev->port_data[port->port_num].sysfs == port)
port->ibdev->port_data[port->port_num].sysfs = NULL;
kobject_put(&port->kobj);
}


@ -163,7 +163,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
UVERBS_ATTR_CREATE_QP_SRQ_HANDLE))
return -EINVAL;
/* send_cq is optinal */
/* send_cq is optional */
if (cap.max_send_wr) {
send_cq = uverbs_attr_get_obj(attrs,
UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);


@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_QIB) += qib/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
obj-$(CONFIG_INFINIBAND_EFA) += efa/
obj-$(CONFIG_INFINIBAND_IRDMA) += irdma/
obj-$(CONFIG_MANA_INFINIBAND) += mana/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/


@ -124,6 +124,7 @@ struct erdma_devattr {
u32 fw_version;
unsigned char peer_addr[ETH_ALEN];
unsigned long cap_flags;
int numa_node;
enum erdma_cc_alg cc;
@ -189,6 +190,7 @@ struct erdma_dev {
struct net_device *netdev;
struct pci_dev *pdev;
struct notifier_block netdev_nb;
struct workqueue_struct *reflush_wq;
resource_size_t func_bar_addr;
resource_size_t func_bar_len;
@ -218,7 +220,7 @@ struct erdma_dev {
DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT);
/*
* We provide max 496 uContexts that each has one SQ normal Db,
* and one directWQE db
* and one directWQE db.
*/
DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT);


@ -64,6 +64,8 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
[ERDMA_OP_REG_MR] = IB_WC_REG_MR,
[ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
[ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
[ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP,
[ERDMA_OP_ATOMIC_FAD] = IB_WC_FETCH_ADD,
};
static const struct {


@ -145,6 +145,7 @@ enum CMDQ_RDMA_OPCODE {
CMDQ_OPCODE_MODIFY_QP = 3,
CMDQ_OPCODE_CREATE_CQ = 4,
CMDQ_OPCODE_DESTROY_CQ = 5,
CMDQ_OPCODE_REFLUSH = 6,
CMDQ_OPCODE_REG_MR = 8,
CMDQ_OPCODE_DEREG_MR = 9
};
@ -224,8 +225,7 @@ struct erdma_cmdq_create_cq_req {
/* regmr cfg1 */
#define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12)
#define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6)
#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2)
#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0)
#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1)
/* regmr cfg2 */
#define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
@ -302,8 +302,16 @@ struct erdma_cmdq_destroy_qp_req {
u32 qpn;
};
struct erdma_cmdq_reflush_req {
u64 hdr;
u32 qpn;
u32 sq_pi;
u32 rq_pi;
};
/* cap qword 0 definition */
#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
@ -315,6 +323,10 @@ struct erdma_cmdq_destroy_qp_req {
#define ERDMA_NQP_PER_QBLOCK 1024
enum {
ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7,
};
#define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0)
/* CQE hdr */
@ -340,9 +352,9 @@ struct erdma_cqe {
};
struct erdma_sge {
__aligned_le64 laddr;
__aligned_le64 addr;
__le32 length;
__le32 lkey;
__le32 key;
};
/* Receive Queue Element */
@ -370,8 +382,7 @@ struct erdma_rqe {
#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
/* REG MR attrs */
#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0)
#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2)
#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1)
#define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6)
#define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12)
@ -410,6 +421,16 @@ struct erdma_readreq_sqe {
__le32 rsvd;
};
struct erdma_atomic_sqe {
__le64 hdr;
__le64 rsvd;
__le64 fetchadd_swap_data;
__le64 cmp_data;
struct erdma_sge remote;
struct erdma_sge sgl;
};
struct erdma_reg_mr_sqe {
__le64 hdr;
__le64 addr;
@ -469,7 +490,9 @@ enum erdma_opcode {
ERDMA_OP_REG_MR = 14,
ERDMA_OP_LOCAL_INV = 15,
ERDMA_OP_READ_WITH_INV = 16,
ERDMA_NUM_OPCODES = 17,
ERDMA_OP_ATOMIC_CAS = 17,
ERDMA_OP_ATOMIC_FAD = 18,
ERDMA_NUM_OPCODES = 19,
ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
};


@ -374,6 +374,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
dev->attrs.max_mr = dev->attrs.max_qp << 1;
dev->attrs.max_cq = dev->attrs.max_qp << 1;
dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
dev->attrs.max_ord = ERDMA_MAX_ORD;
@ -520,13 +521,22 @@ static int erdma_ib_device_add(struct pci_dev *pdev)
u64_to_ether_addr(mac, dev->attrs.peer_addr);
dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
if (!dev->reflush_wq) {
ret = -ENOMEM;
goto err_alloc_workqueue;
}
ret = erdma_device_register(dev);
if (ret)
goto err_out;
goto err_register;
return 0;
err_out:
err_register:
destroy_workqueue(dev->reflush_wq);
err_alloc_workqueue:
xa_destroy(&dev->qp_xa);
xa_destroy(&dev->cq_xa);
@ -542,6 +552,7 @@ static void erdma_ib_device_remove(struct pci_dev *pdev)
unregister_netdevice_notifier(&dev->netdev_nb);
ib_unregister_device(&dev->ibdev);
destroy_workqueue(dev->reflush_wq);
erdma_res_cb_free(dev);
xa_destroy(&dev->qp_xa);
xa_destroy(&dev->cq_xa);


@ -120,6 +120,7 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
enum erdma_qp_attr_mask mask)
{
bool need_reflush = false;
int drop_conn, ret = 0;
if (!mask)
@ -135,6 +136,7 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
qp->attrs.state = ERDMA_QP_STATE_ERROR;
need_reflush = true;
if (qp->cep) {
erdma_cep_put(qp->cep);
qp->cep = NULL;
@ -145,17 +147,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
case ERDMA_QP_STATE_RTS:
drop_conn = 0;
if (attrs->state == ERDMA_QP_STATE_CLOSING) {
if (attrs->state == ERDMA_QP_STATE_CLOSING ||
attrs->state == ERDMA_QP_STATE_TERMINATE ||
attrs->state == ERDMA_QP_STATE_ERROR) {
ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
drop_conn = 1;
} else if (attrs->state == ERDMA_QP_STATE_TERMINATE) {
qp->attrs.state = ERDMA_QP_STATE_TERMINATE;
ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
drop_conn = 1;
} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
qp->attrs.state = ERDMA_QP_STATE_ERROR;
drop_conn = 1;
need_reflush = true;
}
if (drop_conn)
@ -180,6 +177,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
break;
}
if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
qp->flags |= ERDMA_QP_IN_FLUSHING;
mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
usecs_to_jiffies(100));
}
return ret;
}
@ -285,15 +288,16 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
u32 idx = *pi & (qp->attrs.sq_size - 1);
enum ib_wr_opcode op = send_wr->opcode;
struct erdma_atomic_sqe *atomic_sqe;
struct erdma_readreq_sqe *read_sqe;
struct erdma_reg_mr_sqe *regmr_sge;
struct erdma_write_sqe *write_sqe;
struct erdma_send_sqe *send_sqe;
struct ib_rdma_wr *rdma_wr;
struct erdma_mr *mr;
struct erdma_sge *sge;
__le32 *length_field;
struct erdma_mr *mr;
u64 wqe_hdr, *entry;
struct ib_sge *sge;
u32 attrs;
int ret;
@ -360,9 +364,9 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
qp->attrs.sq_size, SQEBB_SHIFT);
sge->addr = rdma_wr->remote_addr;
sge->lkey = rdma_wr->rkey;
sge->length = send_wr->sg_list[0].length;
sge->addr = cpu_to_le64(rdma_wr->remote_addr);
sge->key = cpu_to_le32(rdma_wr->rkey);
sge->length = cpu_to_le32(send_wr->sg_list[0].length);
wqe_size = sizeof(struct erdma_readreq_sqe) +
send_wr->num_sge * sizeof(struct ib_sge);
@ -397,8 +401,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
regmr_sge->length = cpu_to_le32(mr->ibmr.length);
regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) |
FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
mr->mem.mtt_nents);
@ -424,6 +427,35 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
wqe_size = sizeof(struct erdma_reg_mr_sqe);
goto out;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
atomic_sqe = (struct erdma_atomic_sqe *)entry;
if (op == IB_WR_ATOMIC_CMP_AND_SWP) {
wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
ERDMA_OP_ATOMIC_CAS);
atomic_sqe->fetchadd_swap_data =
cpu_to_le64(atomic_wr(send_wr)->swap);
atomic_sqe->cmp_data =
cpu_to_le64(atomic_wr(send_wr)->compare_add);
} else {
wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
ERDMA_OP_ATOMIC_FAD);
atomic_sqe->fetchadd_swap_data =
cpu_to_le64(atomic_wr(send_wr)->compare_add);
}
sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
qp->attrs.sq_size, SQEBB_SHIFT);
sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr);
sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey);
sge++;
sge->addr = cpu_to_le64(send_wr->sg_list[0].addr);
sge->key = cpu_to_le32(send_wr->sg_list[0].lkey);
sge->length = cpu_to_le32(send_wr->sg_list[0].length);
wqe_size = sizeof(*atomic_sqe);
goto out;
default:
return -EOPNOTSUPP;
}
@ -498,6 +530,10 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
}
spin_unlock_irqrestore(&qp->lock, flags);
if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
usecs_to_jiffies(100));
return ret;
}
@ -551,5 +587,10 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
}
spin_unlock_irqrestore(&qp->lock, flags);
if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
usecs_to_jiffies(100));
return ret;
}
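The erdma_qp.c hunk above adds ERDMA_OP_ATOMIC_CAS/FAD handling to the SQE builder; consumers keep using the generic verbs atomic work requests. A hedged sketch of posting a fetch-and-add from a kernel ULP (all handles, keys and addresses are placeholders):

#include <rdma/ib_verbs.h>

static int example_fetch_add(struct ib_qp *qp, u64 local_dma_addr, u32 lkey,
			     u64 remote_addr, u32 rkey)
{
	struct ib_sge sge = {
		.addr   = local_dma_addr,	/* old remote value lands here */
		.length = sizeof(u64),
		.lkey   = lkey,
	};
	struct ib_atomic_wr wr = {
		.wr = {
			.opcode     = IB_WR_ATOMIC_FETCH_AND_ADD,
			.send_flags = IB_SEND_SIGNALED,
			.sg_list    = &sge,
			.num_sge    = 1,
		},
		.remote_addr = remote_addr,
		.rkey        = rkey,
		.compare_add = 1,		/* amount to add */
	};
	const struct ib_send_wr *bad_wr;

	return ib_post_send(qp, &wr.wr, &bad_wr);
}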


@ -118,8 +118,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) |
FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0);
FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
ilog2(mr->mem.page_size)) |
FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
@ -289,6 +288,10 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
attr->max_mw = dev->attrs.max_mw;
attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
attr->atomic_cap = IB_ATOMIC_GLOB;
attr->fw_ver = dev->attrs.fw_version;
if (dev->netdev)
@ -376,6 +379,21 @@ int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
static void erdma_flush_worker(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct erdma_qp *qp =
container_of(dwork, struct erdma_qp, reflush_dwork);
struct erdma_cmdq_reflush_req req;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_REFLUSH);
req.qpn = QP_ID(qp);
req.sq_pi = qp->kern_qp.sq_pi;
req.rq_pi = qp->kern_qp.rq_pi;
erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
}
static int erdma_qp_validate_cap(struct erdma_dev *dev,
struct ib_qp_init_attr *attrs)
{
@ -732,6 +750,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
qp->attrs.max_send_sge = attrs->cap.max_send_sge;
qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
qp->attrs.state = ERDMA_QP_STATE_IDLE;
INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
ret = create_qp_cmd(dev, qp);
if (ret)
@ -1025,6 +1044,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
up_write(&qp->state_lock);
cancel_delayed_work_sync(&qp->reflush_dwork);
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_DESTROY_QP);
req.qpn = QP_ID(qp);
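The reflush support added above is a standard delayed-work lifecycle: the work is armed when the QP enters an error state, re-armed from the post paths while ERDMA_QP_IN_FLUSHING is set, and cancelled synchronously before the QP is destroyed. Reduced to the bare pattern, with hypothetical names:

#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct example_flush_ctx {
	struct workqueue_struct *wq;
	struct delayed_work dwork;
};

static void example_flush_fn(struct work_struct *work)
{
	struct example_flush_ctx *ctx = container_of(to_delayed_work(work),
						     struct example_flush_ctx,
						     dwork);
	/* post the REFLUSH command to the device here */
	(void)ctx;
}

static int example_flush_init(struct example_flush_ctx *ctx)
{
	ctx->wq = alloc_workqueue("example-reflush-wq", WQ_UNBOUND,
				  WQ_UNBOUND_MAX_ACTIVE);
	if (!ctx->wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&ctx->dwork, example_flush_fn);
	return 0;
}

static void example_flush_arm(struct example_flush_ctx *ctx)
{
	/* If the work is already pending, this just resets its timer. */
	mod_delayed_work(ctx->wq, &ctx->dwork, usecs_to_jiffies(100));
}

static void example_flush_fini(struct example_flush_ctx *ctx)
{
	cancel_delayed_work_sync(&ctx->dwork);
	destroy_workqueue(ctx->wq);
}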


@ -71,16 +71,18 @@ struct erdma_pd {
#define ERDMA_MR_INLINE_MTT 0
#define ERDMA_MR_INDIRECT_MTT 1
#define ERDMA_MR_ACC_LR BIT(0)
#define ERDMA_MR_ACC_LW BIT(1)
#define ERDMA_MR_ACC_RR BIT(2)
#define ERDMA_MR_ACC_RW BIT(3)
#define ERDMA_MR_ACC_RA BIT(0)
#define ERDMA_MR_ACC_LR BIT(1)
#define ERDMA_MR_ACC_LW BIT(2)
#define ERDMA_MR_ACC_RR BIT(3)
#define ERDMA_MR_ACC_RW BIT(4)
static inline u8 to_erdma_access_flags(int access)
{
return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) |
(access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) |
(access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0);
(access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) |
(access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0);
}
struct erdma_mem {
@ -171,6 +173,10 @@ enum erdma_qp_attr_mask {
ERDMA_QP_ATTR_MPA = (1 << 7)
};
enum erdma_qp_flags {
ERDMA_QP_IN_FLUSHING = (1 << 0),
};
struct erdma_qp_attrs {
enum erdma_qp_state state;
enum erdma_cc_alg cc; /* Congestion control algorithm */
@ -195,6 +201,9 @@ struct erdma_qp {
struct erdma_cep *cep;
struct rw_semaphore state_lock;
unsigned long flags;
struct delayed_work reflush_dwork;
union {
struct erdma_kqp kern_qp;
struct erdma_uqp user_qp;


@ -177,6 +177,8 @@ out:
for (node = 0; node < node_affinity.num_possible_nodes; node++)
hfi1_per_node_cntr[node] = 1;
pci_dev_put(dev);
return 0;
}


@ -112,7 +112,7 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);
return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
return sysfs_emit(buffer, "0x%lx\n", cap_mask);
}
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)


@ -1743,6 +1743,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
if (!dd->platform_config.data) {
dd_dev_err(dd, "%s: Missing config file\n", __func__);
ret = -EINVAL;
goto bail;
}
ptr = (u32 *)dd->platform_config.data;
@ -1751,6 +1752,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
ptr++;
if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
dd_dev_err(dd, "%s: Bad config file\n", __func__);
ret = -EINVAL;
goto bail;
}
@ -1774,6 +1776,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
if (file_length > dd->platform_config.size) {
dd_dev_info(dd, "%s:File claims to be larger than read size\n",
__func__);
ret = -EINVAL;
goto bail;
} else if (file_length < dd->platform_config.size) {
dd_dev_info(dd,
@ -1794,6 +1797,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
__func__, (ptr - (u32 *)
dd->platform_config.data));
ret = -EINVAL;
goto bail;
}
@ -1837,6 +1841,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
__func__, table_type,
(ptr - (u32 *)
dd->platform_config.data));
ret = -EINVAL;
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@ -1856,6 +1861,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
__func__, table_type,
(ptr -
(u32 *)dd->platform_config.data));
ret = -EINVAL;
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =


@ -2437,9 +2437,9 @@ struct opa_port_data_counters_msg {
__be64 port_vl_xmit_wait_data;
__be64 port_vl_rcv_bubble;
__be64 port_vl_mark_fecn;
} vls[0];
} vls[];
/* array size defined by #bits set in vl_select_mask*/
} port[1]; /* array size defined by #ports in attribute modifier */
} port;
};
struct opa_port_error_counters64_msg {
@ -2470,9 +2470,9 @@ struct opa_port_error_counters64_msg {
u8 reserved3[7];
struct _vls_ectrs {
__be64 port_vl_xmit_discards;
} vls[0];
} vls[];
/* array size defined by #bits set in vl_select_mask */
} port[1]; /* array size defined by #ports in attribute modifier */
} port;
};
struct opa_port_error_info_msg {
@ -2543,7 +2543,7 @@ struct opa_port_error_info_msg {
u8 error_info;
} __packed fm_config_ei;
__u32 reserved9;
} port[1]; /* actual array size defined by #ports in attr modifier */
} port;
};
/* opa_port_error_info_msg error_info_select_mask bit definitions */
@ -2966,7 +2966,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
}
/* Sanity check */
response_data_size = struct_size(req, port[0].vls, num_vls);
response_data_size = struct_size(req, port.vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@ -2986,7 +2986,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
rsp = &req->port[0];
rsp = &req->port;
memset(rsp, 0, sizeof(*rsp));
rsp->port_number = port;
@ -3182,7 +3182,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
response_data_size = struct_size(req, port[0].vls, num_vls);
response_data_size = struct_size(req, port.vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@ -3201,7 +3201,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
rsp = &req->port[0];
rsp = &req->port;
ibp = to_iport(ibdev, port_num);
ppd = ppd_from_ibp(ibp);
@ -3340,7 +3340,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
u64 reg;
req = (struct opa_port_error_info_msg *)pmp->data;
rsp = &req->port[0];
rsp = &req->port;
num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@ -3590,7 +3590,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
u32 error_info_select;
req = (struct opa_port_error_info_msg *)pmp->data;
rsp = &req->port[0];
rsp = &req->port;
num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
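The hfi1 MAD hunks above convert the trailing port[1]/vls[0] fake arrays into C99 flexible array members (or a plain struct member where there is always exactly one port) and size the response with struct_size(), which saturates rather than wraps if the element count would overflow the multiplication. The idiom in miniature, with an invented message layout:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct example_msg {
	__be64 port_select_mask[4];
	struct example_vl {
		__be64 xmit_data;
		__be64 rcv_data;
	} vls[];			/* one entry per selected VL */
};

static struct example_msg *example_msg_alloc(unsigned int num_vls)
{
	struct example_msg *msg;

	/* struct_size(msg, vls, num_vls) == sizeof(*msg) +
	 * num_vls * sizeof(msg->vls[0]), clamped to SIZE_MAX on overflow. */
	msg = kzalloc(struct_size(msg, vls, num_vls), GFP_KERNEL);
	return msg;
}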


@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
* right now.
*/
set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi);
rc = msix_netdev_request_rcd_irq(rxq->rcd);
if (rc)
goto bail_context_irq_failure;


@ -202,6 +202,7 @@ struct hns_roce_ucontext {
struct list_head page_list;
struct mutex page_mutex;
struct hns_user_mmap_entry *db_mmap_entry;
u32 config;
};
struct hns_roce_pd {
@ -334,6 +335,7 @@ struct hns_roce_wq {
u32 head;
u32 tail;
void __iomem *db_reg;
u32 ext_sge_cnt;
};
struct hns_roce_sge {
@ -635,6 +637,7 @@ struct hns_roce_qp {
struct list_head rq_node; /* all recv qps are on a list */
struct list_head sq_node; /* all send qps are on a list */
struct hns_user_mmap_entry *dwqe_mmap_entry;
u32 config;
};
struct hns_roce_ib_iboe {


@ -192,7 +192,6 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
unsigned int *sge_idx, u32 msg_len)
{
struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
unsigned int ext_sge_sz = qp->sq.max_gs * HNS_ROCE_SGE_SIZE;
unsigned int left_len_in_pg;
unsigned int idx = *sge_idx;
unsigned int i = 0;
@ -200,7 +199,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
void *addr;
void *dseg;
if (msg_len > ext_sge_sz) {
if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) {
ibdev_err(ibdev,
"no enough extended sge space for inline data.\n");
return -EINVAL;
@ -1274,6 +1273,30 @@ static void update_cmdq_status(struct hns_roce_dev *hr_dev)
hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
}
static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
{
struct hns_roce_cmd_errcode errcode_table[] = {
{CMD_EXEC_SUCCESS, 0},
{CMD_NO_AUTH, -EPERM},
{CMD_NOT_EXIST, -EOPNOTSUPP},
{CMD_CRQ_FULL, -EXFULL},
{CMD_NEXT_ERR, -ENOSR},
{CMD_NOT_EXEC, -ENOTBLK},
{CMD_PARA_ERR, -EINVAL},
{CMD_RESULT_ERR, -ERANGE},
{CMD_TIMEOUT, -ETIME},
{CMD_HILINK_ERR, -ENOLINK},
{CMD_INFO_ILLEGAL, -ENXIO},
{CMD_INVALID, -EBADR},
};
u16 i;
for (i = 0; i < ARRAY_SIZE(errcode_table); i++)
if (desc_ret == errcode_table[i].return_status)
return errcode_table[i].errno;
return -EIO;
}
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
@ -1319,7 +1342,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
dev_err_ratelimited(hr_dev->dev,
"Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
desc->opcode, desc_ret);
ret = -EIO;
ret = hns_roce_cmd_err_convert_errno(desc_ret);
}
} else {
/* FW/HW reset or incorrect number of desc */
@ -2024,13 +2047,14 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL | HNS_ROCE_CAP_FLAG_XRC;
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
caps->flags |= HNS_ROCE_CAP_FLAG_STASH |
HNS_ROCE_CAP_FLAG_DIRECT_WQE;
HNS_ROCE_CAP_FLAG_DIRECT_WQE |
HNS_ROCE_CAP_FLAG_XRC;
caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE;
} else {
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
@ -2342,6 +2366,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
if (!(caps->page_size_cap & PAGE_SIZE))
caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
return 0;
}
@ -2631,31 +2658,124 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
}
static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_pd *hr_pd;
struct ib_pd *pd;
hr_pd = kzalloc(sizeof(*hr_pd), GFP_KERNEL);
if (ZERO_OR_NULL_PTR(hr_pd))
return NULL;
pd = &hr_pd->ibpd;
pd->device = ibdev;
if (hns_roce_alloc_pd(pd, NULL)) {
ibdev_err(ibdev, "failed to create pd for free mr.\n");
kfree(hr_pd);
return NULL;
}
free_mr->rsv_pd = to_hr_pd(pd);
free_mr->rsv_pd->ibpd.device = &hr_dev->ib_dev;
free_mr->rsv_pd->ibpd.uobject = NULL;
free_mr->rsv_pd->ibpd.__internal_mr = NULL;
atomic_set(&free_mr->rsv_pd->ibpd.usecnt, 0);
return pd;
}
static struct ib_cq *free_mr_init_cq(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct ib_cq_init_attr cq_init_attr = {};
struct hns_roce_cq *hr_cq;
struct ib_cq *cq;
cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
if (ZERO_OR_NULL_PTR(hr_cq))
return NULL;
cq = &hr_cq->ib_cq;
cq->device = ibdev;
if (hns_roce_create_cq(cq, &cq_init_attr, NULL)) {
ibdev_err(ibdev, "failed to create cq for free mr.\n");
kfree(hr_cq);
return NULL;
}
free_mr->rsv_cq = to_hr_cq(cq);
free_mr->rsv_cq->ib_cq.device = &hr_dev->ib_dev;
free_mr->rsv_cq->ib_cq.uobject = NULL;
free_mr->rsv_cq->ib_cq.comp_handler = NULL;
free_mr->rsv_cq->ib_cq.event_handler = NULL;
free_mr->rsv_cq->ib_cq.cq_context = NULL;
atomic_set(&free_mr->rsv_cq->ib_cq.usecnt, 0);
return cq;
}
static int free_mr_init_qp(struct hns_roce_dev *hr_dev, struct ib_cq *cq,
struct ib_qp_init_attr *init_attr, int i)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_qp *hr_qp;
struct ib_qp *qp;
int ret;
hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
if (ZERO_OR_NULL_PTR(hr_qp))
return -ENOMEM;
qp = &hr_qp->ibqp;
qp->device = ibdev;
ret = hns_roce_create_qp(qp, init_attr, NULL);
if (ret) {
ibdev_err(ibdev, "failed to create qp for free mr.\n");
kfree(hr_qp);
return ret;
}
free_mr->rsv_qp[i] = hr_qp;
free_mr->rsv_qp[i]->ibqp.recv_cq = cq;
free_mr->rsv_qp[i]->ibqp.send_cq = cq;
return 0;
}
static void free_mr_exit(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
int ret;
struct ib_qp *qp;
int i;
for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
if (free_mr->rsv_qp[i]) {
ret = ib_destroy_qp(free_mr->rsv_qp[i]);
if (ret)
ibdev_err(&hr_dev->ib_dev,
"failed to destroy qp in free mr.\n");
qp = &free_mr->rsv_qp[i]->ibqp;
hns_roce_v2_destroy_qp(qp, NULL);
kfree(free_mr->rsv_qp[i]);
free_mr->rsv_qp[i] = NULL;
}
}
if (free_mr->rsv_cq) {
ib_destroy_cq(free_mr->rsv_cq);
hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL);
kfree(free_mr->rsv_cq);
free_mr->rsv_cq = NULL;
}
if (free_mr->rsv_pd) {
ib_dealloc_pd(free_mr->rsv_pd);
hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL);
kfree(free_mr->rsv_pd);
free_mr->rsv_pd = NULL;
}
}
@ -2664,55 +2784,46 @@ static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct ib_cq_init_attr cq_init_attr = {};
struct ib_qp_init_attr qp_init_attr = {};
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
int ret;
int i;
pd = ib_alloc_pd(ibdev, 0);
if (IS_ERR(pd)) {
ibdev_err(ibdev, "failed to create pd for free mr.\n");
return PTR_ERR(pd);
}
free_mr->rsv_pd = pd;
pd = free_mr_init_pd(hr_dev);
if (!pd)
return -ENOMEM;
cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr);
if (IS_ERR(cq)) {
ibdev_err(ibdev, "failed to create cq for free mr.\n");
ret = PTR_ERR(cq);
goto create_failed;
cq = free_mr_init_cq(hr_dev);
if (!cq) {
ret = -ENOMEM;
goto create_failed_cq;
}
free_mr->rsv_cq = cq;
qp_init_attr.qp_type = IB_QPT_RC;
qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
qp_init_attr.send_cq = free_mr->rsv_cq;
qp_init_attr.recv_cq = free_mr->rsv_cq;
qp_init_attr.send_cq = cq;
qp_init_attr.recv_cq = cq;
for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
qp = ib_create_qp(free_mr->rsv_pd, &qp_init_attr);
if (IS_ERR(qp)) {
ibdev_err(ibdev, "failed to create qp for free mr.\n");
ret = PTR_ERR(qp);
goto create_failed;
}
free_mr->rsv_qp[i] = qp;
ret = free_mr_init_qp(hr_dev, cq, &qp_init_attr, i);
if (ret)
goto create_failed_qp;
}
return 0;
create_failed:
free_mr_exit(hr_dev);
create_failed_qp:
hns_roce_destroy_cq(cq, NULL);
kfree(cq);
create_failed_cq:
hns_roce_dealloc_pd(pd, NULL);
kfree(pd);
return ret;
}
@ -2728,14 +2839,17 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
int mask;
int ret;
hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]);
hr_qp = to_hr_qp(&free_mr->rsv_qp[sl_num]->ibqp);
hr_qp->free_mr_en = 1;
hr_qp->ibqp.device = ibdev;
hr_qp->ibqp.qp_type = IB_QPT_RC;
mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
attr->qp_state = IB_QPS_INIT;
attr->port_num = 1;
attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
IB_QPS_INIT);
if (ret) {
ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
ret);
@ -2756,7 +2870,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
IB_QPS_RTR);
hr_dev->loop_idc = loopback;
if (ret) {
ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
@ -2770,7 +2885,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN;
attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR,
IB_QPS_RTS);
if (ret)
ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
ret);
@ -3186,7 +3302,8 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
int i, count;
count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
ARRAY_SIZE(pages), &pbl_ba);
min_t(int, ARRAY_SIZE(pages), mr->npages),
&pbl_ba);
if (count < 1) {
ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
count);
@ -3414,7 +3531,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
mutex_lock(&free_mr->mutex);
for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
hr_qp = to_hr_qp(free_mr->rsv_qp[i]);
hr_qp = free_mr->rsv_qp[i];
ret = free_mr_post_send_lp_wqe(hr_qp);
if (ret) {
@ -3429,7 +3546,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies;
while (cqe_cnt) {
npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc);
npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
if (npolled < 0) {
ibdev_err(ibdev,
"failed to poll cqe for free mr, remain %d cqe.\n",
@ -5375,6 +5492,8 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
rdma_ah_set_sl(&qp_attr->ah_attr,
hr_reg_read(&context, QPC_SL));
rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1);
rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
grh->flow_label = hr_reg_read(&context, QPC_FL);
grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX);
grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT);
@ -5468,7 +5587,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
return ret;
}
static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);


@ -272,6 +272,11 @@ enum hns_roce_cmd_return_status {
CMD_OTHER_ERR = 0xff
};
struct hns_roce_cmd_errcode {
enum hns_roce_cmd_return_status return_status;
int errno;
};
enum hns_roce_sgid_type {
GID_TYPE_FLAG_ROCE_V1 = 0,
GID_TYPE_FLAG_ROCE_V2_IPV4,
@ -1327,9 +1332,9 @@ struct hns_roce_link_table {
#define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
struct hns_roce_v2_free_mr {
struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
struct ib_cq *rsv_cq;
struct ib_pd *rsv_pd;
struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
struct hns_roce_cq *rsv_cq;
struct hns_roce_pd *rsv_pd;
struct mutex mutex;
};
@ -1459,6 +1464,8 @@ struct hns_roce_sccc_clr_done {
__le32 rsv[5];
};
int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
void __iomem *dest)
{


@ -354,10 +354,11 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
int ret;
struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_ib_alloc_ucontext ucmd = {};
int ret;
if (!hr_dev->active)
return -EAGAIN;
@ -365,6 +366,19 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
resp.qp_tab_size = hr_dev->caps.num_qps;
resp.srq_tab_size = hr_dev->caps.num_srqs;
ret = ib_copy_from_udata(&ucmd, udata,
min(udata->inlen, sizeof(ucmd)));
if (ret)
return ret;
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
if (context->config & HNS_ROCE_EXSGE_FLAGS) {
resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
resp.max_inline_data = hr_dev->caps.max_sq_inline;
}
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
goto error_fail_uar_alloc;


@ -392,10 +392,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
return &mr->ibmr;
err_key:
free_mr_key(hr_dev, mr);
err_pbl:
free_mr_pbl(hr_dev, mr);
err_key:
free_mr_key(hr_dev, mr);
err_free:
kfree(mr);
return ERR_PTR(ret);
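The hns_roce alloc_mr hunk above is the classic unwind-ordering fix: the error labels must release resources in the reverse order they were acquired, so the PBL (set up second) is freed before the key (set up first). The shape of the idiom, with purely hypothetical helpers:

static int example_setup(struct example_obj *obj)
{
	int ret;

	ret = example_alloc_key(obj);		/* first resource */
	if (ret)
		return ret;

	ret = example_alloc_pbl(obj);		/* second resource */
	if (ret)
		goto err_free_key;

	ret = example_register(obj);		/* final step */
	if (ret)
		goto err_free_pbl;

	return 0;

err_free_pbl:					/* unwind in reverse order */
	example_free_pbl(obj);
err_free_key:
	example_free_key(obj);
	return ret;
}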


@ -476,38 +476,109 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
return 0;
}
static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap)
{
/* GSI/UD QP only has extended sge */
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
return qp->sq.max_gs;
if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
if (cap->max_inline_data) {
cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
return min(cap->max_inline_data,
hr_dev->caps.max_sq_inline);
}
return 0;
}
static void update_inline_data(struct hns_roce_qp *hr_qp,
struct ib_qp_cap *cap)
{
u32 sge_num = hr_qp->sq.ext_sge_cnt;
if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
hr_qp->ibqp.qp_type == IB_QPT_UD))
sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
cap->max_inline_data = max(cap->max_inline_data,
sge_num * HNS_ROCE_SGE_SIZE);
}
hr_qp->max_inline_data = cap->max_inline_data;
}
static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
u32 max_send_sge)
{
unsigned int std_sge_num;
unsigned int min_sge;
std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
min_sge = is_ud_or_gsi ? 1 : 0;
return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
min_sge;
}
static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
u32 max_inline_data)
{
unsigned int inline_sge;
inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
/*
* if max_inline_data less than
* HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
* In addition to ud's mode, no need to extend sge.
*/
if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
inline_sge = 0;
return inline_sge;
}
static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
{
bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
hr_qp->ibqp.qp_type == IB_QPT_UD);
unsigned int std_sge_num;
u32 inline_ext_sge = 0;
u32 ext_wqe_sge_cnt;
u32 total_sge_cnt;
u32 wqe_sge_cnt;
cap->max_inline_data = get_max_inline_data(hr_dev, cap);
hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
cap->max_send_sge);
hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
inline_ext_sge = max(ext_wqe_sge_cnt,
get_sge_num_from_max_inl_data(is_ud_or_gsi,
cap->max_inline_data));
hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
roundup_pow_of_two(inline_ext_sge) : 0;
wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp);
hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
} else {
hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
}
/* If the number of extended sge is not zero, they MUST use the
* space of HNS_HW_PAGE_SIZE at least.
*/
if (wqe_sge_cnt) {
total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt);
if (ext_wqe_sge_cnt) {
total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
hr_qp->sge.sge_cnt = max(total_sge_cnt,
(u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
}
update_inline_data(hr_qp, cap);
}
static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
@ -556,6 +627,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
hr_qp->sq.wqe_cnt = cnt;
cap->max_send_sge = hr_qp->sq.max_gs;
return 0;
}
@ -986,13 +1058,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct hns_roce_ib_create_qp *ucmd)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ucontext *uctx;
int ret;
if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
hr_qp->max_inline_data = init_attr->cap.max_inline_data;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
@ -1015,12 +1083,17 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
return ret;
}
uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
ibucontext);
hr_qp->config = uctx->config;
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
if (ret)
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
} else {
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
ibdev_err(ibdev,
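The reworked hns_roce QP setup above derives the extended SGE count from both max_send_sge and max_inline_data once the HNS_ROCE_EXSGE_FLAGS handshake is negotiated. A worked example, assuming the driver constants HNS_ROCE_SGE_SIZE = 16 and HNS_ROCE_SGE_IN_WQE = 2 (taken from surrounding code, not shown in these hunks): for an RC QP requesting max_send_sge = 6 and max_inline_data = 128, the standard WQE holds 2 SGEs, so 4 extended SGEs come from the send_sge request, while roundup_pow_of_two(128) / 16 = 8 SGEs would be needed to hold the inline payload; ext_sge_cnt becomes roundup_pow_of_two(max(4, 8)) = 8, sq.max_gs becomes 8 + 2 = 10 (capped by caps.max_sq_sg), and update_inline_data() reports max_inline_data = max(128, 8 * 16) = 128 back to the caller.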


@ -566,21 +566,37 @@ static void irdma_set_mw_bind_wqe_gen_1(__le64 *wqe,
/**
* irdma_copy_inline_data_gen_1 - Copy inline data to wqe
* @dest: pointer to wqe
* @src: pointer to inline data
* @len: length of inline data to copy
* @wqe: pointer to wqe
* @sge_list: table of pointers to inline data
* @num_sges: Total inline data length
* @polarity: compatibility parameter
*/
static void irdma_copy_inline_data_gen_1(u8 *dest, u8 *src, u32 len,
u8 polarity)
static void irdma_copy_inline_data_gen_1(u8 *wqe, struct ib_sge *sge_list,
u32 num_sges, u8 polarity)
{
if (len <= 16) {
memcpy(dest, src, len);
} else {
memcpy(dest, src, 16);
src += 16;
dest = dest + 32;
memcpy(dest, src, len - 16);
u32 quanta_bytes_remaining = 16;
int i;
for (i = 0; i < num_sges; i++) {
u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
u32 sge_len = sge_list[i].length;
while (sge_len) {
u32 bytes_copied;
bytes_copied = min(sge_len, quanta_bytes_remaining);
memcpy(wqe, cur_sge, bytes_copied);
wqe += bytes_copied;
cur_sge += bytes_copied;
quanta_bytes_remaining -= bytes_copied;
sge_len -= bytes_copied;
if (!quanta_bytes_remaining) {
/* Remaining inline bytes reside after hdr */
wqe += 16;
quanta_bytes_remaining = 32;
}
}
}
}
@ -612,35 +628,51 @@ static void irdma_set_mw_bind_wqe(__le64 *wqe,
/**
* irdma_copy_inline_data - Copy inline data to wqe
* @dest: pointer to wqe
* @src: pointer to inline data
* @len: length of inline data to copy
* @wqe: pointer to wqe
* @sge_list: table of pointers to inline data
* @num_sges: number of SGE's
* @polarity: polarity of wqe valid bit
*/
static void irdma_copy_inline_data(u8 *dest, u8 *src, u32 len, u8 polarity)
static void irdma_copy_inline_data(u8 *wqe, struct ib_sge *sge_list,
u32 num_sges, u8 polarity)
{
u8 inline_valid = polarity << IRDMA_INLINE_VALID_S;
u32 copy_size;
u32 quanta_bytes_remaining = 8;
bool first_quanta = true;
int i;
dest += 8;
if (len <= 8) {
memcpy(dest, src, len);
return;
}
*((u64 *)dest) = *((u64 *)src);
len -= 8;
src += 8;
dest += 24; /* point to additional 32 byte quanta */
while (len) {
copy_size = len < 31 ? len : 31;
memcpy(dest, src, copy_size);
*(dest + 31) = inline_valid;
len -= copy_size;
dest += 32;
src += copy_size;
wqe += 8;
for (i = 0; i < num_sges; i++) {
u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
u32 sge_len = sge_list[i].length;
while (sge_len) {
u32 bytes_copied;
bytes_copied = min(sge_len, quanta_bytes_remaining);
memcpy(wqe, cur_sge, bytes_copied);
wqe += bytes_copied;
cur_sge += bytes_copied;
quanta_bytes_remaining -= bytes_copied;
sge_len -= bytes_copied;
if (!quanta_bytes_remaining) {
quanta_bytes_remaining = 31;
/* Remaining inline bytes reside after hdr */
if (first_quanta) {
first_quanta = false;
wqe += 16;
} else {
*wqe = inline_valid;
wqe++;
}
}
}
}
if (!first_quanta && quanta_bytes_remaining < 31)
*(wqe + quanta_bytes_remaining) = inline_valid;
}
/**
@ -679,20 +711,27 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
struct irdma_post_sq_info *info, bool post_sq)
{
__le64 *wqe;
struct irdma_inline_rdma_write *op_info;
struct irdma_rdma_write *op_info;
u64 hdr = 0;
u32 wqe_idx;
bool read_fence = false;
u32 i, total_size = 0;
u16 quanta;
info->push_wqe = qp->push_db ? true : false;
op_info = &info->op.inline_rdma_write;
op_info = &info->op.rdma_write;
if (op_info->len > qp->max_inline_data)
if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges))
return -EINVAL;
quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len);
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
for (i = 0; i < op_info->num_lo_sges; i++)
total_size += op_info->lo_sg_list[i].length;
if (unlikely(total_size > qp->max_inline_data))
return -EINVAL;
quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
info);
if (!wqe)
return -ENOMEM;
@ -705,7 +744,7 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) |
FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) |
FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) |
@ -719,7 +758,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
set_64bit_val(wqe, 0,
FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len,
qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list,
op_info->num_lo_sges,
qp->swqe_polarity);
dma_wmb(); /* make sure WQE is populated before valid bit is set */
@ -745,20 +785,27 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
struct irdma_post_sq_info *info, bool post_sq)
{
__le64 *wqe;
struct irdma_post_inline_send *op_info;
struct irdma_post_send *op_info;
u64 hdr;
u32 wqe_idx;
bool read_fence = false;
u32 i, total_size = 0;
u16 quanta;
info->push_wqe = qp->push_db ? true : false;
op_info = &info->op.inline_send;
op_info = &info->op.send;
if (op_info->len > qp->max_inline_data)
if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges))
return -EINVAL;
quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len);
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
for (i = 0; i < op_info->num_sges; i++)
total_size += op_info->sg_list[i].length;
if (unlikely(total_size > qp->max_inline_data))
return -EINVAL;
quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
info);
if (!wqe)
return -ENOMEM;
@ -773,7 +820,7 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) |
FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) |
FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) |
FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG,
(info->imm_data_valid ? 1 : 0)) |
FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
@ -789,8 +836,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
if (info->imm_data_valid)
set_64bit_val(wqe, 0,
FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len,
qp->swqe_polarity);
qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list,
op_info->num_sges, qp->swqe_polarity);
dma_wmb(); /* make sure WQE is populated before valid bit is set */
@ -1002,11 +1049,10 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
__le64 *cqe;
struct irdma_qp_uk *qp;
struct irdma_ring *pring = NULL;
u32 wqe_idx, q_type;
u32 wqe_idx;
int ret_code;
bool move_cq_head = true;
u8 polarity;
u8 op_type;
bool ext_valid;
__le64 *ext_cqe;
@ -1074,7 +1120,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
info->ud_vlan_valid = false;
}
q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3);
info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
@ -1113,8 +1159,9 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
}
wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
info->op_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
if (q_type == IRDMA_CQE_QTYPE_RQ) {
if (info->q_type == IRDMA_CQE_QTYPE_RQ) {
u32 array_idx;
array_idx = wqe_idx / qp->rq_wqe_size_multiplier;
@ -1134,10 +1181,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
if (info->imm_valid)
info->op_type = IRDMA_OP_TYPE_REC_IMM;
else
info->op_type = IRDMA_OP_TYPE_REC;
if (qword3 & IRDMACQ_STAG) {
info->stag_invalid_set = true;
info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2);
@ -1195,17 +1238,18 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
sw_wqe = qp->sq_base[tail].elem;
get_64bit_val(sw_wqe, 24,
&wqe_qword);
op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword);
info->op_type = op_type;
info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE,
wqe_qword);
IRDMA_RING_SET_TAIL(qp->sq_ring,
tail + qp->sq_wrtrk_array[tail].quanta);
if (op_type != IRDMAQP_OP_NOP) {
if (info->op_type != IRDMAQP_OP_NOP) {
info->wr_id = qp->sq_wrtrk_array[tail].wrid;
info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
break;
}
} while (1);
if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR)
if (info->op_type == IRDMA_OP_TYPE_BIND_MW &&
info->minor_err == FLUSH_PROT_ERR)
info->minor_err = FLUSH_MW_BIND_ERR;
qp->sq_flush_seen = true;
if (!IRDMA_RING_MORE_WORK(qp->sq_ring))

View File

@ -173,14 +173,6 @@ struct irdma_post_send {
u32 ah_id;
};
struct irdma_post_inline_send {
void *data;
u32 len;
u32 qkey;
u32 dest_qp;
u32 ah_id;
};
struct irdma_post_rq_info {
u64 wr_id;
struct ib_sge *sg_list;
@ -193,12 +185,6 @@ struct irdma_rdma_write {
struct ib_sge rem_addr;
};
struct irdma_inline_rdma_write {
void *data;
u32 len;
struct ib_sge rem_addr;
};
struct irdma_rdma_read {
struct ib_sge *lo_sg_list;
u32 num_lo_sges;
@ -241,8 +227,6 @@ struct irdma_post_sq_info {
struct irdma_rdma_read rdma_read;
struct irdma_bind_window bind_window;
struct irdma_inv_local_stag inv_local_stag;
struct irdma_inline_rdma_write inline_rdma_write;
struct irdma_post_inline_send inline_send;
} op;
};
@ -261,6 +245,7 @@ struct irdma_cq_poll_info {
u16 ud_vlan;
u8 ud_smac[6];
u8 op_type;
u8 q_type;
bool stag_invalid_set:1; /* or L_R_Key set */
bool push_dropped:1;
bool error:1;
@ -291,7 +276,8 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
bool post_sq);
struct irdma_wqe_uk_ops {
void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity);
void (*iw_copy_inline_data)(u8 *dest, struct ib_sge *sge_list,
u32 num_sges, u8 polarity);
u16 (*iw_inline_data_size_to_quanta)(u32 data_size);
void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge,
u8 valid);

View File

@ -2591,6 +2591,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
sw_wqe = qp->sq_base[wqe_idx].elem;
get_64bit_val(sw_wqe, 24, &wqe_qword);
cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword);
cmpl->cpi.q_type = IRDMA_CQE_QTYPE_SQ;
/* remove the SQ WR by moving SQ tail*/
IRDMA_RING_SET_TAIL(*sq_ring,
sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
@ -2629,6 +2630,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ;
/* remove the RQ WR by moving RQ tail */
IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
ibdev_dbg(iwqp->iwrcq->ibcq.device,

View File

@ -63,36 +63,6 @@ static int irdma_query_device(struct ib_device *ibdev,
return 0;
}
/**
* irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed
* @link_speed: netdev phy link speed
* @active_speed: IB port speed
* @active_width: IB port width
*/
static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed,
u8 *active_width)
{
if (link_speed <= SPEED_1000) {
*active_width = IB_WIDTH_1X;
*active_speed = IB_SPEED_SDR;
} else if (link_speed <= SPEED_10000) {
*active_width = IB_WIDTH_1X;
*active_speed = IB_SPEED_FDR10;
} else if (link_speed <= SPEED_20000) {
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_DDR;
} else if (link_speed <= SPEED_25000) {
*active_width = IB_WIDTH_1X;
*active_speed = IB_SPEED_EDR;
} else if (link_speed <= SPEED_40000) {
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_FDR10;
} else {
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_EDR;
}
}
/**
* irdma_query_port - get port attributes
* @ibdev: device pointer from stack
@ -120,8 +90,9 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port,
props->state = IB_PORT_DOWN;
props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed,
&props->active_width);
ib_get_eth_speed(ibdev, port, &props->active_speed,
&props->active_width);
if (rdma_protocol_roce(ibdev, 1)) {
props->gid_tbl_len = 32;
@ -1242,6 +1213,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
av->attrs = attr->ah_attr;
rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid);
av->net_type = rdma_gid_attr_network_type(sgid_attr);
if (av->net_type == RDMA_NETWORK_IPV6) {
__be32 *daddr =
av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
@ -2358,9 +2330,10 @@ static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc,
* @rf: RDMA PCI function
* @iwmr: mr pointer for this memory registration
* @use_pbles: flag indicating whether to use pbles
* @lvl_1_only: request only level 1 pble if true
*/
static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
bool use_pbles)
bool use_pbles, bool lvl_1_only)
{
struct irdma_pbl *iwpbl = &iwmr->iwpbl;
struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
@ -2371,7 +2344,7 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
if (use_pbles) {
status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt,
false);
lvl_1_only);
if (status)
return status;
@ -2414,16 +2387,10 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev,
bool ret = true;
pg_size = iwmr->page_size;
err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, true);
if (err)
return err;
if (use_pbles && palloc->level != PBLE_LEVEL_1) {
irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
iwpbl->pbl_allocated = false;
return -ENOMEM;
}
if (use_pbles)
arr = palloc->level1.addr;
@ -2899,7 +2866,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
case IRDMA_MEMREG_TYPE_MEM:
use_pbles = (iwmr->page_cnt != 1);
err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
if (err)
goto error;
@ -3165,30 +3132,20 @@ static int irdma_post_send(struct ib_qp *ibqp,
info.stag_to_inv = ib_wr->ex.invalidate_rkey;
}
if (ib_wr->send_flags & IB_SEND_INLINE) {
info.op.inline_send.data = (void *)(unsigned long)
ib_wr->sg_list[0].addr;
info.op.inline_send.len = ib_wr->sg_list[0].length;
if (iwqp->ibqp.qp_type == IB_QPT_UD ||
iwqp->ibqp.qp_type == IB_QPT_GSI) {
ah = to_iwah(ud_wr(ib_wr)->ah);
info.op.inline_send.ah_id = ah->sc_ah.ah_info.ah_idx;
info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey;
info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn;
}
err = irdma_uk_inline_send(ukqp, &info, false);
} else {
info.op.send.num_sges = ib_wr->num_sge;
info.op.send.sg_list = ib_wr->sg_list;
if (iwqp->ibqp.qp_type == IB_QPT_UD ||
iwqp->ibqp.qp_type == IB_QPT_GSI) {
ah = to_iwah(ud_wr(ib_wr)->ah);
info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
}
err = irdma_uk_send(ukqp, &info, false);
info.op.send.num_sges = ib_wr->num_sge;
info.op.send.sg_list = ib_wr->sg_list;
if (iwqp->ibqp.qp_type == IB_QPT_UD ||
iwqp->ibqp.qp_type == IB_QPT_GSI) {
ah = to_iwah(ud_wr(ib_wr)->ah);
info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
}
if (ib_wr->send_flags & IB_SEND_INLINE)
err = irdma_uk_inline_send(ukqp, &info, false);
else
err = irdma_uk_send(ukqp, &info, false);
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) {
@ -3205,22 +3162,15 @@ static int irdma_post_send(struct ib_qp *ibqp,
else
info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
if (ib_wr->send_flags & IB_SEND_INLINE) {
info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr;
info.op.inline_rdma_write.len =
ib_wr->sg_list[0].length;
info.op.inline_rdma_write.rem_addr.addr =
rdma_wr(ib_wr)->remote_addr;
info.op.inline_rdma_write.rem_addr.lkey =
rdma_wr(ib_wr)->rkey;
info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
info.op.rdma_write.lo_sg_list = ib_wr->sg_list;
info.op.rdma_write.rem_addr.addr =
rdma_wr(ib_wr)->remote_addr;
info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
if (ib_wr->send_flags & IB_SEND_INLINE)
err = irdma_uk_inline_rdma_write(ukqp, &info, false);
} else {
info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr;
info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
else
err = irdma_uk_rdma_write(ukqp, &info, false);
}
break;
case IB_WR_RDMA_READ_WITH_INV:
inv_stag = true;
@ -3380,7 +3330,6 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
static void irdma_process_cqe(struct ib_wc *entry,
struct irdma_cq_poll_info *cq_poll_info)
{
struct irdma_qp *iwqp;
struct irdma_sc_qp *qp;
entry->wc_flags = 0;
@ -3388,7 +3337,6 @@ static void irdma_process_cqe(struct ib_wc *entry,
entry->wr_id = cq_poll_info->wr_id;
qp = cq_poll_info->qp_handle;
iwqp = qp->qp_uk.back_qp;
entry->qp = qp->qp_uk.back_qp;
if (cq_poll_info->error) {
@ -3421,42 +3369,17 @@ static void irdma_process_cqe(struct ib_wc *entry,
}
}
switch (cq_poll_info->op_type) {
case IRDMA_OP_TYPE_RDMA_WRITE:
case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
entry->opcode = IB_WC_RDMA_WRITE;
break;
case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
case IRDMA_OP_TYPE_RDMA_READ:
entry->opcode = IB_WC_RDMA_READ;
break;
case IRDMA_OP_TYPE_SEND_INV:
case IRDMA_OP_TYPE_SEND_SOL:
case IRDMA_OP_TYPE_SEND_SOL_INV:
case IRDMA_OP_TYPE_SEND:
entry->opcode = IB_WC_SEND;
break;
case IRDMA_OP_TYPE_FAST_REG_NSMR:
entry->opcode = IB_WC_REG_MR;
break;
case IRDMA_OP_TYPE_INV_STAG:
entry->opcode = IB_WC_LOCAL_INV;
break;
case IRDMA_OP_TYPE_REC_IMM:
case IRDMA_OP_TYPE_REC:
entry->opcode = cq_poll_info->op_type == IRDMA_OP_TYPE_REC_IMM ?
IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) {
set_ib_wc_op_sq(cq_poll_info, entry);
} else {
set_ib_wc_op_rq(cq_poll_info, entry,
qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ?
true : false);
if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD &&
cq_poll_info->stag_invalid_set) {
entry->ex.invalidate_rkey = cq_poll_info->inv_stag;
entry->wc_flags |= IB_WC_WITH_INVALIDATE;
}
break;
default:
ibdev_err(&iwqp->iwdev->ibdev,
"Invalid opcode = %d in CQE\n", cq_poll_info->op_type);
entry->status = IB_WC_GENERAL_ERR;
return;
}
if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) {

View File

@ -232,6 +232,59 @@ static inline u16 irdma_fw_minor_ver(struct irdma_sc_dev *dev)
return (u16)FIELD_GET(IRDMA_FW_VER_MINOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]);
}
static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info,
struct ib_wc *entry)
{
switch (cq_poll_info->op_type) {
case IRDMA_OP_TYPE_RDMA_WRITE:
case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
entry->opcode = IB_WC_RDMA_WRITE;
break;
case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
case IRDMA_OP_TYPE_RDMA_READ:
entry->opcode = IB_WC_RDMA_READ;
break;
case IRDMA_OP_TYPE_SEND_SOL:
case IRDMA_OP_TYPE_SEND_SOL_INV:
case IRDMA_OP_TYPE_SEND_INV:
case IRDMA_OP_TYPE_SEND:
entry->opcode = IB_WC_SEND;
break;
case IRDMA_OP_TYPE_FAST_REG_NSMR:
entry->opcode = IB_WC_REG_MR;
break;
case IRDMA_OP_TYPE_INV_STAG:
entry->opcode = IB_WC_LOCAL_INV;
break;
default:
entry->status = IB_WC_GENERAL_ERR;
}
}
static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info,
struct ib_wc *entry, bool send_imm_support)
{
/**
* iWARP does not support send with immediate, so immediate data
* on a receive completion must have come from a write with immediate.
*/
if (!send_imm_support) {
entry->opcode = cq_poll_info->imm_valid ?
IB_WC_RECV_RDMA_WITH_IMM :
IB_WC_RECV;
return;
}
switch (cq_poll_info->op_type) {
case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
break;
default:
entry->opcode = IB_WC_RECV;
}
}
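As a concrete example of the rule above: on an iWARP QP, where send-with-immediate is not available, a receive completion carrying immediate data can only have originated from an RDMA Write with Immediate, so set_ib_wc_op_rq() reports IB_WC_RECV_RDMA_WITH_IMM without looking at the opcode; QPs that advertise IRDMA_SEND_WITH_IMM take the opcode switch instead.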
void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4);
int irdma_ib_register_device(struct irdma_device *iwdev);
void irdma_ib_unregister_device(struct irdma_device *iwdev);

View File

@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
config MANA_INFINIBAND
tristate "Microsoft Azure Network Adapter support"
depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA
help
This driver provides low-level RDMA support for Microsoft Azure
Network Adapter (MANA). MANA supports RDMA features that can be used
for workloads (e.g. DPDK, MPI, etc.) that use RDMA verbs to directly
access hardware from user-mode processes in the Microsoft Azure cloud
environment.

View File

@ -0,0 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o

View File

@ -0,0 +1,79 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2022, Microsoft Corporation. All rights reserved.
*/
#include "mana_ib.h"
int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata)
{
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
struct ib_device *ibdev = ibcq->device;
struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
if (udata->inlen < sizeof(ucmd))
return -EINVAL;
err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
if (err) {
ibdev_dbg(ibdev,
"Failed to copy from udata for create cq, %d\n", err);
return err;
}
if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) {
ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
return -EINVAL;
}
cq->cqe = attr->cqe;
cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->umem)) {
err = PTR_ERR(cq->umem);
ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n",
err);
return err;
}
err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region);
if (err) {
ibdev_dbg(ibdev,
"Failed to create dma region for create cq, %d\n",
err);
goto err_release_umem;
}
ibdev_dbg(ibdev,
"mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
err, cq->gdma_region);
/*
* The CQ ID is not known at this time. The ID is generated at create_qp
*/
return 0;
err_release_umem:
ib_umem_release(cq->umem);
return err;
}
int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
struct ib_device *ibdev = ibcq->device;
struct mana_ib_dev *mdev;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
ib_umem_release(cq->umem);
return 0;
}

View File

@ -0,0 +1,117 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2022, Microsoft Corporation. All rights reserved.
*/
#include "mana_ib.h"
#include <net/mana/mana_auxiliary.h>
MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
MODULE_LICENSE("GPL");
MODULE_IMPORT_NS(NET_MANA);
static const struct ib_device_ops mana_ib_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_MANA,
.uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION,
.alloc_pd = mana_ib_alloc_pd,
.alloc_ucontext = mana_ib_alloc_ucontext,
.create_cq = mana_ib_create_cq,
.create_qp = mana_ib_create_qp,
.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
.create_wq = mana_ib_create_wq,
.dealloc_pd = mana_ib_dealloc_pd,
.dealloc_ucontext = mana_ib_dealloc_ucontext,
.dereg_mr = mana_ib_dereg_mr,
.destroy_cq = mana_ib_destroy_cq,
.destroy_qp = mana_ib_destroy_qp,
.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
.destroy_wq = mana_ib_destroy_wq,
.disassociate_ucontext = mana_ib_disassociate_ucontext,
.get_port_immutable = mana_ib_get_port_immutable,
.mmap = mana_ib_mmap,
.modify_qp = mana_ib_modify_qp,
.modify_wq = mana_ib_modify_wq,
.query_device = mana_ib_query_device,
.query_gid = mana_ib_query_gid,
.query_port = mana_ib_query_port,
.reg_user_mr = mana_ib_reg_user_mr,
INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext),
INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table,
ib_ind_table),
};
static int mana_ib_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
struct gdma_dev *mdev = madev->mdev;
struct mana_context *mc;
struct mana_ib_dev *dev;
int ret;
mc = mdev->driver_data;
dev = ib_alloc_device(mana_ib_dev, ib_dev);
if (!dev)
return -ENOMEM;
ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
dev->ib_dev.phys_port_cnt = mc->num_ports;
ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
dev->gdma_dev = mdev;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
/*
* num_comp_vectors needs to be set to the max MSIX index
* when interrupts and event queues are implemented
*/
dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dev.parent = mdev->gdma_context->dev;
ret = ib_register_device(&dev->ib_dev, "mana_%d",
mdev->gdma_context->dev);
if (ret) {
ib_dealloc_device(&dev->ib_dev);
return ret;
}
dev_set_drvdata(&adev->dev, dev);
return 0;
}
static void mana_ib_remove(struct auxiliary_device *adev)
{
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
ib_unregister_device(&dev->ib_dev);
ib_dealloc_device(&dev->ib_dev);
}
static const struct auxiliary_device_id mana_id_table[] = {
{
.name = "mana.rdma",
},
{},
};
MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
static struct auxiliary_driver mana_driver = {
.name = "rdma",
.probe = mana_ib_probe,
.remove = mana_ib_remove,
.id_table = mana_id_table,
};
module_auxiliary_driver(mana_driver);

View File

@ -0,0 +1,521 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2022, Microsoft Corporation. All rights reserved.
*/
#include "mana_ib.h"
void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
u32 port)
{
struct gdma_dev *gd = dev->gdma_dev;
struct mana_port_context *mpc;
struct net_device *ndev;
struct mana_context *mc;
mc = gd->driver_data;
ndev = mc->ports[port];
mpc = netdev_priv(ndev);
mutex_lock(&pd->vport_mutex);
pd->vport_use_count--;
WARN_ON(pd->vport_use_count < 0);
if (!pd->vport_use_count)
mana_uncfg_vport(mpc);
mutex_unlock(&pd->vport_mutex);
}
int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
u32 doorbell_id)
{
struct gdma_dev *mdev = dev->gdma_dev;
struct mana_port_context *mpc;
struct mana_context *mc;
struct net_device *ndev;
int err;
mc = mdev->driver_data;
ndev = mc->ports[port];
mpc = netdev_priv(ndev);
mutex_lock(&pd->vport_mutex);
pd->vport_use_count++;
if (pd->vport_use_count > 1) {
ibdev_dbg(&dev->ib_dev,
"Skip as this PD is already configured vport\n");
mutex_unlock(&pd->vport_mutex);
return 0;
}
err = mana_cfg_vport(mpc, pd->pdn, doorbell_id);
if (err) {
pd->vport_use_count--;
mutex_unlock(&pd->vport_mutex);
ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err);
return err;
}
mutex_unlock(&pd->vport_mutex);
pd->tx_shortform_allowed = mpc->tx_shortform_allowed;
pd->tx_vp_offset = mpc->tx_vp_offset;
ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n",
mpc->port_handle, pd->pdn, doorbell_id);
return 0;
}
int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
struct ib_device *ibdev = ibpd->device;
struct gdma_create_pd_resp resp = {};
struct gdma_create_pd_req req = {};
enum gdma_pd_flags flags = 0;
struct mana_ib_dev *dev;
struct gdma_dev *mdev;
int err;
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
mdev = dev->gdma_dev;
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
sizeof(resp));
req.flags = flags;
err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
sizeof(resp), &resp);
if (err || resp.hdr.status) {
ibdev_dbg(&dev->ib_dev,
"Failed to get pd_id err %d status %u\n", err,
resp.hdr.status);
if (!err)
err = -EPROTO;
return err;
}
pd->pd_handle = resp.pd_handle;
pd->pdn = resp.pd_id;
ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
pd->pd_handle, pd->pdn);
mutex_init(&pd->vport_mutex);
pd->vport_use_count = 0;
return 0;
}
int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
struct ib_device *ibdev = ibpd->device;
struct gdma_destory_pd_resp resp = {};
struct gdma_destroy_pd_req req = {};
struct mana_ib_dev *dev;
struct gdma_dev *mdev;
int err;
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
mdev = dev->gdma_dev;
mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
sizeof(resp));
req.pd_handle = pd->pd_handle;
err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
sizeof(resp), &resp);
if (err || resp.hdr.status) {
ibdev_dbg(&dev->ib_dev,
"Failed to destroy pd_handle 0x%llx err %d status %u",
pd->pd_handle, err, resp.hdr.status);
if (!err)
err = -EPROTO;
}
return err;
}
static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
int doorbell_page)
{
struct gdma_destroy_resource_range_req req = {};
struct gdma_resp_hdr resp = {};
int err;
mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE,
sizeof(req), sizeof(resp));
req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
req.num_resources = 1;
req.allocated_resources = doorbell_page;
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err || resp.status) {
dev_err(gc->dev,
"Failed to destroy doorbell page: ret %d, 0x%x\n",
err, resp.status);
return err ?: -EPROTO;
}
return 0;
}
static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
int *doorbell_page)
{
struct gdma_allocate_resource_range_req req = {};
struct gdma_allocate_resource_range_resp resp = {};
int err;
mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE,
sizeof(req), sizeof(resp));
req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
req.num_resources = 1;
req.alignment = 1;
/* Have GDMA start searching from 0 */
req.allocated_resources = 0;
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err || resp.hdr.status) {
dev_err(gc->dev,
"Failed to allocate doorbell page: ret %d, 0x%x\n",
err, resp.hdr.status);
return err ?: -EPROTO;
}
*doorbell_page = resp.allocated_resources;
return 0;
}
int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
struct ib_udata *udata)
{
struct mana_ib_ucontext *ucontext =
container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
struct ib_device *ibdev = ibcontext->device;
struct mana_ib_dev *mdev;
struct gdma_context *gc;
struct gdma_dev *dev;
int doorbell_page;
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
dev = mdev->gdma_dev;
gc = dev->gdma_context;
/* Allocate a doorbell page index */
ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
if (ret) {
ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret);
return ret;
}
ibdev_dbg(ibdev, "Doorbell page allocated %d\n", doorbell_page);
ucontext->doorbell = doorbell_page;
return 0;
}
void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mana_ib_ucontext *mana_ucontext =
container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
struct ib_device *ibdev = ibcontext->device;
struct mana_ib_dev *mdev;
struct gdma_context *gc;
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
gc = mdev->gdma_dev->gdma_context;
ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
if (ret)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
}
static int
mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
struct gdma_context *gc,
struct gdma_create_dma_region_req *create_req,
size_t num_pages, mana_handle_t *gdma_region)
{
struct gdma_create_dma_region_resp create_resp = {};
unsigned int create_req_msg_size;
int err;
create_req_msg_size =
struct_size(create_req, page_addr_list, num_pages);
create_req->page_addr_list_len = num_pages;
err = mana_gd_send_request(gc, create_req_msg_size, create_req,
sizeof(create_resp), &create_resp);
if (err || create_resp.hdr.status) {
ibdev_dbg(&dev->ib_dev,
"Failed to create DMA region: %d, 0x%x\n",
err, create_resp.hdr.status);
if (!err)
err = -EPROTO;
return err;
}
*gdma_region = create_resp.dma_region_handle;
ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
*gdma_region);
return 0;
}
static int
mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
struct gdma_dma_region_add_pages_req *add_req,
unsigned int num_pages, u32 expected_status)
{
unsigned int add_req_msg_size =
struct_size(add_req, page_addr_list, num_pages);
struct gdma_general_resp add_resp = {};
int err;
mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES,
add_req_msg_size, sizeof(add_resp));
add_req->page_addr_list_len = num_pages;
err = mana_gd_send_request(gc, add_req_msg_size, add_req,
sizeof(add_resp), &add_resp);
if (err || add_resp.hdr.status != expected_status) {
ibdev_dbg(&dev->ib_dev,
"Failed to create DMA region: %d, 0x%x\n",
err, add_resp.hdr.status);
if (!err)
err = -EPROTO;
return err;
}
return 0;
}
int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
mana_handle_t *gdma_region)
{
struct gdma_dma_region_add_pages_req *add_req = NULL;
size_t num_pages_processed = 0, num_pages_to_handle;
struct gdma_create_dma_region_req *create_req;
unsigned int create_req_msg_size;
struct hw_channel_context *hwc;
struct ib_block_iter biter;
size_t max_pgs_add_cmd = 0;
size_t max_pgs_create_cmd;
struct gdma_context *gc;
size_t num_pages_total;
struct gdma_dev *mdev;
unsigned long page_sz;
unsigned int tail = 0;
u64 *page_addr_list;
void *request_buf;
int err;
mdev = dev->gdma_dev;
gc = mdev->gdma_context;
hwc = gc->hwc.driver_data;
/* Hardware requires the DMA region to be aligned to the chosen page size */
page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
if (!page_sz) {
ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
return -ENOMEM;
}
num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
max_pgs_create_cmd =
(hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64);
num_pages_to_handle =
min_t(size_t, num_pages_total, max_pgs_create_cmd);
create_req_msg_size =
struct_size(create_req, page_addr_list, num_pages_to_handle);
request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL);
if (!request_buf)
return -ENOMEM;
create_req = request_buf;
mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION,
create_req_msg_size,
sizeof(struct gdma_create_dma_region_resp));
create_req->length = umem->length;
create_req->offset_in_page = umem->address & (page_sz - 1);
create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
create_req->page_count = num_pages_total;
ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
umem->length, num_pages_total);
ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
page_sz, create_req->offset_in_page);
ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu, gdma_page_type %u",
num_pages_to_handle, create_req->gdma_page_type);
page_addr_list = create_req->page_addr_list;
rdma_umem_for_each_dma_block(umem, &biter, page_sz) {
page_addr_list[tail++] = rdma_block_iter_dma_address(&biter);
if (tail < num_pages_to_handle)
continue;
if (!num_pages_processed) {
/* First create message */
err = mana_ib_gd_first_dma_region(dev, gc, create_req,
tail, gdma_region);
if (err)
goto out;
max_pgs_add_cmd = (hwc->max_req_msg_size -
sizeof(*add_req)) / sizeof(u64);
add_req = request_buf;
add_req->dma_region_handle = *gdma_region;
add_req->reserved3 = 0;
page_addr_list = add_req->page_addr_list;
} else {
/* Subsequent create messages */
u32 expected_s = 0;
if (num_pages_processed + num_pages_to_handle <
num_pages_total)
expected_s = GDMA_STATUS_MORE_ENTRIES;
err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
expected_s);
if (err)
break;
}
num_pages_processed += tail;
tail = 0;
/* The remaining pages to create */
num_pages_to_handle =
min_t(size_t,
num_pages_total - num_pages_processed,
max_pgs_add_cmd);
}
if (err)
mana_ib_gd_destroy_dma_region(dev, *gdma_region);
out:
kfree(request_buf);
return err;
}
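For reference, the create/add split above is bounded by the HWC request size; the following stand-alone sketch (not part of the patch; the header and message sizes are made-up placeholders) illustrates how a page list is expected to be chunked across GDMA messages:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder sizes, only to illustrate the chunking math; the real values
 * come from hwc->max_req_msg_size and the GDMA request struct sizes.
 */
#define HWC_MAX_REQ_MSG_SIZE 4096u
#define CREATE_REQ_HDR_SIZE  64u
#define ADD_REQ_HDR_SIZE     40u

int main(void)
{
	size_t total = 10000;	/* pages, as ib_umem_num_dma_blocks() would report */
	size_t max_create = (HWC_MAX_REQ_MSG_SIZE - CREATE_REQ_HDR_SIZE) / sizeof(uint64_t);
	size_t max_add = (HWC_MAX_REQ_MSG_SIZE - ADD_REQ_HDR_SIZE) / sizeof(uint64_t);
	size_t remaining = total > max_create ? total - max_create : 0;
	size_t msgs = 1;	/* the GDMA_CREATE_DMA_REGION message */

	while (remaining) {
		size_t chunk = remaining < max_add ? remaining : max_add;

		remaining -= chunk;
		msgs++;		/* one GDMA_DMA_REGION_ADD_PAGES message */
	}
	printf("%zu pages -> %zu GDMA messages\n", total, msgs);
	return 0;
}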
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
{
struct gdma_dev *mdev = dev->gdma_dev;
struct gdma_context *gc;
gc = mdev->gdma_context;
ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
return mana_gd_destroy_dma_region(gc, gdma_region);
}
int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mana_ib_ucontext *mana_ucontext =
container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
struct ib_device *ibdev = ibcontext->device;
struct mana_ib_dev *mdev;
struct gdma_context *gc;
phys_addr_t pfn;
pgprot_t prot;
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
gc = mdev->gdma_dev->gdma_context;
if (vma->vm_pgoff != 0) {
ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
return -EINVAL;
}
/* Map to the page indexed by ucontext->doorbell */
pfn = (gc->phys_db_page_base +
gc->db_page_size * mana_ucontext->doorbell) >>
PAGE_SHIFT;
prot = pgprot_writecombine(vma->vm_page_prot);
ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot,
NULL);
if (ret)
ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
else
ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n",
pfn, gc->db_page_size, ret);
return ret;
}
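For orientation, a rough user-space sketch of how a provider library might map this doorbell page through the verbs command fd; the fd plumbing and page size are assumptions, and the actual rdma-core mana provider may differ:

#include <stddef.h>
#include <stdio.h>
#include <sys/mman.h>

/* cmd_fd: the open uverbs command fd for this ucontext (assumed). */
void *map_doorbell(int cmd_fd, size_t db_page_size)
{
	/* Offset 0 is the only vm_pgoff mana_ib_mmap() accepts. */
	void *db = mmap(NULL, db_page_size, PROT_READ | PROT_WRITE,
			MAP_SHARED, cmd_fd, 0);

	if (db == MAP_FAILED) {
		perror("mmap doorbell page");
		return NULL;
	}
	return db;
}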
int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
/*
* This version only supports RAW_PACKET;
* other values need to be filled for other port types
*/
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
return 0;
}
int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw)
{
props->max_qp = MANA_MAX_NUM_QUEUES;
props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE;
/*
* max_cqe could be potentially much bigger.
* As this version of the driver only supports RAW QP, set it to the same
* value as max_qp_wr
*/
props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE;
props->max_mr_size = MANA_IB_MAX_MR_SIZE;
props->max_mr = MANA_IB_MAX_MR;
props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES;
props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES;
return 0;
}
int mana_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
/* This version doesn't return port properties */
return 0;
}
int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
/* This version doesn't return GID properties */
return 0;
}
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
}

View File

@ -0,0 +1,162 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2022 Microsoft Corporation. All rights reserved.
*/
#ifndef _MANA_IB_H_
#define _MANA_IB_H_
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_umem.h>
#include <rdma/mana-abi.h>
#include <rdma/uverbs_ioctl.h>
#include <net/mana/mana.h>
#define PAGE_SZ_BM \
(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
SZ_512K | SZ_1M | SZ_2M)
/* MANA doesn't have any limit for MR size */
#define MANA_IB_MAX_MR_SIZE U64_MAX
/*
* The hardware limit on the number of MRs is greater than the maximum number
* of MRs that can possibly be represented in 24 bits
*/
#define MANA_IB_MAX_MR 0xFFFFFFu
struct mana_ib_dev {
struct ib_device ib_dev;
struct gdma_dev *gdma_dev;
};
struct mana_ib_wq {
struct ib_wq ibwq;
struct ib_umem *umem;
int wqe;
u32 wq_buf_size;
u64 gdma_region;
u64 id;
mana_handle_t rx_object;
};
struct mana_ib_pd {
struct ib_pd ibpd;
u32 pdn;
mana_handle_t pd_handle;
/* Mutex for sharing access to vport_use_count */
struct mutex vport_mutex;
int vport_use_count;
bool tx_shortform_allowed;
u32 tx_vp_offset;
};
struct mana_ib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
mana_handle_t mr_handle;
};
struct mana_ib_cq {
struct ib_cq ibcq;
struct ib_umem *umem;
int cqe;
u64 gdma_region;
u64 id;
};
struct mana_ib_qp {
struct ib_qp ibqp;
/* Work queue info */
struct ib_umem *sq_umem;
int sqe;
u64 sq_gdma_region;
u64 sq_id;
mana_handle_t tx_object;
/* The port on the IB device, starting with 1 */
u32 port;
};
struct mana_ib_ucontext {
struct ib_ucontext ibucontext;
u32 doorbell;
};
struct mana_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_ind_table;
};
int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
mana_handle_t *gdma_region);
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
mana_handle_t gdma_region);
struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata);
int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl);
struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags);
struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 iova, int access_flags,
struct ib_udata *udata);
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata);
int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
struct mana_ib_pd *pd, u32 doorbell_id);
void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
u32 port);
int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
struct ib_udata *udata);
void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma);
int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable);
int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw);
int mana_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid);
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
#endif

View File

@ -0,0 +1,197 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2022, Microsoft Corporation. All rights reserved.
*/
#include "mana_ib.h"
#define VALID_MR_FLAGS \
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
static enum gdma_mr_access_flags
mana_ib_verbs_to_gdma_access_flags(int access_flags)
{
enum gdma_mr_access_flags flags = GDMA_ACCESS_FLAG_LOCAL_READ;
if (access_flags & IB_ACCESS_LOCAL_WRITE)
flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE;
if (access_flags & IB_ACCESS_REMOTE_WRITE)
flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE;
if (access_flags & IB_ACCESS_REMOTE_READ)
flags |= GDMA_ACCESS_FLAG_REMOTE_READ;
return flags;
}
static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
struct gdma_create_mr_params *mr_params)
{
struct gdma_create_mr_response resp = {};
struct gdma_create_mr_request req = {};
struct gdma_dev *mdev = dev->gdma_dev;
struct gdma_context *gc;
int err;
gc = mdev->gdma_context;
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
sizeof(resp));
req.pd_handle = mr_params->pd_handle;
req.mr_type = mr_params->mr_type;
switch (mr_params->mr_type) {
case GDMA_MR_TYPE_GVA:
req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
req.gva.virtual_address = mr_params->gva.virtual_address;
req.gva.access_flags = mr_params->gva.access_flags;
break;
default:
ibdev_dbg(&dev->ib_dev,
"invalid param (GDMA_MR_TYPE) passed, type %d\n",
req.mr_type);
return -EINVAL;
}
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err || resp.hdr.status) {
ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err,
resp.hdr.status);
if (!err)
err = -EPROTO;
return err;
}
mr->ibmr.lkey = resp.lkey;
mr->ibmr.rkey = resp.rkey;
mr->mr_handle = resp.mr_handle;
return 0;
}
static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
{
struct gdma_destroy_mr_response resp = {};
struct gdma_destroy_mr_request req = {};
struct gdma_dev *mdev = dev->gdma_dev;
struct gdma_context *gc;
int err;
gc = mdev->gdma_context;
mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
sizeof(resp));
req.mr_handle = mr_handle;
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err || resp.hdr.status) {
dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err,
resp.hdr.status);
if (!err)
err = -EPROTO;
return err;
}
return 0;
}
struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 iova, int access_flags,
struct ib_udata *udata)
{
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
struct gdma_create_mr_params mr_params = {};
struct ib_device *ibdev = ibpd->device;
struct mana_ib_dev *dev;
struct mana_ib_mr *mr;
u64 dma_region_handle;
int err;
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
ibdev_dbg(ibdev,
"start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
start, iova, length, access_flags);
if (access_flags & ~VALID_MR_FLAGS)
return ERR_PTR(-EINVAL);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(ibdev, start, length, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
ibdev_dbg(ibdev,
"Failed to get umem for register user-mr, %d\n", err);
goto err_free;
}
err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle);
if (err) {
ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
err);
goto err_umem;
}
ibdev_dbg(ibdev,
"mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err,
dma_region_handle);
mr_params.pd_handle = pd->pd_handle;
mr_params.mr_type = GDMA_MR_TYPE_GVA;
mr_params.gva.dma_region_handle = dma_region_handle;
mr_params.gva.virtual_address = iova;
mr_params.gva.access_flags =
mana_ib_verbs_to_gdma_access_flags(access_flags);
err = mana_ib_gd_create_mr(dev, mr, &mr_params);
if (err)
goto err_dma_region;
/*
* There is no need to keep track of dma_region_handle after MR is
* successfully created. The dma_region_handle is tracked in the PF
* as part of the lifecycle of this MR.
*/
return &mr->ibmr;
err_dma_region:
mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
dma_region_handle);
err_umem:
ib_umem_release(mr->umem);
err_free:
kfree(mr);
return ERR_PTR(err);
}
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
struct ib_device *ibdev = ibmr->device;
struct mana_ib_dev *dev;
int err;
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
err = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
if (err)
return err;
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
return 0;
}

View File

@ -0,0 +1,506 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2022, Microsoft Corporation. All rights reserved.
*/
#include "mana_ib.h"
static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
struct net_device *ndev,
mana_handle_t default_rxobj,
mana_handle_t ind_table[],
u32 log_ind_tbl_size, u32 rx_hash_key_len,
u8 *rx_hash_key)
{
struct mana_port_context *mpc = netdev_priv(ndev);
struct mana_cfg_rx_steer_req *req = NULL;
struct mana_cfg_rx_steer_resp resp = {};
mana_handle_t *req_indir_tab;
struct gdma_context *gc;
struct gdma_dev *mdev;
u32 req_buf_size;
int i, err;
mdev = dev->gdma_dev;
gc = mdev->gdma_context;
req_buf_size =
sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE;
req = kzalloc(req_buf_size, GFP_KERNEL);
if (!req)
return -ENOMEM;
mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
sizeof(resp));
req->vport = mpc->port_handle;
req->rx_enable = 1;
req->update_default_rxobj = 1;
req->default_rxobj = default_rxobj;
req->hdr.dev_id = mdev->dev_id;
/* If there is more than one entry in the indirection table, enable RSS */
if (log_ind_tbl_size)
req->rss_enable = true;
req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
req->indir_tab_offset = sizeof(*req);
req->update_indir_tab = true;
req_indir_tab = (mana_handle_t *)(req + 1);
/* The ind table passed to the hardware must have
* MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
* ind_table to MANA_INDIRECT_TABLE_SIZE if required
*/
ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
req_indir_tab[i]);
}
req->update_hashkey = true;
if (rx_hash_key_len)
memcpy(req->hashkey, rx_hash_key, rx_hash_key_len);
else
netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
req->vport, default_rxobj);
err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp);
if (err) {
netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
goto out;
}
if (resp.hdr.status) {
netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
resp.hdr.status);
err = -EPROTO;
goto out;
}
netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n",
mpc->port_handle, log_ind_tbl_size);
out:
kfree(req);
return err;
}
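For example, with a verbs indirection table of four receive WQs (log_ind_tbl_size = 2), the loop above fills every one of the MANA_INDIRECT_TABLE_SIZE hardware entries by repeating the four rx_object handles in order (entry i gets handle i % 4), and the hash key is either taken from the verbs request or filled with a random key via netdev_rss_key_fill().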
static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
struct mana_ib_dev *mdev =
container_of(pd->device, struct mana_ib_dev, ib_dev);
struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
struct mana_ib_create_qp_rss_resp resp = {};
struct mana_ib_create_qp_rss ucmd = {};
struct gdma_dev *gd = mdev->gdma_dev;
mana_handle_t *mana_ind_table;
struct mana_port_context *mpc;
struct mana_context *mc;
struct net_device *ndev;
struct mana_ib_cq *cq;
struct mana_ib_wq *wq;
unsigned int ind_tbl_size;
struct ib_cq *ibcq;
struct ib_wq *ibwq;
int i = 0;
u32 port;
int ret;
mc = gd->driver_data;
if (!udata || udata->inlen < sizeof(ucmd))
return -EINVAL;
ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
if (ret) {
ibdev_dbg(&mdev->ib_dev,
"Failed copy from udata for create rss-qp, err %d\n",
ret);
return ret;
}
if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
ibdev_dbg(&mdev->ib_dev,
"Requested max_recv_wr %d exceeding limit\n",
attr->cap.max_recv_wr);
return -EINVAL;
}
if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
ibdev_dbg(&mdev->ib_dev,
"Requested max_recv_sge %d exceeding limit\n",
attr->cap.max_recv_sge);
return -EINVAL;
}
ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
ibdev_dbg(&mdev->ib_dev,
"Indirect table size %d exceeding limit\n",
ind_tbl_size);
return -EINVAL;
}
if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
ibdev_dbg(&mdev->ib_dev,
"RX Hash function is not supported, %d\n",
ucmd.rx_hash_function);
return -EINVAL;
}
/* IB ports start with 1, MANA ports start with 0 */
port = ucmd.port;
if (port < 1 || port > mc->num_ports) {
ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
port);
return -EINVAL;
}
ndev = mc->ports[port - 1];
mpc = netdev_priv(ndev);
ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
ucmd.rx_hash_function, port);
mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t),
GFP_KERNEL);
if (!mana_ind_table) {
ret = -ENOMEM;
goto fail;
}
qp->port = port;
for (i = 0; i < ind_tbl_size; i++) {
struct mana_obj_spec wq_spec = {};
struct mana_obj_spec cq_spec = {};
ibwq = ind_tbl->ind_tbl[i];
wq = container_of(ibwq, struct mana_ib_wq, ibwq);
ibcq = ibwq->cq;
cq = container_of(ibcq, struct mana_ib_cq, ibcq);
wq_spec.gdma_region = wq->gdma_region;
wq_spec.queue_size = wq->wq_buf_size;
cq_spec.gdma_region = cq->gdma_region;
cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
cq_spec.attached_eq = GDMA_CQ_NO_EQ;
ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
&wq_spec, &cq_spec, &wq->rx_object);
if (ret)
goto fail;
/* The GDMA regions are now owned by the WQ object */
wq->gdma_region = GDMA_INVALID_DMA_REGION;
cq->gdma_region = GDMA_INVALID_DMA_REGION;
wq->id = wq_spec.queue_index;
cq->id = cq_spec.queue_index;
ibdev_dbg(&mdev->ib_dev,
"ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
ret, wq->rx_object, wq->id, cq->id);
resp.entries[i].cqid = cq->id;
resp.entries[i].wqid = wq->id;
mana_ind_table[i] = wq->rx_object;
}
resp.num_entries = i;
ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
mana_ind_table,
ind_tbl->log_ind_tbl_size,
ucmd.rx_hash_key_len,
ucmd.rx_hash_key);
if (ret)
goto fail;
ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (ret) {
ibdev_dbg(&mdev->ib_dev,
"Failed to copy to udata create rss-qp, %d\n",
ret);
goto fail;
}
kfree(mana_ind_table);
return 0;
fail:
while (i-- > 0) {
ibwq = ind_tbl->ind_tbl[i];
wq = container_of(ibwq, struct mana_ib_wq, ibwq);
mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
}
kfree(mana_ind_table);
return ret;
}
static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
struct mana_ib_dev *mdev =
container_of(ibpd->device, struct mana_ib_dev, ib_dev);
struct mana_ib_cq *send_cq =
container_of(attr->send_cq, struct mana_ib_cq, ibcq);
struct mana_ib_ucontext *mana_ucontext =
rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
ibucontext);
struct mana_ib_create_qp_resp resp = {};
struct gdma_dev *gd = mdev->gdma_dev;
struct mana_ib_create_qp ucmd = {};
struct mana_obj_spec wq_spec = {};
struct mana_obj_spec cq_spec = {};
struct mana_port_context *mpc;
struct mana_context *mc;
struct net_device *ndev;
struct ib_umem *umem;
int err;
u32 port;
mc = gd->driver_data;
if (!mana_ucontext || udata->inlen < sizeof(ucmd))
return -EINVAL;
err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to copy from udata create qp-raw, %d\n", err);
return err;
}
/* IB ports start with 1, MANA Ethernet ports start with 0 */
port = ucmd.port;
if (ucmd.port > mc->num_ports)
return -EINVAL;
if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
ibdev_dbg(&mdev->ib_dev,
"Requested max_send_wr %d exceeding limit\n",
attr->cap.max_send_wr);
return -EINVAL;
}
if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
ibdev_dbg(&mdev->ib_dev,
"Requested max_send_sge %d exceeding limit\n",
attr->cap.max_send_sge);
return -EINVAL;
}
ndev = mc->ports[port - 1];
mpc = netdev_priv(ndev);
ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell);
if (err)
return -ENODEV;
qp->port = port;
ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
ucmd.sq_buf_addr, ucmd.port);
umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem)) {
err = PTR_ERR(umem);
ibdev_dbg(&mdev->ib_dev,
"Failed to get umem for create qp-raw, err %d\n",
err);
goto err_free_vport;
}
qp->sq_umem = umem;
err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
&qp->sq_gdma_region);
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to create dma region for create qp-raw, %d\n",
err);
goto err_release_umem;
}
ibdev_dbg(&mdev->ib_dev,
"mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
err, qp->sq_gdma_region);
/* Create a WQ on the same port handle used by the Ethernet */
wq_spec.gdma_region = qp->sq_gdma_region;
wq_spec.queue_size = ucmd.sq_buf_size;
cq_spec.gdma_region = send_cq->gdma_region;
cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
cq_spec.attached_eq = GDMA_CQ_NO_EQ;
err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
&cq_spec, &qp->tx_object);
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to create wq for create raw-qp, err %d\n",
err);
goto err_destroy_dma_region;
}
/* The GDMA regions are now owned by the WQ object */
qp->sq_gdma_region = GDMA_INVALID_DMA_REGION;
send_cq->gdma_region = GDMA_INVALID_DMA_REGION;
qp->sq_id = wq_spec.queue_index;
send_cq->id = cq_spec.queue_index;
ibdev_dbg(&mdev->ib_dev,
"ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
qp->tx_object, qp->sq_id, send_cq->id);
resp.sqid = qp->sq_id;
resp.cqid = send_cq->id;
resp.tx_vp_offset = pd->tx_vp_offset;
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed copy udata for create qp-raw, %d\n",
err);
goto err_destroy_wq_obj;
}
return 0;
err_destroy_wq_obj:
mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
err_destroy_dma_region:
mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
err_release_umem:
ib_umem_release(umem);
err_free_vport:
mana_ib_uncfg_vport(mdev, pd, port - 1);
return err;
}
int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
switch (attr->qp_type) {
case IB_QPT_RAW_PACKET:
/* When rwq_ind_tbl is used, it's for creating WQs for RSS */
if (attr->rwq_ind_tbl)
return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr,
udata);
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
default:
/* Creating QP other than IB_QPT_RAW_PACKET is not supported */
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
attr->qp_type);
}
return -EINVAL;
}
int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
/* modify_qp is not supported by this version of the driver */
return -EOPNOTSUPP;
}
static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
struct ib_rwq_ind_table *ind_tbl,
struct ib_udata *udata)
{
struct mana_ib_dev *mdev =
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
struct gdma_dev *gd = mdev->gdma_dev;
struct mana_port_context *mpc;
struct mana_context *mc;
struct net_device *ndev;
struct mana_ib_wq *wq;
struct ib_wq *ibwq;
int i;
mc = gd->driver_data;
ndev = mc->ports[qp->port - 1];
mpc = netdev_priv(ndev);
for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
ibwq = ind_tbl->ind_tbl[i];
wq = container_of(ibwq, struct mana_ib_wq, ibwq);
ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n",
wq->rx_object);
mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
}
return 0;
}
static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
{
struct mana_ib_dev *mdev =
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
struct gdma_dev *gd = mdev->gdma_dev;
struct ib_pd *ibpd = qp->ibqp.pd;
struct mana_port_context *mpc;
struct mana_context *mc;
struct net_device *ndev;
struct mana_ib_pd *pd;
mc = gd->driver_data;
ndev = mc->ports[qp->port - 1];
mpc = netdev_priv(ndev);
pd = container_of(ibpd, struct mana_ib_pd, ibpd);
mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
if (qp->sq_umem) {
mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
ib_umem_release(qp->sq_umem);
}
mana_ib_uncfg_vport(mdev, pd, qp->port - 1);
return 0;
}
int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
switch (ibqp->qp_type) {
case IB_QPT_RAW_PACKET:
if (ibqp->rwq_ind_tbl)
return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl,
udata);
return mana_ib_destroy_qp_raw(qp, udata);
default:
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
ibqp->qp_type);
}
return -ENOENT;
}

View File

@ -0,0 +1,115 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2022, Microsoft Corporation. All rights reserved.
*/
#include "mana_ib.h"
struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata)
{
struct mana_ib_dev *mdev =
container_of(pd->device, struct mana_ib_dev, ib_dev);
struct mana_ib_create_wq ucmd = {};
struct mana_ib_wq *wq;
struct ib_umem *umem;
int err;
if (udata->inlen < sizeof(ucmd))
return ERR_PTR(-EINVAL);
err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to copy from udata for create wq, %d\n", err);
return ERR_PTR(err);
}
wq = kzalloc(sizeof(*wq), GFP_KERNEL);
if (!wq)
return ERR_PTR(-ENOMEM);
ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem)) {
err = PTR_ERR(umem);
ibdev_dbg(&mdev->ib_dev,
"Failed to get umem for create wq, err %d\n", err);
goto err_free_wq;
}
wq->umem = umem;
wq->wqe = init_attr->max_wr;
wq->wq_buf_size = ucmd.wq_buf_size;
wq->rx_object = INVALID_MANA_HANDLE;
err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region);
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to create dma region for create wq, %d\n",
err);
goto err_release_umem;
}
ibdev_dbg(&mdev->ib_dev,
"mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
err, wq->gdma_region);
/* The WQ ID is returned at wq_create time; it is not known yet */
return &wq->ibwq;
err_release_umem:
ib_umem_release(umem);
err_free_wq:
kfree(wq);
return ERR_PTR(err);
}
int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata)
{
/* modify_wq is not supported by this version of the driver */
return -EOPNOTSUPP;
}
int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
{
struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq, ibwq);
struct ib_device *ib_dev = ibwq->device;
struct mana_ib_dev *mdev;
mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region);
ib_umem_release(wq->umem);
kfree(wq);
return 0;
}
int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata)
{
/*
* There is no additional data in ind_table to be maintained by this
* driver; do nothing
*/
return 0;
}
int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
{
/*
* There is no additional data in ind_table to be maintained by this
* driver; do nothing
*/
return 0;
}

View File

@ -144,8 +144,7 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
}
}
}
if (dev)
dev_hold(dev);
dev_hold(dev);
rcu_read_unlock();
return dev;
@ -1307,8 +1306,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
spin_lock_bh(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
if (ndev)
dev_hold(ndev);
dev_hold(ndev);
spin_unlock_bh(&mdev->iboe.lock);
if (ndev) {
@ -1955,11 +1953,9 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (ge) {
spin_lock_bh(&mdev->iboe.lock);
ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
if (ndev)
dev_hold(ndev);
dev_hold(ndev);
spin_unlock_bh(&mdev->iboe.lock);
if (ndev)
dev_put(ndev);
dev_put(ndev);
list_del(&ge->list);
kfree(ge);
} else

View File

@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
struct ib_wc *wc, const char *level)
{
mlx5_ib_warn(dev, "dump error cqe\n");
mlx5_dump_err_cqe(dev->mdev, cqe);
mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
ib_wc_status_msg(wc->status));
print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
cqe, sizeof(*cqe), false);
}
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
struct mlx5_err_cqe *cqe,
struct ib_wc *wc)
{
int dump = 1;
const char *dump = KERN_WARNING;
switch (cqe->syndrome) {
case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_LOC_QP_OP_ERR;
break;
case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_LOC_PROT_ERR;
break;
case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
dump = 0;
dump = NULL;
wc->status = IB_WC_WR_FLUSH_ERR;
break;
case MLX5_CQE_SYNDROME_MW_BIND_ERR:
@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_REM_INV_REQ_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_REM_ACCESS_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_REM_OP_ERR;
break;
case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
dump = NULL;
wc->status = IB_WC_RETRY_EXC_ERR;
dump = 0;
break;
case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
dump = NULL;
wc->status = IB_WC_RNR_RETRY_EXC_ERR;
dump = 0;
break;
case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
wc->status = IB_WC_REM_ABORT_ERR;
@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
}
wc->vendor_err = cqe->vendor_err_synd;
if (dump) {
mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
ib_wc_status_msg(wc->status));
dump_cqe(dev, cqe);
}
if (dump)
dump_cqe(dev, cqe, wc, dump);
}
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
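
The reworked dump_cqe() above folds the old "should we dump?" flag and the message severity into a single printk level string: KERN_WARNING for unexpected errors, KERN_DEBUG for less severe ones, and NULL for flush and retry-exceeded completions that are not dumped at all (in the driver the NULL check sits in the caller). A user-space sketch of that pattern (the level strings and dump_err() are stand-ins, not mlx5 functions):

#include <stdio.h>

/* user-space stand-ins for the kernel printk level prefixes */
#define LVL_WARNING "warn: "
#define LVL_DEBUG   "dbg:  "

/* illustrative version of the pattern above: the level string doubles as the
 * "dump at all?" decision (NULL means skip) and as the severity of the dump
 */
static void dump_err(const char *level, int wc_status)
{
	if (!level)
		return;

	printf("%sWC error: %d\n", level, wc_status);
}

int main(void)
{
	dump_err(LVL_WARNING, 2);	/* unexpected error: dump loudly */
	dump_err(LVL_DEBUG, 10);	/* less severe error: dump quietly */
	dump_err(NULL, 5);		/* flushed WR: nothing to dump */
	return 0;
}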

View File

@ -127,7 +127,6 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
}
#define LAST_ETH_FIELD vlan_tag
#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port

View File

@ -38,6 +38,10 @@
dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
__LINE__, current->pid, ##arg)
#define mlx5_ib_log(lvl, _dev, format, arg...) \
dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \
__func__, __LINE__, current->pid, ##arg)
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)

View File

@ -1929,10 +1929,8 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto free;
}
if (!in)
return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

View File

@ -479,7 +479,7 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
/* The CQ's CNQ notification counter is checked before
* destroying the CQ in a busy-wait loop that waits for all of
* the CQ's CNQ interrupts to be processed. It is increased
* here, only after the completion handler, to ensure that the
* here, only after the completion handler, to ensure that
* the handler is not running when the CQ is destroyed.
*/
cq->cnq_notif++;

View File

@ -799,12 +799,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
hwerrs &= ~TXE_PIO_PARITY;
}
if (!hwerrs) {
static u32 freeze_cnt;
freeze_cnt++;
if (!hwerrs)
qib_6120_clear_freeze(dd);
} else
else
isfatal = 1;
}

View File

@ -82,7 +82,6 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
struct qib_devdata *dd = rcd->dd;
unsigned i;
unsigned last;
unsigned n = 0;
last = rcd->pio_base + rcd->piocnt;
/*
@ -102,10 +101,8 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
}
spin_lock_irq(&dd->pioavail_lock);
for (i = rcd->pio_base; i < last; i++) {
if (__test_and_clear_bit(i, dd->pio_need_disarm)) {
n++;
if (__test_and_clear_bit(i, dd->pio_need_disarm))
dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i));
}
}
spin_unlock_irq(&dd->pioavail_lock);
return 0;

View File

@ -851,7 +851,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
}
/*
* This assignment is a bit strange. it's because the
* This assignment is a bit strange. it's because
* the pbc counts the number of 32 bit words in the full
* packet _except_ the first word of the pbc itself...
*/

View File

@ -187,14 +187,14 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
exists = rxe_get_dev_from_net(ndev);
if (exists) {
ib_device_put(&exists->ib_dev);
pr_err("already configured on %s\n", ndev->name);
rxe_dbg(exists, "already configured on %s\n", ndev->name);
err = -EEXIST;
goto err;
}
err = rxe_net_add(ibdev_name, ndev);
if (err) {
pr_err("failed to add %s\n", ndev->name);
rxe_dbg(exists, "failed to add %s\n", ndev->name);
goto err;
}
err:

View File

@ -38,6 +38,25 @@
#define RXE_ROCE_V2_SPORT (0xc000)
#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \
"%s: " fmt, __func__, ##__VA_ARGS__)
#define rxe_dbg_uc(uc, fmt, ...) ibdev_dbg((uc)->ibuc.device, \
"uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__)
#define rxe_dbg_pd(pd, fmt, ...) ibdev_dbg((pd)->ibpd.device, \
"pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__)
#define rxe_dbg_ah(ah, fmt, ...) ibdev_dbg((ah)->ibah.device, \
"ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__)
#define rxe_dbg_srq(srq, fmt, ...) ibdev_dbg((srq)->ibsrq.device, \
"srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__)
#define rxe_dbg_qp(qp, fmt, ...) ibdev_dbg((qp)->ibqp.device, \
"qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__)
#define rxe_dbg_cq(cq, fmt, ...) ibdev_dbg((cq)->ibcq.device, \
"cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__)
#define rxe_dbg_mr(mr, fmt, ...) ibdev_dbg((mr)->ibmr.device, \
"mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__)
#define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device, \
"mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
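
The per-object rxe_dbg_*() macros above all expand to ibdev_dbg() with a uniform "<type>#<index> <function>: " prefix, so a message such as rxe_dbg_qp(qp, "state -> ERR\n") identifies both the QP and the call site. A user-space sketch of the same prefixing trick (fprintf stands in for ibdev_dbg):

#include <stdio.h>

struct fake_qp {
	int index;		/* stands in for qp->elem.index */
};

/* same shape as rxe_dbg_qp(): prepend the object index and the calling
 * function, then forward the caller's format string and arguments
 */
#define dbg_qp(qp, fmt, ...) \
	fprintf(stderr, "qp#%d %s: " fmt, (qp)->index, __func__, ##__VA_ARGS__)

static void set_error_state(struct fake_qp *qp)
{
	dbg_qp(qp, "state -> %s\n", "ERR");
	/* prints: qp#17 set_error_state: state -> ERR */
}

int main(void)
{
	struct fake_qp qp = { .index = 17 };

	set_error_state(&qp);
	return 0;
}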

View File

@ -14,26 +14,45 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
memcpy(av->dmac, attr->roce.dmac, ETH_ALEN);
}
int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
static int chk_attr(void *obj, struct rdma_ah_attr *attr, bool obj_is_ah)
{
const struct ib_global_route *grh = rdma_ah_read_grh(attr);
struct rxe_port *port;
struct rxe_dev *rxe;
struct rxe_qp *qp;
struct rxe_ah *ah;
int type;
if (obj_is_ah) {
ah = obj;
rxe = to_rdev(ah->ibah.device);
} else {
qp = obj;
rxe = to_rdev(qp->ibqp.device);
}
port = &rxe->port;
if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) {
if (grh->sgid_index > port->attr.gid_tbl_len) {
pr_warn("invalid sgid index = %d\n",
grh->sgid_index);
if (obj_is_ah)
rxe_dbg_ah(ah, "invalid sgid index = %d\n",
grh->sgid_index);
else
rxe_dbg_qp(qp, "invalid sgid index = %d\n",
grh->sgid_index);
return -EINVAL;
}
type = rdma_gid_attr_network_type(grh->sgid_attr);
if (type < RDMA_NETWORK_IPV4 ||
type > RDMA_NETWORK_IPV6) {
pr_warn("invalid network type for rdma_rxe = %d\n",
type);
if (obj_is_ah)
rxe_dbg_ah(ah, "invalid network type for rdma_rxe = %d\n",
type);
else
rxe_dbg_qp(qp, "invalid network type for rdma_rxe = %d\n",
type);
return -EINVAL;
}
}
@ -41,6 +60,16 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
return 0;
}
int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr)
{
return chk_attr(qp, attr, false);
}
int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr)
{
return chk_attr(ah, attr, true);
}
void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
struct rdma_ah_attr *attr)
{
@ -121,12 +150,12 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp)
/* only new user provider or kernel client */
ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num);
if (!ah) {
pr_warn("Unable to find AH matching ah_num\n");
rxe_dbg_qp(pkt->qp, "Unable to find AH matching ah_num\n");
return NULL;
}
if (rxe_ah_pd(ah) != pkt->qp->pd) {
pr_warn("PDs don't match for AH and QP\n");
rxe_dbg_qp(pkt->qp, "PDs don't match for AH and QP\n");
rxe_put(ah);
return NULL;
}

View File

@ -104,6 +104,8 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
case IB_WR_REG_MR: return IB_WC_REG_MR;
case IB_WR_BIND_MW: return IB_WC_BIND_MW;
case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE;
case IB_WR_FLUSH: return IB_WC_FLUSH;
default:
return 0xff;
@ -114,11 +116,11 @@ void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
rxe_dbg_qp(qp, "retransmit timer fired\n");
if (qp->valid) {
qp->comp.timeout = 1;
rxe_run_task(&qp->comp.task, 1);
rxe_sched_task(&qp->comp.task);
}
}
@ -132,7 +134,10 @@ void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
if (must_sched != 0)
rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
rxe_run_task(&qp->comp.task, must_sched);
if (must_sched)
rxe_sched_task(&qp->comp.task);
else
rxe_run_task(&qp->comp.task);
}
static inline enum comp_state get_wqe(struct rxe_qp *qp,
@ -200,6 +205,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp,
*/
if (pkt->psn == wqe->last_psn)
return COMPST_COMP_ACK;
else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE &&
(qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST ||
qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE))
return COMPST_CHECK_ACK;
else
return COMPST_DONE;
} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
@ -228,6 +237,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
/* Check NAK code to handle a remote error */
if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE)
break;
if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
/* read retries of partial data may restart from
@ -258,12 +271,16 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
if ((syn & AETH_TYPE_MASK) != AETH_ACK)
return COMPST_ERROR;
if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE)
return COMPST_WRITE_SEND;
fallthrough;
/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
*/
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
if (wqe->wr.opcode != IB_WR_RDMA_READ &&
wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
wqe->wr.opcode != IB_WR_FLUSH) {
wqe->status = IB_WC_FATAL_ERR;
return COMPST_ERROR;
}
@ -305,7 +322,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
rxe_run_task(&qp->req.task, 0);
rxe_run_task(&qp->req.task);
}
}
return COMPST_ERROR_RETRY;
@ -323,7 +340,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
return COMPST_ERROR;
default:
pr_warn("unexpected nak %x\n", syn);
rxe_dbg_qp(qp, "unexpected nak %x\n", syn);
wqe->status = IB_WC_REM_OP_ERR;
return COMPST_ERROR;
}
@ -334,7 +351,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
break;
default:
pr_warn("unexpected opcode\n");
rxe_dbg_qp(qp, "unexpected opcode\n");
}
return COMPST_ERROR;
@ -452,7 +469,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
rxe_run_task(&qp->req.task, 0);
rxe_run_task(&qp->req.task);
}
}
@ -466,7 +483,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
rxe_run_task(&qp->req.task, 0);
rxe_run_task(&qp->req.task);
}
}
@ -512,7 +529,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
rxe_run_task(&qp->req.task, 1);
rxe_sched_task(&qp->req.task);
}
}
@ -587,8 +604,7 @@ int rxe_completer(void *arg)
state = COMPST_GET_ACK;
while (1) {
pr_debug("qp#%d state = %s\n", qp_num(qp),
comp_state_name[state]);
rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]);
switch (state) {
case COMPST_GET_ACK:
skb = skb_dequeue(&qp->resp_pkts);
@ -646,7 +662,7 @@ int rxe_completer(void *arg)
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
rxe_run_task(&qp->req.task, 1);
rxe_sched_task(&qp->req.task);
}
state = COMPST_DONE;
@ -714,7 +730,7 @@ int rxe_completer(void *arg)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
rxe_run_task(&qp->req.task, 0);
rxe_run_task(&qp->req.task);
}
goto done;
@ -735,8 +751,7 @@ int rxe_completer(void *arg)
* rnr timer has fired
*/
qp->req.wait_for_rnr_timer = 1;
pr_debug("qp#%d set rnr nak timer\n",
qp_num(qp));
rxe_dbg_qp(qp, "set rnr nak timer\n");
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));

View File

@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
int count;
if (cqe <= 0) {
pr_warn("cqe(%d) <= 0\n", cqe);
rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe);
goto err1;
}
if (cqe > rxe->attr.max_cqe) {
pr_debug("cqe(%d) > max_cqe(%d)\n",
rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n",
cqe, rxe->attr.max_cqe);
goto err1;
}
@ -27,7 +27,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
if (cq) {
count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (cqe < count) {
pr_debug("cqe(%d) < current # elements in queue (%d)",
rxe_dbg_cq(cq, "cqe(%d) < current # elements in queue (%d)",
cqe, count);
goto err1;
}
@ -65,7 +65,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
cq->queue = rxe_queue_init(rxe, &cqe,
sizeof(struct rxe_cqe), type);
if (!cq->queue) {
pr_warn("unable to create cq\n");
rxe_dbg(rxe, "unable to create cq\n");
return -ENOMEM;
}

View File

@ -607,6 +607,52 @@ static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)
rxe_opcode[pkt->opcode].offset[RXE_RETH], len);
}
/******************************************************************************
* FLUSH Extended Transport Header
******************************************************************************/
struct rxe_feth {
__be32 bits;
};
#define FETH_PLT_MASK (0x0000000f) /* bits 3-0 */
#define FETH_SEL_MASK (0x00000030) /* bits 5-4 */
#define FETH_SEL_SHIFT (4U)
static inline u32 __feth_plt(void *arg)
{
struct rxe_feth *feth = arg;
return be32_to_cpu(feth->bits) & FETH_PLT_MASK;
}
static inline u32 __feth_sel(void *arg)
{
struct rxe_feth *feth = arg;
return (be32_to_cpu(feth->bits) & FETH_SEL_MASK) >> FETH_SEL_SHIFT;
}
static inline u32 feth_plt(struct rxe_pkt_info *pkt)
{
return __feth_plt(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
}
static inline u32 feth_sel(struct rxe_pkt_info *pkt)
{
return __feth_sel(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
}
static inline void feth_init(struct rxe_pkt_info *pkt, u8 type, u8 level)
{
struct rxe_feth *feth = (struct rxe_feth *)
(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
u32 bits = ((level << FETH_SEL_SHIFT) & FETH_SEL_MASK) |
(type & FETH_PLT_MASK);
feth->bits = cpu_to_be32(bits);
}
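
The new FETH is a single big-endian 32-bit word: the placement type (PLT) sits in bits 3:0 and the selectivity level (SEL) in bits 5:4, and an RC FLUSH request therefore carries BTH + FETH + RETH (with the usual 12-byte BTH and 16-byte RETH, 32 header bytes in total, as encoded in rxe_opcode[] further down). A standalone sketch of the packing, reusing the masks defined above (the type/level values in main() are illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>	/* htonl()/ntohl() stand in for cpu_to_be32()/be32_to_cpu() */

#define FETH_PLT_MASK	(0x0000000f)	/* bits 3-0: placement type */
#define FETH_SEL_MASK	(0x00000030)	/* bits 5-4: selectivity level */
#define FETH_SEL_SHIFT	(4U)

static uint32_t feth_pack(uint8_t type, uint8_t level)
{
	uint32_t bits = ((level << FETH_SEL_SHIFT) & FETH_SEL_MASK) |
			(type & FETH_PLT_MASK);

	return htonl(bits);	/* the header is big endian on the wire */
}

int main(void)
{
	uint32_t wire = feth_pack(0x2, 0x1);	/* illustrative type/level values */
	uint32_t bits = ntohl(wire);

	printf("plt=%u sel=%u\n",
	       bits & FETH_PLT_MASK,
	       (bits & FETH_SEL_MASK) >> FETH_SEL_SHIFT);	/* plt=2 sel=1 */
	return 0;
}
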
/******************************************************************************
* Atomic Extended Transport Header
******************************************************************************/
@ -742,7 +788,6 @@ enum aeth_syndrome {
AETH_NAK_INVALID_REQ = 0x61,
AETH_NAK_REM_ACC_ERR = 0x62,
AETH_NAK_REM_OP_ERR = 0x63,
AETH_NAK_INV_RD_REQ = 0x64,
};
static inline u8 __aeth_syn(void *arg)
@ -910,6 +955,7 @@ enum rxe_hdr_length {
RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth),
RXE_IETH_BYTES = sizeof(struct rxe_ieth),
RXE_RDETH_BYTES = sizeof(struct rxe_rdeth),
RXE_FETH_BYTES = sizeof(struct rxe_feth),
};
static inline size_t header_size(struct rxe_pkt_info *pkt)

View File

@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe)
tfm = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(tfm)) {
pr_warn("failed to init crc32 algorithm err:%ld\n",
rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n",
PTR_ERR(tfm));
return PTR_ERR(tfm);
}
@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
*(__be32 *)shash_desc_ctx(shash) = crc;
err = crypto_shash_update(shash, next, len);
if (unlikely(err)) {
pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
rxe_dbg(rxe, "failed crc calculation, err: %d\n", err);
return (__force __be32)crc32_le((__force u32)crc, next, len);
}

View File

@ -9,16 +9,12 @@
/* rxe_av.c */
void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr);
int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr);
void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
struct rdma_ah_attr *attr);
void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp);
/* rxe_cq.c */
@ -68,6 +64,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
int access, struct rxe_mr *mr);
int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length);
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
enum rxe_mr_copy_dir dir);
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,

View File

@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
/* Don't allow a mmap larger than the object. */
if (size > ip->info.size) {
pr_err("mmap region is larger than the object!\n");
rxe_dbg(rxe, "mmap region is larger than the object!\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
goto found_it;
}
pr_warn("unable to find pending mmap info\n");
rxe_dbg(rxe, "unable to find pending mmap info\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
@ -98,7 +98,7 @@ found_it:
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret) {
pr_err("err %d from remap_vmalloc_range\n", ret);
rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret);
goto done;
}

View File

@ -4,6 +4,8 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
#include <linux/libnvdimm.h>
#include "rxe.h"
#include "rxe_loc.h"
@ -26,7 +28,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
switch (mr->type) {
switch (mr->ibmr.type) {
case IB_MR_TYPE_DMA:
return 0;
@ -38,8 +40,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
return 0;
default:
pr_warn("%s: mr type (%d) not supported\n",
__func__, mr->type);
rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type);
return -EFAULT;
}
}
@ -62,7 +63,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
mr->rkey = mr->ibmr.rkey = rkey;
mr->state = RXE_MR_STATE_INVALID;
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
@ -99,6 +99,7 @@ err2:
kfree(mr->map[i]);
kfree(mr->map);
mr->map = NULL;
err1:
return -ENOMEM;
}
@ -109,7 +110,16 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr)
mr->access = access;
mr->state = RXE_MR_STATE_VALID;
mr->type = IB_MR_TYPE_DMA;
mr->ibmr.type = IB_MR_TYPE_DMA;
}
static bool is_pmem_page(struct page *pg)
{
unsigned long paddr = page_to_phys(pg);
return REGION_INTERSECTS ==
region_intersects(paddr, PAGE_SIZE, IORESOURCE_MEM,
IORES_DESC_PERSISTENT_MEMORY);
}
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
@ -122,12 +132,11 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
int num_buf;
void *vaddr;
int err;
int i;
umem = ib_umem_get(&rxe->ib_dev, start, length, access);
if (IS_ERR(umem)) {
pr_warn("%s: Unable to pin memory region err = %d\n",
__func__, (int)PTR_ERR(umem));
rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n",
(int)PTR_ERR(umem));
err = PTR_ERR(umem);
goto err_out;
}
@ -138,8 +147,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
err = rxe_mr_alloc(mr, num_buf);
if (err) {
pr_warn("%s: Unable to allocate memory for map\n",
__func__);
rxe_dbg_mr(mr, "Unable to allocate memory for map\n");
goto err_release_umem;
}
@ -149,23 +157,30 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
num_buf = 0;
map = mr->map;
if (length > 0) {
buf = map[0]->buf;
bool persistent_access = access & IB_ACCESS_FLUSH_PERSISTENT;
buf = map[0]->buf;
for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
struct page *pg = sg_page_iter_page(&sg_iter);
if (persistent_access && !is_pmem_page(pg)) {
rxe_dbg_mr(mr, "Unable to register persistent access to non-pmem device\n");
err = -EINVAL;
goto err_release_umem;
}
if (num_buf >= RXE_BUF_PER_MAP) {
map++;
buf = map[0]->buf;
num_buf = 0;
}
vaddr = page_address(sg_page_iter_page(&sg_iter));
vaddr = page_address(pg);
if (!vaddr) {
pr_warn("%s: Unable to get virtual address\n",
__func__);
rxe_dbg_mr(mr, "Unable to get virtual address\n");
err = -ENOMEM;
goto err_cleanup_map;
goto err_release_umem;
}
buf->addr = (uintptr_t)vaddr;
buf->size = PAGE_SIZE;
num_buf++;
@ -178,14 +193,11 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
mr->access = access;
mr->offset = ib_umem_offset(umem);
mr->state = RXE_MR_STATE_VALID;
mr->type = IB_MR_TYPE_USER;
mr->ibmr.type = IB_MR_TYPE_USER;
mr->ibmr.page_size = PAGE_SIZE;
return 0;
err_cleanup_map:
for (i = 0; i < mr->num_map; i++)
kfree(mr->map[i]);
kfree(mr->map);
err_release_umem:
ib_umem_release(umem);
err_out:
@ -205,7 +217,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
mr->max_buf = max_pages;
mr->state = RXE_MR_STATE_FREE;
mr->type = IB_MR_TYPE_MEM_REG;
mr->ibmr.type = IB_MR_TYPE_MEM_REG;
return 0;
@ -256,7 +268,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
void *addr;
if (mr->state != RXE_MR_STATE_VALID) {
pr_warn("mr not in valid state\n");
rxe_dbg_mr(mr, "Not in valid state\n");
addr = NULL;
goto out;
}
@ -267,7 +279,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
}
if (mr_check_range(mr, iova, length)) {
pr_warn("range violation\n");
rxe_dbg_mr(mr, "Range violation\n");
addr = NULL;
goto out;
}
@ -275,7 +287,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
lookup_iova(mr, iova, &m, &n, &offset);
if (offset + length > mr->map[m]->buf[n].size) {
pr_warn("crosses page boundary\n");
rxe_dbg_mr(mr, "Crosses page boundary\n");
addr = NULL;
goto out;
}
@ -286,6 +298,39 @@ out:
return addr;
}
int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length)
{
size_t offset;
if (length == 0)
return 0;
if (mr->ibmr.type == IB_MR_TYPE_DMA)
return -EFAULT;
offset = (iova - mr->ibmr.iova + mr->offset) & mr->page_mask;
while (length > 0) {
u8 *va;
int bytes;
bytes = mr->ibmr.page_size - offset;
if (bytes > length)
bytes = length;
va = iova_to_vaddr(mr, iova, length);
if (!va)
return -EFAULT;
arch_wb_cache_pmem(va, bytes);
length -= bytes;
iova += bytes;
offset = 0;
}
return 0;
}
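
rxe_flush_pmem_iova() above walks the requested range in page-sized steps, trimming the first chunk to its page boundary, and hands each chunk to arch_wb_cache_pmem(). A standalone sketch of just that chunking arithmetic (PAGE_SIZE and writeback() are local stand-ins, not kernel APIs, and offset here is simply the in-page offset of iova):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

/* stand-in for arch_wb_cache_pmem(): only report what would be written back */
static void writeback(uint64_t iova, unsigned int bytes)
{
	printf("flush iova 0x%llx, %u bytes\n",
	       (unsigned long long)iova, bytes);
}

/* illustrative copy of the chunking logic: the first chunk is trimmed to the
 * end of its page, every later chunk starts page aligned
 */
static void flush_range(uint64_t iova, unsigned int length)
{
	unsigned int offset = iova & (PAGE_SIZE - 1);

	while (length > 0) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (bytes > length)
			bytes = length;

		writeback(iova, bytes);

		length -= bytes;
		iova += bytes;
		offset = 0;
	}
}

int main(void)
{
	flush_range(0x1000f00, 8192);	/* 256 + 4096 + 3840 byte chunks */
	return 0;
}
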
/* copy data from a range (vaddr, vaddr+length-1) to or from
* a mr object starting at iova.
*/
@ -304,7 +349,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
if (length == 0)
return 0;
if (mr->type == IB_MR_TYPE_DMA) {
if (mr->ibmr.type == IB_MR_TYPE_DMA) {
u8 *src, *dest;
src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
@ -511,7 +556,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
(type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
mr_pd(mr) != pd || (access && !(access & mr->access)) ||
mr_pd(mr) != pd || ((access & mr->access) != access) ||
mr->state != RXE_MR_STATE_VALID)) {
rxe_put(mr);
mr = NULL;
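
The lookup_mr() change above (and the matching one in rxe_lookup_mw() further down) replaces an "at least one requested right is present" test with "every requested right is present", which matters now that a FLUSH request can ask for the global and persistent rights at the same time. A small sketch of the difference (the access bits are illustrative; the real flags are IB_ACCESS_* values):

#include <stdbool.h>
#include <stdio.h>

#define ACC_REMOTE_READ   (1u << 0)	/* illustrative access bits */
#define ACC_REMOTE_WRITE  (1u << 1)

static bool any_bit(unsigned int want, unsigned int have)
{
	return !(want && !(want & have));	/* old check */
}

static bool all_bits(unsigned int want, unsigned int have)
{
	return (want & have) == want;		/* new check */
}

int main(void)
{
	unsigned int want = ACC_REMOTE_READ | ACC_REMOTE_WRITE;
	unsigned int have = ACC_REMOTE_READ;	/* MR registered read-only */

	printf("old check passes: %d\n", any_bit(want, have));  /* 1 */
	printf("new check passes: %d\n", all_bits(want, have)); /* 0 */
	return 0;
}
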
@ -528,27 +573,26 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
if (!mr) {
pr_err("%s: No MR for key %#x\n", __func__, key);
rxe_dbg_qp(qp, "No MR for key %#x\n", key);
ret = -EINVAL;
goto err;
}
if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
key, (mr->rkey ? mr->rkey : mr->lkey));
ret = -EINVAL;
goto err_drop_ref;
}
if (atomic_read(&mr->num_mw) > 0) {
pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
__func__);
rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n");
ret = -EINVAL;
goto err_drop_ref;
}
if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) {
rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type);
ret = -EINVAL;
goto err_drop_ref;
}
@ -577,22 +621,20 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
/* user can only register MR in free state */
if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
pr_warn("%s: mr->lkey = 0x%x not free\n",
__func__, mr->lkey);
rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey);
return -EINVAL;
}
/* user can only register mr with qp in same protection domain */
if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
pr_warn("%s: qp->pd and mr->pd don't match\n",
__func__);
rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n");
return -EINVAL;
}
/* user is only allowed to change key portion of l/rkey */
if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
__func__, key, mr->lkey);
rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n",
key, mr->lkey);
return -EINVAL;
}

View File

@ -52,14 +52,14 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
{
if (mw->ibmw.type == IB_MW_TYPE_1) {
if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
pr_err_once(
rxe_dbg_mw(mw,
"attempt to bind a type 1 MW not in the valid state\n");
return -EINVAL;
}
/* o10-36.2.2 */
if (unlikely((mw->access & IB_ZERO_BASED))) {
pr_err_once("attempt to bind a zero based type 1 MW\n");
rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n");
return -EINVAL;
}
}
@ -67,21 +67,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (mw->ibmw.type == IB_MW_TYPE_2) {
/* o10-37.2.30 */
if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
pr_err_once(
rxe_dbg_mw(mw,
"attempt to bind a type 2 MW not in the free state\n");
return -EINVAL;
}
/* C10-72 */
if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
pr_err_once(
rxe_dbg_mw(mw,
"attempt to bind type 2 MW with qp with different PD\n");
return -EINVAL;
}
/* o10-37.2.40 */
if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
pr_err_once(
rxe_dbg_mw(mw,
"attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
return -EINVAL;
}
@ -92,13 +92,13 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
return 0;
if (unlikely(mr->access & IB_ZERO_BASED)) {
pr_err_once("attempt to bind MW to zero based MR\n");
rxe_dbg_mw(mw, "attempt to bind MW to zero based MR\n");
return -EINVAL;
}
/* C10-73 */
if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
pr_err_once(
rxe_dbg_mw(mw,
"attempt to bind an MW to an MR without bind access\n");
return -EINVAL;
}
@ -107,7 +107,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (unlikely((mw->access &
(IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
pr_err_once(
rxe_dbg_mw(mw,
"attempt to bind an Writable MW to an MR without local write access\n");
return -EINVAL;
}
@ -115,7 +115,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
/* C10-75 */
if (mw->access & IB_ZERO_BASED) {
if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) {
pr_err_once(
rxe_dbg_mw(mw,
"attempt to bind a ZB MW outside of the MR\n");
return -EINVAL;
}
@ -123,7 +123,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) ||
((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
(mr->ibmr.iova + mr->ibmr.length)))) {
pr_err_once(
rxe_dbg_mw(mw,
"attempt to bind a VA MW outside of the MR\n");
return -EINVAL;
}
@ -293,8 +293,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd ||
(mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
(mw->length == 0) ||
(access && !(access & mw->access)) ||
(mw->length == 0) || ((access & mw->access) != access) ||
mw->state != RXE_MW_STATE_VALID)) {
rxe_put(mw);
return NULL;

View File

@ -20,9 +20,10 @@
static struct rxe_recv_sockets recv_sockets;
static struct dst_entry *rxe_find_route4(struct net_device *ndev,
struct in_addr *saddr,
struct in_addr *daddr)
static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
struct net_device *ndev,
struct in_addr *saddr,
struct in_addr *daddr)
{
struct rtable *rt;
struct flowi4 fl = { { 0 } };
@ -35,7 +36,7 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev,
rt = ip_route_output_key(&init_net, &fl);
if (IS_ERR(rt)) {
pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr);
return NULL;
}
@ -43,7 +44,8 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev,
}
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *rxe_find_route6(struct net_device *ndev,
static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
struct net_device *ndev,
struct in6_addr *saddr,
struct in6_addr *daddr)
{
@ -60,12 +62,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
recv_sockets.sk6->sk, &fl6,
NULL);
if (IS_ERR(ndst)) {
pr_err_ratelimited("no route to %pI6\n", daddr);
rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
return NULL;
}
if (unlikely(ndst->error)) {
pr_err("no route to %pI6\n", daddr);
rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
goto put;
}
@ -77,7 +79,8 @@ put:
#else
static struct dst_entry *rxe_find_route6(struct net_device *ndev,
static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
struct net_device *ndev,
struct in6_addr *saddr,
struct in6_addr *daddr)
{
@ -105,14 +108,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
saddr = &av->sgid_addr._sockaddr_in.sin_addr;
daddr = &av->dgid_addr._sockaddr_in.sin_addr;
dst = rxe_find_route4(ndev, saddr, daddr);
dst = rxe_find_route4(qp, ndev, saddr, daddr);
} else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
struct in6_addr *saddr6;
struct in6_addr *daddr6;
saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
dst = rxe_find_route6(ndev, saddr6, daddr6);
dst = rxe_find_route6(qp, ndev, saddr6, daddr6);
#if IS_ENABLED(CONFIG_IPV6)
if (dst)
qp->dst_cookie =
@ -282,7 +285,7 @@ static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt,
dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
rxe_dbg_qp(qp, "Host not reachable\n");
return -EHOSTUNREACH;
}
@ -306,7 +309,7 @@ static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt,
dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
rxe_dbg_qp(qp, "Host not reachable\n");
return -EHOSTUNREACH;
}
@ -345,7 +348,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
if (unlikely(qp->need_req_skb &&
skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
rxe_run_task(&qp->req.task, 1);
rxe_sched_task(&qp->req.task);
rxe_put(qp);
}
@ -365,7 +368,8 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
} else {
pr_err("Unknown layer 3 protocol: %d\n", skb->protocol);
rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n",
skb->protocol);
atomic_dec(&pkt->qp->skb_out);
rxe_put(pkt->qp);
kfree_skb(skb);
@ -373,7 +377,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
}
if (unlikely(net_xmit_eval(err))) {
pr_debug("error sending packet: %d\n", err);
rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err);
return -EAGAIN;
}
@ -411,7 +415,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
if ((is_request && (qp->req.state != QP_STATE_READY)) ||
(!is_request && (qp->resp.state != QP_STATE_READY))) {
pr_info("Packet dropped. QP is not in ready state\n");
rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
@ -429,7 +433,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
if ((qp_type(qp) != IB_QPT_RC) &&
(pkt->mask & RXE_END_MASK)) {
pkt->wqe->state = wqe_state_done;
rxe_run_task(&qp->comp.task, 1);
rxe_sched_task(&qp->comp.task);
}
rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
@ -592,7 +596,7 @@ static int rxe_notify(struct notifier_block *not_blk,
rxe_port_down(rxe);
break;
case NETDEV_CHANGEMTU:
pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
case NETDEV_CHANGE:
@ -604,7 +608,7 @@ static int rxe_notify(struct notifier_block *not_blk,
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
default:
pr_info("ignoring netdev event = %ld for %s\n",
rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n",
event, ndev->name);
break;
}

View File

@ -101,6 +101,18 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_QPT_UC] = WR_LOCAL_OP_MASK,
},
},
[IB_WR_FLUSH] = {
.name = "IB_WR_FLUSH",
.mask = {
[IB_QPT_RC] = WR_FLUSH_MASK,
},
},
[IB_WR_ATOMIC_WRITE] = {
.name = "IB_WR_ATOMIC_WRITE",
.mask = {
[IB_QPT_RC] = WR_ATOMIC_WRITE_MASK,
},
},
};
struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
@ -378,6 +390,29 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
RXE_IETH_BYTES,
}
},
[IB_OPCODE_RC_FLUSH] = {
.name = "IB_OPCODE_RC_FLUSH",
.mask = RXE_FETH_MASK | RXE_RETH_MASK | RXE_FLUSH_MASK |
RXE_START_MASK | RXE_END_MASK | RXE_REQ_MASK,
.length = RXE_BTH_BYTES + RXE_FETH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_FETH] = RXE_BTH_BYTES,
[RXE_RETH] = RXE_BTH_BYTES + RXE_FETH_BYTES,
}
},
[IB_OPCODE_RC_ATOMIC_WRITE] = {
.name = "IB_OPCODE_RC_ATOMIC_WRITE",
.mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
RXE_ATOMIC_WRITE_MASK | RXE_START_MASK |
RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES + RXE_RETH_BYTES,
}
},
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = {

View File

@ -20,6 +20,8 @@ enum rxe_wr_mask {
WR_READ_MASK = BIT(3),
WR_WRITE_MASK = BIT(4),
WR_LOCAL_OP_MASK = BIT(5),
WR_FLUSH_MASK = BIT(6),
WR_ATOMIC_WRITE_MASK = BIT(7),
WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,
@ -47,6 +49,7 @@ enum rxe_hdr_type {
RXE_RDETH,
RXE_DETH,
RXE_IMMDT,
RXE_FETH,
RXE_PAYLOAD,
NUM_HDR_TYPES
};
@ -63,6 +66,7 @@ enum rxe_hdr_mask {
RXE_IETH_MASK = BIT(RXE_IETH),
RXE_RDETH_MASK = BIT(RXE_RDETH),
RXE_DETH_MASK = BIT(RXE_DETH),
RXE_FETH_MASK = BIT(RXE_FETH),
RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD),
RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0),
@ -71,16 +75,19 @@ enum rxe_hdr_mask {
RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3),
RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4),
RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5),
RXE_FLUSH_MASK = BIT(NUM_HDR_TYPES + 6),
RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6),
RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7),
RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 7),
RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 8),
RXE_START_MASK = BIT(NUM_HDR_TYPES + 8),
RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9),
RXE_END_MASK = BIT(NUM_HDR_TYPES + 10),
RXE_START_MASK = BIT(NUM_HDR_TYPES + 9),
RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 10),
RXE_END_MASK = BIT(NUM_HDR_TYPES + 11),
RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),
RXE_ATOMIC_WRITE_MASK = BIT(NUM_HDR_TYPES + 14),
RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK),
RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK),
RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK),

View File

@ -51,7 +51,14 @@ enum rxe_device_param {
| IB_DEVICE_SRQ_RESIZE
| IB_DEVICE_MEM_MGT_EXTENSIONS
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_FLUSH_GLOBAL
| IB_DEVICE_FLUSH_PERSISTENT
#ifdef CONFIG_64BIT
| IB_DEVICE_MEM_WINDOW_TYPE_2B
| IB_DEVICE_ATOMIC_WRITE,
#else
| IB_DEVICE_MEM_WINDOW_TYPE_2B,
#endif /* CONFIG_64BIT */
RXE_MAX_SGE = 32,
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +
sizeof(struct ib_sge) * RXE_MAX_SGE,

View File

@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
int has_srq)
{
if (cap->max_send_wr > rxe->attr.max_qp_wr) {
pr_debug("invalid send wr = %u > %d\n",
rxe_dbg(rxe, "invalid send wr = %u > %d\n",
cap->max_send_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_send_sge > rxe->attr.max_send_sge) {
pr_debug("invalid send sge = %u > %d\n",
rxe_dbg(rxe, "invalid send sge = %u > %d\n",
cap->max_send_sge, rxe->attr.max_send_sge);
goto err1;
}
if (!has_srq) {
if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
pr_debug("invalid recv wr = %u > %d\n",
rxe_dbg(rxe, "invalid recv wr = %u > %d\n",
cap->max_recv_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
pr_debug("invalid recv sge = %u > %d\n",
rxe_dbg(rxe, "invalid recv sge = %u > %d\n",
cap->max_recv_sge, rxe->attr.max_recv_sge);
goto err1;
}
}
if (cap->max_inline_data > rxe->max_inline_data) {
pr_debug("invalid max inline data = %u > %d\n",
rxe_dbg(rxe, "invalid max inline data = %u > %d\n",
cap->max_inline_data, rxe->max_inline_data);
goto err1;
}
@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
}
if (!init->recv_cq || !init->send_cq) {
pr_debug("missing cq\n");
rxe_dbg(rxe, "missing cq\n");
goto err1;
}
@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
if (init->qp_type == IB_QPT_GSI) {
if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
pr_debug("invalid port = %d\n", port_num);
rxe_dbg(rxe, "invalid port = %d\n", port_num);
goto err1;
}
port = &rxe->port;
if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
pr_debug("GSI QP exists for port %d\n", port_num);
rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num);
goto err1;
}
}
@ -172,10 +172,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
spin_lock_init(&qp->state_lock);
spin_lock_init(&qp->req.task.state_lock);
spin_lock_init(&qp->resp.task.state_lock);
spin_lock_init(&qp->comp.task.state_lock);
spin_lock_init(&qp->sq.sq_lock);
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
@ -242,10 +238,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_head_init(&qp->req_pkts);
rxe_init_task(&qp->req.task, qp,
rxe_requester, "req");
rxe_init_task(&qp->comp.task, qp,
rxe_completer, "comp");
rxe_init_task(&qp->req.task, qp, rxe_requester);
rxe_init_task(&qp->comp.task, qp, rxe_completer);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
if (init->qp_type == IB_QPT_RC) {
@ -270,9 +264,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
wqe_size = rcv_wqe_size(qp->rq.max_sge);
pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
type = QUEUE_TYPE_FROM_CLIENT;
qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
wqe_size, type);
@ -292,8 +283,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_head_init(&qp->resp_pkts);
rxe_init_task(&qp->resp.task, qp,
rxe_responder, "resp");
rxe_init_task(&qp->resp.task, qp, rxe_responder);
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
@ -402,7 +392,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
pr_debug("invalid mask or state for qp\n");
rxe_dbg_qp(qp, "invalid mask or state\n");
goto err1;
}
@ -416,7 +406,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_PORT) {
if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
pr_debug("invalid port %d\n", attr->port_num);
rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num);
goto err1;
}
}
@ -424,18 +414,18 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
goto err1;
if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))
if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr))
goto err1;
if (mask & IB_QP_ALT_PATH) {
if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
if (rxe_av_chk_attr(qp, &attr->alt_ah_attr))
goto err1;
if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) {
pr_debug("invalid alt port %d\n", attr->alt_port_num);
rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num);
goto err1;
}
if (attr->alt_timeout > 31) {
pr_debug("invalid QP alt timeout %d > 31\n",
rxe_dbg_qp(qp, "invalid alt timeout %d > 31\n",
attr->alt_timeout);
goto err1;
}
@ -448,7 +438,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
enum ib_mtu mtu = attr->path_mtu;
if (mtu > max_mtu) {
pr_debug("invalid mtu (%d) > (%d)\n",
rxe_dbg_qp(qp, "invalid mtu (%d) > (%d)\n",
ib_mtu_enum_to_int(mtu),
ib_mtu_enum_to_int(max_mtu));
goto err1;
@ -457,7 +447,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
pr_debug("invalid max_rd_atomic %d > %d\n",
rxe_dbg_qp(qp, "invalid max_rd_atomic %d > %d\n",
attr->max_rd_atomic,
rxe->attr.max_qp_rd_atom);
goto err1;
@ -466,7 +456,8 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_TIMEOUT) {
if (attr->timeout > 31) {
pr_debug("invalid QP timeout %d > 31\n", attr->timeout);
rxe_dbg_qp(qp, "invalid timeout %d > 31\n",
attr->timeout);
goto err1;
}
}
@ -543,10 +534,10 @@ static void rxe_qp_drain(struct rxe_qp *qp)
if (qp->req.state != QP_STATE_DRAINED) {
qp->req.state = QP_STATE_DRAIN;
if (qp_type(qp) == IB_QPT_RC)
rxe_run_task(&qp->comp.task, 1);
rxe_sched_task(&qp->comp.task);
else
__rxe_do_task(&qp->comp.task);
rxe_run_task(&qp->req.task, 1);
rxe_sched_task(&qp->req.task);
}
}
}
@ -560,13 +551,13 @@ void rxe_qp_error(struct rxe_qp *qp)
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
rxe_run_task(&qp->resp.task, 1);
rxe_sched_task(&qp->resp.task);
if (qp_type(qp) == IB_QPT_RC)
rxe_run_task(&qp->comp.task, 1);
rxe_sched_task(&qp->comp.task);
else
__rxe_do_task(&qp->comp.task);
rxe_run_task(&qp->req.task, 1);
rxe_sched_task(&qp->req.task);
}
/* called by the modify qp verb */
@ -644,27 +635,24 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_RETRY_CNT) {
qp->attr.retry_cnt = attr->retry_cnt;
qp->comp.retry_cnt = attr->retry_cnt;
pr_debug("qp#%d set retry count = %d\n", qp_num(qp),
attr->retry_cnt);
rxe_dbg_qp(qp, "set retry count = %d\n", attr->retry_cnt);
}
if (mask & IB_QP_RNR_RETRY) {
qp->attr.rnr_retry = attr->rnr_retry;
qp->comp.rnr_retry = attr->rnr_retry;
pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp),
attr->rnr_retry);
rxe_dbg_qp(qp, "set rnr retry count = %d\n", attr->rnr_retry);
}
if (mask & IB_QP_RQ_PSN) {
qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
qp->resp.psn = qp->attr.rq_psn;
pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp),
qp->resp.psn);
rxe_dbg_qp(qp, "set resp psn = 0x%x\n", qp->resp.psn);
}
if (mask & IB_QP_MIN_RNR_TIMER) {
qp->attr.min_rnr_timer = attr->min_rnr_timer;
pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp),
rxe_dbg_qp(qp, "set min rnr timer = 0x%x\n",
attr->min_rnr_timer);
}
@ -672,7 +660,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
qp->req.psn = qp->attr.sq_psn;
qp->comp.psn = qp->attr.sq_psn;
pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn);
rxe_dbg_qp(qp, "set req psn = 0x%x\n", qp->req.psn);
}
if (mask & IB_QP_PATH_MIG_STATE)
@ -686,40 +674,40 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
switch (attr->qp_state) {
case IB_QPS_RESET:
pr_debug("qp#%d state -> RESET\n", qp_num(qp));
rxe_dbg_qp(qp, "state -> RESET\n");
rxe_qp_reset(qp);
break;
case IB_QPS_INIT:
pr_debug("qp#%d state -> INIT\n", qp_num(qp));
rxe_dbg_qp(qp, "state -> INIT\n");
qp->req.state = QP_STATE_INIT;
qp->resp.state = QP_STATE_INIT;
qp->comp.state = QP_STATE_INIT;
break;
case IB_QPS_RTR:
pr_debug("qp#%d state -> RTR\n", qp_num(qp));
rxe_dbg_qp(qp, "state -> RTR\n");
qp->resp.state = QP_STATE_READY;
break;
case IB_QPS_RTS:
pr_debug("qp#%d state -> RTS\n", qp_num(qp));
rxe_dbg_qp(qp, "state -> RTS\n");
qp->req.state = QP_STATE_READY;
qp->comp.state = QP_STATE_READY;
break;
case IB_QPS_SQD:
pr_debug("qp#%d state -> SQD\n", qp_num(qp));
rxe_dbg_qp(qp, "state -> SQD\n");
rxe_qp_drain(qp);
break;
case IB_QPS_SQE:
pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp));
rxe_dbg_qp(qp, "state -> SQE !!?\n");
/* Not possible from modify_qp. */
break;
case IB_QPS_ERR:
pr_debug("qp#%d state -> ERR\n", qp_num(qp));
rxe_dbg_qp(qp, "state -> ERR\n");
rxe_qp_error(qp);
break;
}
@ -759,7 +747,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
attr->sq_draining = 0;
}
pr_debug("attr->sq_draining = %d\n", attr->sq_draining);
rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining);
return 0;
}
@ -771,7 +759,7 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
* will fail immediately.
*/
if (atomic_read(&qp->mcg_num)) {
pr_debug("Attempt to destroy QP while attached to multicast group\n");
rxe_dbg_qp(qp, "Attempt to destroy while attached to multicast group\n");
return -EBUSY;
}
@ -829,12 +817,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
if (qp->resp.mr)
rxe_put(qp->resp.mr);
if (qp_type(qp) == IB_QPT_RC)
sk_dst_reset(qp->sk->sk);
free_rd_atomic_resources(qp);
if (qp->sk) {
if (qp_type(qp) == IB_QPT_RC)
sk_dst_reset(qp->sk->sk);
kernel_sock_shutdown(qp->sk, SHUT_RDWR);
sock_release(qp->sk);
}

View File

@ -100,12 +100,12 @@ void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
rxe_dbg_qp(qp, "nak timer fired\n");
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
rxe_run_task(&qp->req.task, 1);
rxe_sched_task(&qp->req.task);
}
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
@ -241,6 +241,9 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
IB_OPCODE_RC_SEND_FIRST;
case IB_WR_FLUSH:
return IB_OPCODE_RC_FLUSH;
case IB_WR_RDMA_READ:
return IB_OPCODE_RC_RDMA_READ_REQUEST;
@ -258,6 +261,10 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
else
return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
IB_OPCODE_RC_SEND_FIRST;
case IB_WR_ATOMIC_WRITE:
return IB_OPCODE_RC_ATOMIC_WRITE;
case IB_WR_REG_MR:
case IB_WR_LOCAL_INV:
return opcode;
@ -421,11 +428,18 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
/* init optional headers */
if (pkt->mask & RXE_RETH_MASK) {
reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
if (pkt->mask & RXE_FETH_MASK)
reth_set_rkey(pkt, ibwr->wr.flush.rkey);
else
reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
reth_set_va(pkt, wqe->iova);
reth_set_len(pkt, wqe->dma.resid);
}
/* Fill Flush Extension Transport Header */
if (pkt->mask & RXE_FETH_MASK)
feth_init(pkt, ibwr->wr.flush.type, ibwr->wr.flush.level);
if (pkt->mask & RXE_IMMDT_MASK)
immdt_set_imm(pkt, ibwr->ex.imm_data);
@ -484,6 +498,14 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
memset(pad, 0, bth_pad(pkt));
}
} else if (pkt->mask & RXE_FLUSH_MASK) {
/* oA19-2: shall have no payload. */
wqe->dma.resid = 0;
}
if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload);
wqe->dma.resid -= payload;
}
return 0;
@ -595,7 +617,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
}
break;
default:
pr_err("Unexpected send wqe opcode %d\n", opcode);
rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode);
wqe->status = IB_WC_LOC_QP_OP_ERR;
return -EINVAL;
}
@ -608,7 +630,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
* which can lead to a deadlock. So go ahead and complete
* it now.
*/
rxe_run_task(&qp->comp.task, 1);
rxe_sched_task(&qp->comp.task);
return 0;
}
@ -709,13 +731,15 @@ int rxe_requester(void *arg)
}
mask = rxe_opcode[opcode].mask;
if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) {
if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK |
RXE_ATOMIC_WRITE_MASK))) {
if (check_init_depth(qp, wqe))
goto exit;
}
mtu = get_mtu(qp);
payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0;
payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ?
wqe->dma.resid : 0;
if (payload > mtu) {
if (qp_type(qp) == IB_QPT_UD) {
/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
@ -733,7 +757,7 @@ int rxe_requester(void *arg)
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
rxe_run_task(&qp->comp.task, 0);
rxe_run_task(&qp->comp.task);
goto done;
}
payload = mtu;
@ -748,14 +772,14 @@ int rxe_requester(void *arg)
av = rxe_get_av(&pkt, &ah);
if (unlikely(!av)) {
pr_err("qp#%d Failed no address vector\n", qp_num(qp));
rxe_dbg_qp(qp, "Failed no address vector\n");
wqe->status = IB_WC_LOC_QP_OP_ERR;
goto err;
}
skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
rxe_dbg_qp(qp, "Failed allocating skb\n");
wqe->status = IB_WC_LOC_QP_OP_ERR;
if (ah)
rxe_put(ah);
@ -764,7 +788,7 @@ int rxe_requester(void *arg)
err = finish_packet(qp, av, wqe, &pkt, skb, payload);
if (unlikely(err)) {
pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
rxe_dbg_qp(qp, "Error during finish packet\n");
if (err == -EFAULT)
wqe->status = IB_WC_LOC_PROT_ERR;
else
@ -795,7 +819,7 @@ int rxe_requester(void *arg)
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
if (err == -EAGAIN) {
rxe_run_task(&qp->req.task, 1);
rxe_sched_task(&qp->req.task);
goto exit;
}
@ -817,7 +841,7 @@ err:
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
wqe->state = wqe_state_error;
qp->req.state = QP_STATE_ERROR;
rxe_run_task(&qp->comp.task, 0);
rxe_run_task(&qp->comp.task);
exit:
ret = -EAGAIN;
out:

View File

@ -22,6 +22,8 @@ enum resp_states {
RESPST_EXECUTE,
RESPST_READ_REPLY,
RESPST_ATOMIC_REPLY,
RESPST_ATOMIC_WRITE_REPLY,
RESPST_PROCESS_FLUSH,
RESPST_COMPLETE,
RESPST_ACKNOWLEDGE,
RESPST_CLEANUP,
@ -57,6 +59,8 @@ static char *resp_state_name[] = {
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
[RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
[RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@ -91,7 +95,10 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
(skb_queue_len(&qp->req_pkts) > 1);
rxe_run_task(&qp->resp.task, must_sched);
if (must_sched)
rxe_sched_task(&qp->resp.task);
else
rxe_run_task(&qp->resp.task);
}
static inline enum resp_states get_req(struct rxe_qp *qp,
@ -253,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp,
}
}
static bool check_qp_attr_access(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
if (((pkt->mask & RXE_READ_MASK) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
((pkt->mask & RXE_ATOMIC_MASK) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
return false;
if (pkt->mask & RXE_FLUSH_MASK) {
u32 flush_type = feth_plt(pkt);
if ((flush_type & IB_FLUSH_GLOBAL &&
!(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
(flush_type & IB_FLUSH_PERSISTENT &&
!(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
return false;
}
return true;
}
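
check_qp_attr_access() above gates a FLUSH request on the QP's access flags: each placement type requested in the FETH (global and/or persistent) must have the matching IB_ACCESS_FLUSH_* right enabled on the QP. A standalone sketch of that rule (the bit values are illustrative, not the real uapi constants):

#include <stdbool.h>
#include <stdio.h>

#define FLUSH_GLOBAL            (1u << 0)	/* illustrative FETH placement types */
#define FLUSH_PERSISTENT        (1u << 1)
#define ACCESS_FLUSH_GLOBAL     (1u << 0)	/* illustrative QP access rights */
#define ACCESS_FLUSH_PERSISTENT (1u << 1)

/* every placement type requested must have the matching QP access right */
static bool flush_allowed(unsigned int flush_type, unsigned int qp_access)
{
	if ((flush_type & FLUSH_GLOBAL) && !(qp_access & ACCESS_FLUSH_GLOBAL))
		return false;
	if ((flush_type & FLUSH_PERSISTENT) &&
	    !(qp_access & ACCESS_FLUSH_PERSISTENT))
		return false;
	return true;
}

int main(void)
{
	/* QP only allows global flushes; a persistent flush must be rejected */
	printf("%d\n", flush_allowed(FLUSH_PERSISTENT, ACCESS_FLUSH_GLOBAL)); /* 0 */
	printf("%d\n", flush_allowed(FLUSH_GLOBAL, ACCESS_FLUSH_GLOBAL));     /* 1 */
	return 0;
}
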
static enum resp_states check_op_valid(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
switch (qp_type(qp)) {
case IB_QPT_RC:
if (((pkt->mask & RXE_READ_MASK) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
((pkt->mask & RXE_WRITE_MASK) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
((pkt->mask & RXE_ATOMIC_MASK) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
if (!check_qp_attr_access(qp, pkt))
return RESPST_ERR_UNSUPPORTED_OPCODE;
}
break;
@ -314,7 +339,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
/* don't trust user space data */
if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
return RESPST_ERR_MALFORMED_WQE;
}
size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
@ -364,7 +389,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
}
}
if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) {
if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
* it is the requester's job to not send
* too many read/atomic ops, we just
* recycle the responder resource queue
@ -387,19 +412,66 @@ static enum resp_states check_resource(struct rxe_qp *qp,
return RESPST_CHK_LENGTH;
}
static enum resp_states check_length(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
switch (qp_type(qp)) {
case IB_QPT_RC:
return RESPST_CHK_RKEY;
/*
* See IBA C9-92
* For UD QPs we only check if the packet will fit in the
* receive buffer later. For RDMA operations additional
* length checks are performed in check_rkey.
*/
if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
(qp_type(qp) == IB_QPT_UC))) {
unsigned int mtu = qp->mtu;
unsigned int payload = payload_size(pkt);
case IB_QPT_UC:
return RESPST_CHK_RKEY;
default:
return RESPST_CHK_RKEY;
if ((pkt->mask & RXE_START_MASK) &&
(pkt->mask & RXE_END_MASK)) {
if (unlikely(payload > mtu)) {
rxe_dbg_qp(qp, "only packet too long");
return RESPST_ERR_LENGTH;
}
} else if ((pkt->mask & RXE_START_MASK) ||
(pkt->mask & RXE_MIDDLE_MASK)) {
if (unlikely(payload != mtu)) {
rxe_dbg_qp(qp, "first or middle packet not mtu");
return RESPST_ERR_LENGTH;
}
} else if (pkt->mask & RXE_END_MASK) {
if (unlikely((payload == 0) || (payload > mtu))) {
rxe_dbg_qp(qp, "last packet zero or too long");
return RESPST_ERR_LENGTH;
}
}
}
/* See IBA C9-94 */
if (pkt->mask & RXE_RETH_MASK) {
if (reth_len(pkt) > (1U << 31)) {
rxe_dbg_qp(qp, "dma length too long");
return RESPST_ERR_LENGTH;
}
}
return RESPST_CHK_RKEY;
}
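
In prose, the rewritten length check above enforces IBA C9-92 for RC and UC packets that carry a payload: an "only" packet may carry at most one MTU, "first" and "middle" packets must carry exactly one MTU, and a "last" packet must carry between 1 byte and one MTU; on top of that, C9-94 caps the RETH DMA length at 2^31. A minimal sketch of the payload rules (payload_ok() and the harness are illustrative, not kernel code):

#include <stdbool.h>
#include <stdio.h>

enum pkt_pos { PKT_ONLY, PKT_FIRST, PKT_MIDDLE, PKT_LAST };

/* restatement of the payload rules checked above */
static bool payload_ok(enum pkt_pos pos, unsigned int payload, unsigned int mtu)
{
	switch (pos) {
	case PKT_ONLY:
		return payload <= mtu;
	case PKT_FIRST:
	case PKT_MIDDLE:
		return payload == mtu;
	case PKT_LAST:
		return payload > 0 && payload <= mtu;
	}
	return false;
}

int main(void)
{
	unsigned int mtu = 1024;

	printf("only 512    -> %d\n", payload_ok(PKT_ONLY, 512, mtu));    /* 1 */
	printf("first 512   -> %d\n", payload_ok(PKT_FIRST, 512, mtu));   /* 0 */
	printf("middle 1024 -> %d\n", payload_ok(PKT_MIDDLE, 1024, mtu)); /* 1 */
	printf("last 0      -> %d\n", payload_ok(PKT_LAST, 0, mtu));      /* 0 */
	return 0;
}
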
static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
qp->resp.va = reth_va(pkt);
qp->resp.offset = 0;
qp->resp.rkey = reth_rkey(pkt);
qp->resp.resid = reth_len(pkt);
qp->resp.length = reth_len(pkt);
}
static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
qp->resp.va = atmeth_va(pkt);
qp->resp.offset = 0;
qp->resp.rkey = atmeth_rkey(pkt);
qp->resp.resid = sizeof(u64);
}
static enum resp_states check_rkey(struct rxe_qp *qp,
@ -413,29 +485,32 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
u32 pktlen;
int mtu = qp->mtu;
enum resp_states state;
int access;
int access = 0;
if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (pkt->mask & RXE_RETH_MASK)
qp_resp_from_reth(qp, pkt);
if (pkt->mask & RXE_READ_OR_WRITE_MASK) {
if (pkt->mask & RXE_RETH_MASK) {
qp->resp.va = reth_va(pkt);
qp->resp.offset = 0;
qp->resp.rkey = reth_rkey(pkt);
qp->resp.resid = reth_len(pkt);
qp->resp.length = reth_len(pkt);
}
access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
: IB_ACCESS_REMOTE_WRITE;
} else if (pkt->mask & RXE_FLUSH_MASK) {
u32 flush_type = feth_plt(pkt);
if (pkt->mask & RXE_RETH_MASK)
qp_resp_from_reth(qp, pkt);
if (flush_type & IB_FLUSH_GLOBAL)
access |= IB_ACCESS_FLUSH_GLOBAL;
if (flush_type & IB_FLUSH_PERSISTENT)
access |= IB_ACCESS_FLUSH_PERSISTENT;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
qp->resp.va = atmeth_va(pkt);
qp->resp.offset = 0;
qp->resp.rkey = atmeth_rkey(pkt);
qp->resp.resid = sizeof(u64);
qp_resp_from_atmeth(qp, pkt);
access = IB_ACCESS_REMOTE_ATOMIC;
} else {
return RESPST_EXECUTE;
}
/* A zero-byte op is not required to set an addr or rkey. */
/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
(pkt->mask & RXE_RETH_MASK) &&
reth_len(pkt) == 0) {
@ -450,15 +525,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (rkey_is_mw(rkey)) {
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
pr_debug("%s: no MW matches rkey %#x\n",
__func__, rkey);
rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
mr = mw->mr;
if (!mr) {
pr_err("%s: MW doesn't have an MR\n", __func__);
rxe_dbg_qp(qp, "MW doesn't have an MR\n");
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@ -471,19 +545,27 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
pr_debug("%s: no MR matches rkey %#x\n",
__func__, rkey);
rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
}
if (pkt->mask & RXE_FLUSH_MASK) {
/* FLUSH MR may not set va or resid
* no need to check range since we will flush whole mr
*/
if (feth_sel(pkt) == IB_FLUSH_MR)
goto skip_check_range;
}
if (mr_check_range(mr, va + qp->resp.offset, resid)) {
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
if (pkt->mask & RXE_WRITE_MASK) {
skip_check_range:
if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
@ -583,15 +665,66 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
res->state = rdatm_res_state_new;
break;
case RXE_ATOMIC_MASK:
case RXE_ATOMIC_WRITE_MASK:
res->first_psn = pkt->psn;
res->last_psn = pkt->psn;
res->cur_psn = pkt->psn;
break;
case RXE_FLUSH_MASK:
res->flush.va = qp->resp.va + qp->resp.offset;
res->flush.length = qp->resp.length;
res->flush.type = feth_plt(pkt);
res->flush.level = feth_sel(pkt);
}
return res;
}
static enum resp_states process_flush(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
u64 length, start;
struct rxe_mr *mr = qp->resp.mr;
struct resp_res *res = qp->resp.res;
/* oA19-14, oA19-15 */
if (res && res->replay)
return RESPST_ACKNOWLEDGE;
else if (!res) {
res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
qp->resp.res = res;
}
if (res->flush.level == IB_FLUSH_RANGE) {
start = res->flush.va;
length = res->flush.length;
} else { /* level == IB_FLUSH_MR */
start = mr->ibmr.iova;
length = mr->ibmr.length;
}
if (res->flush.type & IB_FLUSH_PERSISTENT) {
if (rxe_flush_pmem_iova(mr, start, length))
return RESPST_ERR_RKEY_VIOLATION;
/* Make data persistent. */
wmb();
} else if (res->flush.type & IB_FLUSH_GLOBAL) {
/* Make data globally visible. */
wmb();
}
qp->resp.msn++;
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
qp->resp.ack_psn = qp->resp.psn;
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
return RESPST_ACKNOWLEDGE;
}
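The persistent branch above delegates to rxe_flush_pmem_iova(). As a hedged sketch of what such a flush amounts to, assuming the iova resolves to a single kernel mapping via iova_to_vaddr() and arch_wb_cache_pmem() is available (the real helper walks the MR page list):
static int flush_pmem_range_sketch(struct rxe_mr *mr, u64 iova, unsigned int length)
{
	void *va = iova_to_vaddr(mr, iova, length);
	if (!va)
		return -EFAULT;
	/* write the CPU caches for this range back to the persistence domain */
	arch_wb_cache_pmem(va, length);
	return 0;
}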
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
@ -652,6 +785,55 @@ out:
return ret;
}
static enum resp_states atomic_write_reply(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
u64 src, *dst;
struct resp_res *res = qp->resp.res;
struct rxe_mr *mr = qp->resp.mr;
int payload = payload_size(pkt);
if (!res) {
res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
qp->resp.res = res;
}
if (!res->replay) {
#ifdef CONFIG_64BIT
if (mr->state != RXE_MR_STATE_VALID)
return RESPST_ERR_RKEY_VIOLATION;
memcpy(&src, payload_addr(pkt), payload);
dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
/* check that vaddr is 8-byte aligned. */
if (!dst || (uintptr_t)dst & 7)
return RESPST_ERR_MISALIGNED_ATOMIC;
/* Do atomic write after all prior operations have completed */
smp_store_release(dst, src);
/* decrease resp.resid to zero */
qp->resp.resid -= sizeof(payload);
qp->resp.msn++;
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
qp->resp.ack_psn = qp->resp.psn;
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
return RESPST_ACKNOWLEDGE;
#else
return RESPST_ERR_UNSUPPORTED_OPCODE;
#endif /* CONFIG_64BIT */
}
return RESPST_ACKNOWLEDGE;
}
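The CONFIG_64BIT guard and the alignment check above exist because the 8-byte payload must become visible as a single machine-level store. A user-space analogue of that guarantee, purely illustrative:
#include <stdatomic.h>
#include <stdint.h>
/* The 64-bit value lands atomically, ordered after all prior writes. */
static void atomic_write_semantics(_Atomic uint64_t *dst, uint64_t src)
{
	atomic_store_explicit(dst, src, memory_order_release);
}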
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
struct rxe_pkt_info *ack,
int opcode,
@ -807,14 +989,19 @@ static enum resp_states read_reply(struct rxe_qp *qp,
skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
res->cur_psn, AETH_ACK_UNLIMITED);
if (!skb) {
rxe_put(mr);
if (mr)
rxe_put(mr);
return RESPST_ERR_RNR;
}
rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
payload, RXE_FROM_MR_OBJ);
err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
payload, RXE_FROM_MR_OBJ);
if (mr)
rxe_put(mr);
if (err) {
kfree_skb(skb);
return RESPST_ERR_RKEY_VIOLATION;
}
if (bth_pad(&ack_pkt)) {
u8 *pad = payload_addr(&ack_pkt) + payload;
@ -890,6 +1077,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
return RESPST_ATOMIC_REPLY;
} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
return RESPST_ATOMIC_WRITE_REPLY;
} else if (pkt->mask & RXE_FLUSH_MASK) {
return RESPST_PROCESS_FLUSH;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
@ -1040,7 +1231,7 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
pr_err_ratelimited("Failed sending %s\n", msg);
rxe_dbg_qp(qp, "Failed sending %s\n", msg);
return err;
}
@ -1063,6 +1254,19 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
return ret;
}
static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
int ret = send_common_ack(qp, syndrome, psn,
IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
"RDMA READ response of length zero ACK");
/* have to clear this since it is used to trigger
* long read replies
*/
qp->resp.res = NULL;
return ret;
}
static enum resp_states acknowledge(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
@ -1073,6 +1277,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
@ -1129,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* SEND. Ack again and cleanup. C9-105. */
send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
return RESPST_CLEANUP;
} else if (pkt->mask & RXE_FLUSH_MASK) {
struct resp_res *res;
/* Find the operation in our list of responder resources. */
res = find_resource(qp, pkt->psn);
if (res) {
res->replay = 1;
res->cur_psn = pkt->psn;
qp->resp.res = res;
rc = RESPST_PROCESS_FLUSH;
goto out;
}
/* Resource not found. Class D error. Drop the request. */
rc = RESPST_CLEANUP;
goto out;
} else if (pkt->mask & RXE_READ_MASK) {
struct resp_res *res;
@ -1184,7 +1406,9 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
res->replay = 1;
res->cur_psn = pkt->psn;
qp->resp.res = res;
rc = RESPST_ATOMIC_REPLY;
rc = pkt->mask & RXE_ATOMIC_MASK ?
RESPST_ATOMIC_REPLY :
RESPST_ATOMIC_WRITE_REPLY;
goto out;
}
@ -1286,8 +1510,7 @@ int rxe_responder(void *arg)
}
while (1) {
pr_debug("qp#%d state = %s\n", qp_num(qp),
resp_state_name[state]);
rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
switch (state) {
case RESPST_GET_REQ:
state = get_req(qp, &pkt);
@ -1305,7 +1528,7 @@ int rxe_responder(void *arg)
state = check_resource(qp, pkt);
break;
case RESPST_CHK_LENGTH:
state = check_length(qp, pkt);
state = rxe_resp_check_length(qp, pkt);
break;
case RESPST_CHK_RKEY:
state = check_rkey(qp, pkt);
@ -1322,6 +1545,12 @@ int rxe_responder(void *arg)
case RESPST_ATOMIC_REPLY:
state = atomic_reply(qp, pkt);
break;
case RESPST_ATOMIC_WRITE_REPLY:
state = atomic_write_reply(qp, pkt);
break;
case RESPST_PROCESS_FLUSH:
state = process_flush(qp, pkt);
break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
@ -1444,7 +1673,7 @@ int rxe_responder(void *arg)
case RESPST_ERROR:
qp->resp.goto_error = 0;
pr_debug("qp#%d moved to error state\n", qp_num(qp));
rxe_dbg_qp(qp, "moved to error state\n");
rxe_qp_error(qp);
goto exit;

View File

@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
struct ib_srq_attr *attr = &init->attr;
if (attr->max_wr > rxe->attr.max_srq_wr) {
pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
attr->max_wr = RXE_MIN_SRQ_WR;
if (attr->max_sge > rxe->attr.max_srq_sge) {
pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n",
attr->max_sge, rxe->attr.max_srq_sge);
goto err1;
}
@ -65,7 +65,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
type = QUEUE_TYPE_FROM_CLIENT;
q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type);
if (!q) {
pr_warn("unable to allocate queue for srq\n");
rxe_dbg_srq(srq, "Unable to allocate queue\n");
return -ENOMEM;
}
@ -94,24 +94,24 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
{
if (srq->error) {
pr_warn("srq in error state\n");
rxe_dbg_srq(srq, "in error state\n");
goto err1;
}
if (mask & IB_SRQ_MAX_WR) {
if (attr->max_wr > rxe->attr.max_srq_wr) {
pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
rxe_dbg_srq(srq, "max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
rxe_dbg_srq(srq, "max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (srq->limit && (attr->max_wr < srq->limit)) {
pr_warn("max_wr (%d) < srq->limit (%d)\n",
rxe_dbg_srq(srq, "max_wr (%d) < srq->limit (%d)\n",
attr->max_wr, srq->limit);
goto err1;
}
@ -122,13 +122,13 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
if (mask & IB_SRQ_LIMIT) {
if (attr->srq_limit > rxe->attr.max_srq_wr) {
pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
rxe_dbg_srq(srq, "srq_limit(%d) > max_srq_wr(%d)\n",
attr->srq_limit, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->srq_limit > srq->rq.queue->buf->index_mask) {
pr_warn("srq_limit (%d) > cur limit(%d)\n",
rxe_dbg_srq(srq, "srq_limit (%d) > cur limit(%d)\n",
attr->srq_limit,
srq->rq.queue->buf->index_mask);
goto err1;

View File

@ -4,10 +4,6 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/hardirq.h>
#include "rxe.h"
int __rxe_do_task(struct rxe_task *task)
@ -28,30 +24,31 @@ int __rxe_do_task(struct rxe_task *task)
* a second caller finds the task already running
* but looks just after the last call to func
*/
void rxe_do_task(struct tasklet_struct *t)
static void do_task(struct tasklet_struct *t)
{
int cont;
int ret;
struct rxe_task *task = from_tasklet(task, t, tasklet);
struct rxe_qp *qp = (struct rxe_qp *)task->arg;
unsigned int iterations = RXE_MAX_ITERATIONS;
spin_lock_bh(&task->state_lock);
spin_lock_bh(&task->lock);
switch (task->state) {
case TASK_STATE_START:
task->state = TASK_STATE_BUSY;
spin_unlock_bh(&task->state_lock);
spin_unlock_bh(&task->lock);
break;
case TASK_STATE_BUSY:
task->state = TASK_STATE_ARMED;
fallthrough;
case TASK_STATE_ARMED:
spin_unlock_bh(&task->state_lock);
spin_unlock_bh(&task->lock);
return;
default:
spin_unlock_bh(&task->state_lock);
pr_warn("%s failed with bad state %d\n", __func__, task->state);
spin_unlock_bh(&task->lock);
rxe_dbg_qp(qp, "failed with bad state %d\n", task->state);
return;
}
@ -59,7 +56,7 @@ void rxe_do_task(struct tasklet_struct *t)
cont = 0;
ret = task->func(task->arg);
spin_lock_bh(&task->state_lock);
spin_lock_bh(&task->lock);
switch (task->state) {
case TASK_STATE_BUSY:
if (ret) {
@ -85,27 +82,25 @@ void rxe_do_task(struct tasklet_struct *t)
break;
default:
pr_warn("%s failed with bad state %d\n", __func__,
task->state);
rxe_dbg_qp(qp, "failed with bad state %d\n",
task->state);
}
spin_unlock_bh(&task->state_lock);
spin_unlock_bh(&task->lock);
} while (cont);
task->ret = ret;
}
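The locking above implements a small three-state machine; summarized editorially (not from the patch):
	START --tasklet runs--> BUSY
	BUSY  --rescheduled while func() is running--> ARMED
	ARMED --after func() returns--> BUSY (func() runs again)
	BUSY  --func() reports no more work--> START
so a schedule request that arrives while the task is already executing is never lost.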
int rxe_init_task(struct rxe_task *task,
void *arg, int (*func)(void *), char *name)
int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *))
{
task->arg = arg;
task->func = func;
snprintf(task->name, sizeof(task->name), "%s", name);
task->destroyed = false;
tasklet_setup(&task->tasklet, rxe_do_task);
tasklet_setup(&task->tasklet, do_task);
task->state = TASK_STATE_START;
spin_lock_init(&task->state_lock);
spin_lock_init(&task->lock);
return 0;
}
@ -121,23 +116,28 @@ void rxe_cleanup_task(struct rxe_task *task)
task->destroyed = true;
do {
spin_lock_bh(&task->state_lock);
spin_lock_bh(&task->lock);
idle = (task->state == TASK_STATE_START);
spin_unlock_bh(&task->state_lock);
spin_unlock_bh(&task->lock);
} while (!idle);
tasklet_kill(&task->tasklet);
}
void rxe_run_task(struct rxe_task *task, int sched)
void rxe_run_task(struct rxe_task *task)
{
if (task->destroyed)
return;
if (sched)
tasklet_schedule(&task->tasklet);
else
rxe_do_task(&task->tasklet);
do_task(&task->tasklet);
}
void rxe_sched_task(struct rxe_task *task)
{
if (task->destroyed)
return;
tasklet_schedule(&task->tasklet);
}
void rxe_disable_task(struct rxe_task *task)

View File

@ -21,11 +21,10 @@ enum {
struct rxe_task {
struct tasklet_struct tasklet;
int state;
spinlock_t state_lock; /* spinlock for task state */
spinlock_t lock;
void *arg;
int (*func)(void *arg);
int ret;
char name[16];
bool destroyed;
};
@ -34,8 +33,7 @@ struct rxe_task {
* arg => parameter to pass to fcn
* func => function to call until it returns != 0
*/
int rxe_init_task(struct rxe_task *task,
void *arg, int (*func)(void *), char *name);
int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *));
/* cleanup task */
void rxe_cleanup_task(struct rxe_task *task);
@ -46,18 +44,9 @@ void rxe_cleanup_task(struct rxe_task *task);
*/
int __rxe_do_task(struct rxe_task *task);
/*
* common function called by any of the main tasklets
* If there is any chance that there is additional
* work to do someone must reschedule the task before
* leaving
*/
void rxe_do_task(struct tasklet_struct *t);
void rxe_run_task(struct rxe_task *task);
/* run a task, else schedule it to run as a tasklet, The decision
* to run or schedule tasklet is based on the parameter sched.
*/
void rxe_run_task(struct rxe_task *task, int sched);
void rxe_sched_task(struct rxe_task *task);
/* keep a task from scheduling */
void rxe_disable_task(struct rxe_task *task);

View File

@ -172,10 +172,6 @@ static int rxe_create_ah(struct ib_ah *ibah,
ah->is_user = false;
}
err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
if (err)
return err;
err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
if (err)
@ -184,6 +180,12 @@ static int rxe_create_ah(struct ib_ah *ibah,
/* create index > 0 */
ah->ah_num = ah->elem.index;
err = rxe_ah_chk_attr(ah, init_attr->ah_attr);
if (err) {
rxe_cleanup(ah);
return err;
}
if (uresp) {
/* only if new user provider */
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
@ -206,10 +208,9 @@ static int rxe_create_ah(struct ib_ah *ibah,
static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
int err;
struct rxe_dev *rxe = to_rdev(ibah->device);
struct rxe_ah *ah = to_rah(ibah);
err = rxe_av_chk_attr(rxe, attr);
err = rxe_ah_chk_attr(ah, attr);
if (err)
return err;
@ -238,7 +239,6 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
int err;
int i;
u32 length;
struct rxe_recv_wqe *recv_wqe;
@ -246,15 +246,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
int full;
full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
if (unlikely(full)) {
err = -ENOMEM;
goto err1;
}
if (unlikely(full))
return -ENOMEM;
if (unlikely(num_sge > rq->max_sge)) {
err = -EINVAL;
goto err1;
}
if (unlikely(num_sge > rq->max_sge))
return -EINVAL;
length = 0;
for (i = 0; i < num_sge; i++)
@ -275,9 +271,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);
return 0;
err1:
return err;
}
static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
@ -343,10 +336,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (err)
return err;
err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
if (err)
return err;
return 0;
return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
@ -453,11 +443,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
err = rxe_qp_chk_attr(rxe, qp, attr, mask);
if (err)
goto err1;
return err;
err = rxe_qp_from_attr(qp, attr, mask, udata);
if (err)
goto err1;
return err;
if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
@ -465,9 +455,6 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->attr.dest_qp_num);
return 0;
err1:
return err;
}
static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@ -501,24 +488,21 @@ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
struct rxe_sq *sq = &qp->sq;
if (unlikely(num_sge > sq->max_sge))
goto err1;
return -EINVAL;
if (unlikely(mask & WR_ATOMIC_MASK)) {
if (length < 8)
goto err1;
return -EINVAL;
if (atomic_wr(ibwr)->remote_addr & 0x7)
goto err1;
return -EINVAL;
}
if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
(length > sq->max_inline)))
goto err1;
return -EINVAL;
return 0;
err1:
return -EINVAL;
}
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
@ -695,9 +679,9 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
wr = next;
}
rxe_run_task(&qp->req.task, 1);
rxe_sched_task(&qp->req.task);
if (unlikely(qp->req.state == QP_STATE_ERROR))
rxe_run_task(&qp->comp.task, 1);
rxe_sched_task(&qp->comp.task);
return err;
}
@ -719,7 +703,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
if (qp->is_user) {
/* Utilize process context to do protocol processing */
rxe_run_task(&qp->req.task, 0);
rxe_run_task(&qp->req.task);
return 0;
} else
return rxe_post_send_kernel(qp, wr, bad_wr);
@ -735,14 +719,12 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
*bad_wr = wr;
err = -EINVAL;
goto err1;
return -EINVAL;
}
if (unlikely(qp->srq)) {
*bad_wr = wr;
err = -EINVAL;
goto err1;
return -EINVAL;
}
spin_lock_irqsave(&rq->producer_lock, flags);
@ -759,9 +741,8 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
if (qp->resp.state == QP_STATE_ERROR)
rxe_run_task(&qp->resp.task, 1);
rxe_sched_task(&qp->resp.task);
err1:
return err;
}
@ -826,16 +807,9 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
if (err)
goto err1;
return err;
err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
if (err)
goto err1;
return 0;
err1:
return err;
return rxe_cq_resize_queue(cq, cqe, uresp, udata);
}
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
@ -902,6 +876,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
rxe_get(pd);
mr->ibmr.pd = ibpd;
mr->ibmr.device = ibpd->device;
rxe_mr_init_dma(access, mr);
rxe_finalize(mr);
@ -921,26 +896,23 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
struct rxe_mr *mr;
mr = rxe_alloc(&rxe->mr_pool);
if (!mr) {
err = -ENOMEM;
goto err2;
}
if (!mr)
return ERR_PTR(-ENOMEM);
rxe_get(pd);
mr->ibmr.pd = ibpd;
mr->ibmr.device = ibpd->device;
err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
if (err)
goto err3;
goto err1;
rxe_finalize(mr);
return &mr->ibmr;
err3:
err1:
rxe_cleanup(mr);
err2:
return ERR_PTR(err);
}
@ -956,25 +928,23 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
return ERR_PTR(-EINVAL);
mr = rxe_alloc(&rxe->mr_pool);
if (!mr) {
err = -ENOMEM;
goto err1;
}
if (!mr)
return ERR_PTR(-ENOMEM);
rxe_get(pd);
mr->ibmr.pd = ibpd;
mr->ibmr.device = ibpd->device;
err = rxe_mr_init_fast(max_num_sg, mr);
if (err)
goto err2;
goto err1;
rxe_finalize(mr);
return &mr->ibmr;
err2:
rxe_cleanup(mr);
err1:
rxe_cleanup(mr);
return ERR_PTR(err);
}
@ -1134,7 +1104,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
err = ib_register_device(dev, ibdev_name, NULL);
if (err)
pr_warn("%s failed with error %d\n", __func__, err);
rxe_dbg(rxe, "failed with error %d\n", err);
/*
* Note that rxe may be invalid at this point if another thread

View File

@ -165,6 +165,12 @@ struct resp_res {
u64 va;
u32 resid;
} read;
struct {
u32 length;
u64 va;
u8 type;
u8 level;
} flush;
};
};
@ -304,7 +310,6 @@ struct rxe_mr {
u32 lkey;
u32 rkey;
enum rxe_mr_state state;
enum ib_mr_type type;
u32 offset;
int access;

View File

@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
memset(wc, 0, sizeof(*wc));
wc->wr_id = cqe->id;
wc->status = map_cqe_status[cqe->status].ib;
wc->opcode = map_wc_opcode[cqe->opcode];
wc->byte_len = cqe->bytes;
/*
@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
wc->wc_flags = IB_WC_WITH_INVALIDATE;
}
wc->qp = cqe->base_qp;
wc->opcode = map_wc_opcode[cqe->opcode];
wc->status = map_cqe_status[cqe->status].ib;
siw_dbg_cq(cq,
"idx %u, type %d, flags %2x, id 0x%pK\n",
cq->cq_get % cq->num_cqe, cqe->opcode,
cqe->flags, (void *)(uintptr_t)cqe->id);
} else {
/*
* A malicious user may set invalid opcode or
* status in the user mmapped CQE array.
* Sanity check and correct values in that case
* to avoid out-of-bounds access to global arrays
* for opcode and status mapping.
*/
u8 opcode = cqe->opcode;
u16 status = cqe->status;
if (opcode >= SIW_NUM_OPCODES) {
opcode = 0;
status = SIW_WC_GENERAL_ERR;
} else if (status >= SIW_NUM_WC_STATUS) {
status = SIW_WC_GENERAL_ERR;
}
wc->opcode = map_wc_opcode[opcode];
wc->status = map_cqe_status[status].ib;
}
WRITE_ONCE(cqe->flags, 0);
cq->cq_get++;

View File

@ -676,13 +676,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
struct siw_sqe sqe = {};
int rv = 0;
while (wr) {
sqe.id = wr->wr_id;
sqe.opcode = wr->opcode;
rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
struct siw_sqe sqe = {};
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
sqe.opcode = SIW_OP_WRITE;
break;
case IB_WR_RDMA_READ:
sqe.opcode = SIW_OP_READ;
break;
case IB_WR_RDMA_READ_WITH_INV:
sqe.opcode = SIW_OP_READ_LOCAL_INV;
break;
case IB_WR_SEND:
sqe.opcode = SIW_OP_SEND;
break;
case IB_WR_SEND_WITH_IMM:
sqe.opcode = SIW_OP_SEND_WITH_IMM;
break;
case IB_WR_SEND_WITH_INV:
sqe.opcode = SIW_OP_SEND_REMOTE_INV;
break;
case IB_WR_LOCAL_INV:
sqe.opcode = SIW_OP_INVAL_STAG;
break;
case IB_WR_REG_MR:
sqe.opcode = SIW_OP_REG_MR;
break;
default:
rv = -EINVAL;
break;
}
if (!rv) {
sqe.id = wr->wr_id;
rv = siw_sqe_complete(qp, &sqe, 0,
SIW_WC_WR_FLUSH_ERR);
}
if (rv) {
if (bad_wr)
*bad_wr = wr;

View File

@ -41,6 +41,11 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
[IFLA_IPOIB_UMCAST] = { .type = NLA_U16 },
};
static unsigned int ipoib_get_max_num_queues(void)
{
return min_t(unsigned int, num_possible_cpus(), 128);
}
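For example, a machine with 32 possible CPUs advertises 32 rx/tx queues through these callbacks, while anything above 128 CPUs is capped at 128.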
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@ -172,6 +177,8 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.changelink = ipoib_changelink,
.get_size = ipoib_get_size,
.fill_info = ipoib_fill_info,
.get_num_rx_queues = ipoib_get_max_num_queues,
.get_num_tx_queues = ipoib_get_max_num_queues,
};
struct rtnl_link_ops *ipoib_get_link_ops(void)

View File

@ -347,22 +347,6 @@ static void iser_device_try_release(struct iser_device *device)
mutex_unlock(&ig.device_list_mutex);
}
/*
* Called with state mutex held
*/
static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
enum iser_conn_state comp,
enum iser_conn_state exch)
{
int ret;
ret = (iser_conn->state == comp);
if (ret)
iser_conn->state = exch;
return ret;
}
void iser_release_work(struct work_struct *work)
{
struct iser_conn *iser_conn;
@ -464,11 +448,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
struct ib_conn *ib_conn = &iser_conn->ib_conn;
int err = 0;
lockdep_assert_held(&iser_conn->state_mutex);
/* terminate the iser conn only if the conn state is UP */
if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
ISER_CONN_TERMINATING))
if (iser_conn->state != ISER_CONN_UP)
return 0;
iser_conn->state = ISER_CONN_TERMINATING;
iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);
/* suspend queuing of new iscsi commands */
@ -498,9 +484,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
*/
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
struct iser_conn *iser_conn;
struct iser_conn *iser_conn = cma_id->context;
lockdep_assert_held(&iser_conn->state_mutex);
iser_conn = cma_id->context;
iser_conn->state = ISER_CONN_TERMINATING;
}
@ -542,12 +529,13 @@ static void iser_calc_scsi_params(struct iser_conn *iser_conn,
*/
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
struct iser_conn *iser_conn = cma_id->context;
struct iser_device *device;
struct iser_conn *iser_conn;
struct ib_conn *ib_conn;
int ret;
iser_conn = cma_id->context;
lockdep_assert_held(&iser_conn->state_mutex);
if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
@ -597,6 +585,8 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct ib_device *ib_dev = ib_conn->device->ib_device;
lockdep_assert_held(&iser_conn->state_mutex);
if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
@ -629,14 +619,18 @@ failure:
iser_connect_error(cma_id);
}
/*
* Called with state mutex held
*/
static void iser_connected_handler(struct rdma_cm_id *cma_id,
const void *private_data)
{
struct iser_conn *iser_conn;
struct iser_conn *iser_conn = cma_id->context;
struct ib_qp_attr attr;
struct ib_qp_init_attr init_attr;
iser_conn = cma_id->context;
lockdep_assert_held(&iser_conn->state_mutex);
if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
@ -657,10 +651,20 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id,
complete(&iser_conn->up_completion);
}
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
/*
* Called with state mutex held
*/
static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
bool destroy)
{
struct iser_conn *iser_conn = cma_id->context;
lockdep_assert_held(&iser_conn->state_mutex);
/*
* We are not guaranteed that we visited disconnected_handler
* by now, call it here to be safe that we handle CM drep
* and flush errors.
*/
if (iser_conn_terminate(iser_conn)) {
if (iser_conn->iscsi_conn)
iscsi_conn_failure(iser_conn->iscsi_conn,
@ -668,19 +672,6 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
else
iser_err("iscsi_iser connection isn't bound\n");
}
}
static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
bool destroy)
{
struct iser_conn *iser_conn = cma_id->context;
/*
* We are not guaranteed that we visited disconnected_handler
* by now, call it here to be safe that we handle CM drep
* and flush errors.
*/
iser_disconnected_handler(cma_id);
iser_free_ib_conn_res(iser_conn, destroy);
complete(&iser_conn->ib_completion);
}

View File

@ -993,9 +993,8 @@ isert_rx_login_req(struct isert_conn *isert_conn)
* login request PDU.
*/
login->leading_connection = (!login_req->tsih) ? 1 : 0;
login->current_stage =
(login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK)
>> 2;
login->current_stage = ISCSI_LOGIN_CURRENT_STAGE(
login_req->flags);
login->version_min = login_req->min_version;
login->version_max = login_req->max_version;
memcpy(login->isid, login_req->isid, 6);

View File

@ -1064,10 +1064,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count)
/* Align the MR to a 4K page size to match the block virt boundary */
nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K);
if (nr < 0)
return nr;
if (nr < req->sg_cnt)
return -EINVAL;
if (nr != count)
return nr < 0 ? nr : -EINVAL;
ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
return nr;

View File

@ -68,10 +68,7 @@ enum {
struct rtrs_ib_dev;
struct rtrs_rdma_dev_pd_ops {
struct rtrs_ib_dev *(*alloc)(void);
void (*free)(struct rtrs_ib_dev *dev);
int (*init)(struct rtrs_ib_dev *dev);
void (*deinit)(struct rtrs_ib_dev *dev);
};
struct rtrs_rdma_dev_pd {

View File

@ -203,7 +203,6 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_path *srv_path)
mutex_lock(&srv->paths_mutex);
if (!--srv->dev_ref) {
kobject_del(srv->kobj_paths);
kobject_put(srv->kobj_paths);
mutex_unlock(&srv->paths_mutex);
device_del(&srv->dev);
@ -304,12 +303,18 @@ destroy_root:
void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path)
{
if (srv_path->kobj.state_in_sysfs) {
if (srv_path->stats->kobj_stats.state_in_sysfs) {
sysfs_remove_group(&srv_path->stats->kobj_stats,
&rtrs_srv_stats_attr_group);
kobject_del(&srv_path->stats->kobj_stats);
kobject_put(&srv_path->stats->kobj_stats);
sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
kobject_put(&srv_path->kobj);
rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
}
if (srv_path->kobj.state_in_sysfs) {
sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
kobject_del(&srv_path->kobj);
kobject_put(&srv_path->kobj);
}
rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
}

View File

@ -561,9 +561,11 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
{
struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_path *ss = &srv_path->s;
int i, mri, err, mrs_num;
int i, err, mrs_num;
unsigned int chunk_bits;
int chunks_per_mr = 1;
struct ib_mr *mr;
struct sg_table *sgt;
/*
* Here we map queue_depth chunks to MR. Firstly we have to
@ -586,16 +588,14 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
if (!srv_path->mrs)
return -ENOMEM;
srv_path->mrs_num = mrs_num;
for (mri = 0; mri < mrs_num; mri++) {
struct rtrs_srv_mr *srv_mr = &srv_path->mrs[mri];
struct sg_table *sgt = &srv_mr->sgt;
for (srv_path->mrs_num = 0; srv_path->mrs_num < mrs_num;
srv_path->mrs_num++) {
struct rtrs_srv_mr *srv_mr = &srv_path->mrs[srv_path->mrs_num];
struct scatterlist *s;
struct ib_mr *mr;
int nr, nr_sgt, chunks;
chunks = chunks_per_mr * mri;
sgt = &srv_mr->sgt;
chunks = chunks_per_mr * srv_path->mrs_num;
if (!always_invalidate)
chunks_per_mr = min_t(int, chunks_per_mr,
srv->queue_depth - chunks);
@ -622,7 +622,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
}
nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt,
NULL, max_chunk_size);
if (nr < 0 || nr < sgt->nents) {
if (nr != nr_sgt) {
err = nr < 0 ? nr : -EINVAL;
goto dereg_mr;
}
@ -644,31 +644,24 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
srv_mr->mr = mr;
continue;
err:
while (mri--) {
srv_mr = &srv_path->mrs[mri];
sgt = &srv_mr->sgt;
mr = srv_mr->mr;
rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
dereg_mr:
ib_dereg_mr(mr);
unmap_sg:
ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl,
sgt->nents, DMA_BIDIRECTIONAL);
free_sg:
sg_free_table(sgt);
}
kfree(srv_path->mrs);
return err;
}
chunk_bits = ilog2(srv->queue_depth - 1) + 1;
srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits);
return 0;
dereg_mr:
ib_dereg_mr(mr);
unmap_sg:
ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl,
sgt->nents, DMA_BIDIRECTIONAL);
free_sg:
sg_free_table(sgt);
err:
unmap_cont_bufs(srv_path);
return err;
}
static void rtrs_srv_hb_err_handler(struct rtrs_con *c)
@ -1678,12 +1671,6 @@ static int create_con(struct rtrs_srv_path *srv_path,
srv->queue_depth * (1 + 2) + 1);
max_recv_wr = srv->queue_depth + 1;
/*
* If we have all receive requests posted and
* all write requests posted and each read request
* requires an invalidate request + drain
* and qp gets into error state.
*/
}
cq_num = max_send_wr + max_recv_wr;
atomic_set(&con->c.sq_wr_avail, max_send_wr);
@ -1950,22 +1937,21 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
{
struct rtrs_srv_path *srv_path = NULL;
struct rtrs_path *s = NULL;
struct rtrs_con *c = NULL;
if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
struct rtrs_con *c = cm_id->context;
s = c->path;
srv_path = to_srv_path(s);
}
switch (ev->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST)
/*
* In case of error cma.c will destroy cm_id,
* see cma_process_remove()
*/
return rtrs_rdma_connect(cm_id, ev->param.conn.private_data,
ev->param.conn.private_data_len);
c = cm_id->context;
s = c->path;
srv_path = to_srv_path(s);
switch (ev->event) {
case RDMA_CM_EVENT_ESTABLISHED:
/* Nothing here */
break;

View File

@ -557,7 +557,6 @@ EXPORT_SYMBOL(rtrs_addr_to_sockaddr);
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
struct rtrs_rdma_dev_pd *pool)
{
WARN_ON(pool->ops && (!pool->ops->alloc ^ !pool->ops->free));
INIT_LIST_HEAD(&pool->list);
mutex_init(&pool->mutex);
pool->pd_flags = pd_flags;
@ -583,15 +582,8 @@ static void dev_free(struct kref *ref)
list_del(&dev->entry);
mutex_unlock(&pool->mutex);
if (pool->ops && pool->ops->deinit)
pool->ops->deinit(dev);
ib_dealloc_pd(dev->ib_pd);
if (pool->ops && pool->ops->free)
pool->ops->free(dev);
else
kfree(dev);
kfree(dev);
}
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev)
@ -618,11 +610,8 @@ rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
goto out_unlock;
}
mutex_unlock(&pool->mutex);
if (pool->ops && pool->ops->alloc)
dev = pool->ops->alloc();
else
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (IS_ERR_OR_NULL(dev))
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
goto out_err;
kref_init(&dev->ref);
@ -644,10 +633,7 @@ out_unlock:
out_free_pd:
ib_dealloc_pd(dev->ib_pd);
out_free_dev:
if (pool->ops && pool->ops->free)
pool->ops->free(dev);
else
kfree(dev);
kfree(dev);
out_err:
return NULL;
}

View File

@ -3410,7 +3410,8 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_PKEY:
if (match_hex(args, &token)) {
ret = match_hex(args, &token);
if (ret) {
pr_warn("bad P_Key parameter '%s'\n", p);
goto out;
}
@ -3470,7 +3471,8 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_MAX_SECT:
if (match_int(args, &token)) {
ret = match_int(args, &token);
if (ret) {
pr_warn("bad max sect parameter '%s'\n", p);
goto out;
}
@ -3478,8 +3480,15 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_QUEUE_SIZE:
if (match_int(args, &token) || token < 1) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 1) {
pr_warn("bad queue_size parameter '%s'\n", p);
ret = -EINVAL;
goto out;
}
target->scsi_host->can_queue = token;
@ -3490,25 +3499,40 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_MAX_CMD_PER_LUN:
if (match_int(args, &token) || token < 1) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 1) {
pr_warn("bad max cmd_per_lun parameter '%s'\n",
p);
ret = -EINVAL;
goto out;
}
target->scsi_host->cmd_per_lun = token;
break;
case SRP_OPT_TARGET_CAN_QUEUE:
if (match_int(args, &token) || token < 1) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 1) {
pr_warn("bad max target_can_queue parameter '%s'\n",
p);
ret = -EINVAL;
goto out;
}
target->target_can_queue = token;
break;
case SRP_OPT_IO_CLASS:
if (match_hex(args, &token)) {
ret = match_hex(args, &token);
if (ret) {
pr_warn("bad IO class parameter '%s'\n", p);
goto out;
}
@ -3517,6 +3541,7 @@ static int srp_parse_options(struct net *net, const char *buf,
pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
token, SRP_REV10_IB_IO_CLASS,
SRP_REV16A_IB_IO_CLASS);
ret = -EINVAL;
goto out;
}
target->io_class = token;
@ -3539,16 +3564,24 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_CMD_SG_ENTRIES:
if (match_int(args, &token) || token < 1 || token > 255) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 1 || token > 255) {
pr_warn("bad max cmd_sg_entries parameter '%s'\n",
p);
ret = -EINVAL;
goto out;
}
target->cmd_sg_cnt = token;
break;
case SRP_OPT_ALLOW_EXT_SG:
if (match_int(args, &token)) {
ret = match_int(args, &token);
if (ret) {
pr_warn("bad allow_ext_sg parameter '%s'\n", p);
goto out;
}
@ -3556,43 +3589,77 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_SG_TABLESIZE:
if (match_int(args, &token) || token < 1 ||
token > SG_MAX_SEGMENTS) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 1 || token > SG_MAX_SEGMENTS) {
pr_warn("bad max sg_tablesize parameter '%s'\n",
p);
ret = -EINVAL;
goto out;
}
target->sg_tablesize = token;
break;
case SRP_OPT_COMP_VECTOR:
if (match_int(args, &token) || token < 0) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 0) {
pr_warn("bad comp_vector parameter '%s'\n", p);
ret = -EINVAL;
goto out;
}
target->comp_vector = token;
break;
case SRP_OPT_TL_RETRY_COUNT:
if (match_int(args, &token) || token < 2 || token > 7) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 2 || token > 7) {
pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
p);
ret = -EINVAL;
goto out;
}
target->tl_retry_count = token;
break;
case SRP_OPT_MAX_IT_IU_SIZE:
if (match_int(args, &token) || token < 0) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 0) {
pr_warn("bad maximum initiator to target IU size '%s'\n", p);
ret = -EINVAL;
goto out;
}
target->max_it_iu_size = token;
break;
case SRP_OPT_CH_COUNT:
if (match_int(args, &token) || token < 1) {
ret = match_int(args, &token);
if (ret) {
pr_warn("match_int() failed for channel count parameter '%s', Error %d\n",
p, ret);
goto out;
}
if (token < 1) {
pr_warn("bad channel count %s\n", p);
ret = -EINVAL;
goto out;
}
target->ch_count = token;
@ -3601,6 +3668,7 @@ static int srp_parse_options(struct net *net, const char *buf,
default:
pr_warn("unknown parameter or missing value '%s' in target creation request\n",
p);
ret = -EINVAL;
goto out;
}
}

View File

@ -671,8 +671,7 @@ free_q:
return err;
}
int mana_gd_destroy_dma_region(struct gdma_context *gc,
gdma_obj_handle_t dma_region_handle)
int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
{
struct gdma_destroy_dma_region_req req = {};
struct gdma_general_resp resp = {};

View File

@ -65,8 +65,6 @@ enum {
GDMA_DEVICE_MANA = 2,
};
typedef u64 gdma_obj_handle_t;
struct gdma_resource {
/* Protect the bitmap */
spinlock_t lock;
@ -200,7 +198,7 @@ struct gdma_mem_info {
u64 length;
/* Allocated by the PF driver */
gdma_obj_handle_t dma_region_handle;
u64 dma_region_handle;
};
#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
@ -632,7 +630,7 @@ struct gdma_create_queue_req {
u32 reserved1;
u32 pdid;
u32 doolbell_id;
gdma_obj_handle_t gdma_region;
u64 gdma_region;
u32 reserved2;
u32 queue_size;
u32 log2_throttle_limit;
@ -707,14 +705,14 @@ struct gdma_create_dma_region_req {
struct gdma_create_dma_region_resp {
struct gdma_resp_hdr hdr;
gdma_obj_handle_t dma_region_handle;
u64 dma_region_handle;
}; /* HW DATA */
/* GDMA_DMA_REGION_ADD_PAGES */
struct gdma_dma_region_add_pages_req {
struct gdma_req_hdr hdr;
gdma_obj_handle_t dma_region_handle;
u64 dma_region_handle;
u32 page_addr_list_len;
u32 reserved3;
@ -726,7 +724,7 @@ struct gdma_dma_region_add_pages_req {
struct gdma_destroy_dma_region_req {
struct gdma_req_hdr hdr;
gdma_obj_handle_t dma_region_handle;
u64 dma_region_handle;
}; /* HW DATA */
enum gdma_pd_flags {
@ -741,14 +739,14 @@ struct gdma_create_pd_req {
struct gdma_create_pd_resp {
struct gdma_resp_hdr hdr;
gdma_obj_handle_t pd_handle;
u64 pd_handle;
u32 pd_id;
u32 reserved;
};/* HW DATA */
struct gdma_destroy_pd_req {
struct gdma_req_hdr hdr;
gdma_obj_handle_t pd_handle;
u64 pd_handle;
};/* HW DATA */
struct gdma_destory_pd_resp {
@ -764,11 +762,11 @@ enum gdma_mr_type {
};
struct gdma_create_mr_params {
gdma_obj_handle_t pd_handle;
u64 pd_handle;
enum gdma_mr_type mr_type;
union {
struct {
gdma_obj_handle_t dma_region_handle;
u64 dma_region_handle;
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
} gva;
@ -777,13 +775,13 @@ struct gdma_create_mr_params {
struct gdma_create_mr_request {
struct gdma_req_hdr hdr;
gdma_obj_handle_t pd_handle;
u64 pd_handle;
enum gdma_mr_type mr_type;
u32 reserved_1;
union {
struct {
gdma_obj_handle_t dma_region_handle;
u64 dma_region_handle;
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
} gva;
@ -794,14 +792,14 @@ struct gdma_create_mr_request {
struct gdma_create_mr_response {
struct gdma_resp_hdr hdr;
gdma_obj_handle_t mr_handle;
u64 mr_handle;
u32 lkey;
u32 rkey;
};/* HW DATA */
struct gdma_destroy_mr_request {
struct gdma_req_hdr hdr;
gdma_obj_handle_t mr_handle;
u64 mr_handle;
};/* HW DATA */
struct gdma_destroy_mr_response {
@ -835,7 +833,6 @@ void mana_gd_free_memory(struct gdma_mem_info *gmi);
int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
u32 resp_len, void *resp);
int mana_gd_destroy_dma_region(struct gdma_context *gc,
gdma_obj_handle_t dma_region_handle);
int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
#endif /* _GDMA_H */

View File

@ -412,6 +412,9 @@ int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
extern const struct ethtool_ops mana_ethtool_ops;
/* A CQ can be created not associated with any EQ */
#define GDMA_CQ_NO_EQ 0xffff
struct mana_obj_spec {
u32 queue_index;
u64 gdma_region;

View File

@ -84,6 +84,8 @@ enum {
/* opcode 0x15 is reserved */
IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16,
IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17,
IB_OPCODE_FLUSH = 0x1C,
IB_OPCODE_ATOMIC_WRITE = 0x1D,
/* real constants follow -- see comment about above IB_OPCODE()
macro for more details */
@ -112,6 +114,8 @@ enum {
IB_OPCODE(RC, FETCH_ADD),
IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
IB_OPCODE(RC, FLUSH),
IB_OPCODE(RC, ATOMIC_WRITE),
/* UC */
IB_OPCODE(UC, SEND_FIRST),
@ -149,6 +153,7 @@ enum {
IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
IB_OPCODE(RD, COMPARE_SWAP),
IB_OPCODE(RD, FETCH_ADD),
IB_OPCODE(RD, FLUSH),
/* UD */
IB_OPCODE(UD, SEND_ONLY),
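For reference, IB_OPCODE(transport, op) sums the transport base and the operation code, so the new entries resolve to e.g. IB_OPCODE_RC_FLUSH = 0x00 + 0x1C = 0x1C, IB_OPCODE_RC_ATOMIC_WRITE = 0x00 + 0x1D = 0x1D and IB_OPCODE_RD_FLUSH = 0x40 + 0x1C = 0x5C (assuming the usual transport bases of 0x00 for RC and 0x40 for RD).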

View File

@ -270,6 +270,10 @@ enum ib_device_cap_flags {
/* The device supports padding incoming writes to cacheline. */
IB_DEVICE_PCI_WRITE_END_PADDING =
IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
/* Placement type attributes */
IB_DEVICE_FLUSH_GLOBAL = IB_UVERBS_DEVICE_FLUSH_GLOBAL,
IB_DEVICE_FLUSH_PERSISTENT = IB_UVERBS_DEVICE_FLUSH_PERSISTENT,
IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE,
};
enum ib_kernel_cap_flags {
@ -982,9 +986,11 @@ enum ib_wc_opcode {
IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
IB_WC_LSO = IB_UVERBS_WC_TSO,
IB_WC_ATOMIC_WRITE = IB_UVERBS_WC_ATOMIC_WRITE,
IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
IB_WC_FLUSH = IB_UVERBS_WC_FLUSH,
/*
* Set value of IB_WC_RECV so consumers can test if a completion is a
* receive by testing (opcode & IB_WC_RECV).
@ -1325,6 +1331,8 @@ enum ib_wr_opcode {
IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
IB_WR_FLUSH = IB_UVERBS_WR_FLUSH,
IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE,
/* These are kernel only and can not be issued by userspace */
IB_WR_REG_MR = 0x20,
@ -1458,10 +1466,12 @@ enum ib_access_flags {
IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
IB_ACCESS_FLUSH_GLOBAL = IB_UVERBS_ACCESS_FLUSH_GLOBAL,
IB_ACCESS_FLUSH_PERSISTENT = IB_UVERBS_ACCESS_FLUSH_PERSISTENT,
IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
IB_ACCESS_SUPPORTED =
((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
((IB_ACCESS_FLUSH_PERSISTENT << 1) - 1) | IB_ACCESS_OPTIONAL,
};
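With IB_UVERBS_ACCESS_FLUSH_PERSISTENT defined as 1 << 9 (see the ib_user_ioctl_verbs.h hunk below), the supported mask works out to ((1 << 9) << 1) - 1 = 0x3ff, i.e. every defined access bit up to and including the two new flush flags, OR'd with the optional range.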
/*
@ -2203,6 +2213,7 @@ struct ib_port_data {
struct ib_port_cache cache;
struct net_device __rcu *netdev;
netdevice_tracker netdev_tracker;
struct hlist_node ndev_hash_link;
struct rdma_port_counter port_counter;
struct ib_port *sysfs;
@ -4321,6 +4332,8 @@ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
static inline int ib_check_mr_access(struct ib_device *ib_dev,
unsigned int flags)
{
u64 device_cap = ib_dev->attrs.device_cap_flags;
/*
* Local write permission is required if remote write or
* remote atomic permission is also requested.
@ -4334,7 +4347,14 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev,
if (flags & IB_ACCESS_ON_DEMAND &&
!(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
return -EINVAL;
return -EOPNOTSUPP;
if ((flags & IB_ACCESS_FLUSH_GLOBAL &&
!(device_cap & IB_DEVICE_FLUSH_GLOBAL)) ||
(flags & IB_ACCESS_FLUSH_PERSISTENT &&
!(device_cap & IB_DEVICE_FLUSH_PERSISTENT)))
return -EOPNOTSUPP;
return 0;
}
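A hypothetical caller-side sketch of the new check; the ib_dev variable and the flag choice are illustrative, not from the patch:
	int access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
		     IB_ACCESS_FLUSH_PERSISTENT;
	int err = ib_check_mr_access(ib_dev, access);
	/* -EOPNOTSUPP unless the device advertises
	 * IB_DEVICE_FLUSH_PERSISTENT in attrs.device_cap_flags
	 */
	if (err)
		return err;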

View File

@ -51,7 +51,7 @@ static inline void *opa_vnic_dev_priv(const struct net_device *dev)
return oparn->dev_priv;
}
/* opa_vnic skb meta data structrue */
/* opa_vnic skb meta data structure */
struct opa_vnic_skb_mdata {
u8 vl;
u8 entropy;

View File

@ -49,7 +49,6 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
__field(int, retries_left)
__field(int, max_retries)
__field(int, retry)
__field(u16, pkey)
),
TP_fast_assign(
@ -89,7 +88,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
"hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \
"method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \
"attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\
"pkey 0x%x rpqn 0x%x rqpkey 0x%x",
"rpqn 0x%x rqpkey 0x%x",
__entry->dev_index, __entry->port_num, __entry->qp_num,
__entry->agent_priv, be64_to_cpu(__entry->wrtid),
__entry->retries_left, __entry->max_retries,
@ -100,7 +99,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
be16_to_cpu(__entry->class_specific),
be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
be32_to_cpu(__entry->attr_mod),
be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey,
be32_to_cpu(__entry->dlid), __entry->sl,
__entry->rqpn, __entry->rqkey
)
);
@ -204,7 +203,6 @@ TRACE_EVENT(ib_mad_recv_done_handler,
__field(u16, wc_status)
__field(u32, slid)
__field(u32, dev_index)
__field(u16, pkey)
),
TP_fast_assign(
@ -224,9 +222,6 @@ TRACE_EVENT(ib_mad_recv_done_handler,
__entry->slid = wc->slid;
__entry->src_qp = wc->src_qp;
__entry->sl = wc->sl;
ib_query_pkey(qp_info->port_priv->device,
qp_info->port_priv->port_num,
wc->pkey_index, &__entry->pkey);
__entry->wc_status = wc->status;
),
@ -234,7 +229,7 @@ TRACE_EVENT(ib_mad_recv_done_handler,
"base_ver 0x%02x class 0x%02x class_ver 0x%02x " \
"method 0x%02x status 0x%04x class_specific 0x%04x " \
"tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \
"slid 0x%08x src QP%d, sl %d pkey 0x%04x",
"slid 0x%08x src QP%d, sl %d",
__entry->dev_index, __entry->port_num, __entry->qp_num,
__entry->wc_status,
__entry->length,
@ -244,7 +239,7 @@ TRACE_EVENT(ib_mad_recv_done_handler,
be16_to_cpu(__entry->class_specific),
be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
be32_to_cpu(__entry->attr_mod),
__entry->slid, __entry->src_qp, __entry->sl, __entry->pkey
__entry->slid, __entry->src_qp, __entry->sl
)
);

View File

@ -85,11 +85,26 @@ struct hns_roce_ib_create_qp_resp {
__aligned_u64 dwqe_mmap_key;
};
enum {
HNS_ROCE_EXSGE_FLAGS = 1 << 0,
};
enum {
HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
};
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 cqe_size;
__u32 srq_tab_size;
__u32 reserved;
__u32 config;
__u32 max_inline_data;
};
struct hns_roce_ib_alloc_ucontext {
__u32 config;
__u32 reserved;
};
struct hns_roce_ib_alloc_pd_resp {

View File

@ -57,6 +57,8 @@ enum ib_uverbs_access_flags {
IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
IB_UVERBS_ACCESS_FLUSH_GLOBAL = 1 << 8,
IB_UVERBS_ACCESS_FLUSH_PERSISTENT = 1 << 9,
IB_UVERBS_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_OPTIONAL_FIRST,
IB_UVERBS_ACCESS_OPTIONAL_RANGE =
@ -251,6 +253,7 @@ enum rdma_driver_id {
RDMA_DRIVER_EFA,
RDMA_DRIVER_SIW,
RDMA_DRIVER_ERDMA,
RDMA_DRIVER_MANA,
};
enum ib_uverbs_gid_type {

View File

@ -105,6 +105,18 @@ enum {
IB_USER_VERBS_EX_CMD_MODIFY_CQ
};
/* see IBA A19.4.1.1 Placement Types */
enum ib_placement_type {
IB_FLUSH_GLOBAL = 1U << 0,
IB_FLUSH_PERSISTENT = 1U << 1,
};
/* see IBA A19.4.1.2 Selectivity Level */
enum ib_selectivity_level {
IB_FLUSH_RANGE = 0,
IB_FLUSH_MR,
};
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
@ -466,6 +478,8 @@ enum ib_uverbs_wc_opcode {
IB_UVERBS_WC_BIND_MW = 5,
IB_UVERBS_WC_LOCAL_INV = 6,
IB_UVERBS_WC_TSO = 7,
IB_UVERBS_WC_FLUSH = 8,
IB_UVERBS_WC_ATOMIC_WRITE = 9,
};
struct ib_uverbs_wc {
@ -784,6 +798,8 @@ enum ib_uverbs_wr_opcode {
IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
IB_UVERBS_WR_FLUSH = 14,
IB_UVERBS_WR_ATOMIC_WRITE = 15,
/* Review enum ib_wr_opcode before modifying this */
};
@ -1331,6 +1347,11 @@ enum ib_uverbs_device_cap_flags {
/* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. */
IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34,
IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36,
/* Flush placement types */
IB_UVERBS_DEVICE_FLUSH_GLOBAL = 1ULL << 38,
IB_UVERBS_DEVICE_FLUSH_PERSISTENT = 1ULL << 39,
/* Atomic write attributes */
IB_UVERBS_DEVICE_ATOMIC_WRITE = 1ULL << 40,
};
enum ib_uverbs_raw_packet_caps {

View File

@ -0,0 +1,66 @@
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
/*
* Copyright (c) 2022, Microsoft Corporation. All rights reserved.
*/
#ifndef MANA_ABI_USER_H
#define MANA_ABI_USER_H
#include <linux/types.h>
#include <rdma/ib_user_ioctl_verbs.h>
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MANA_IB_UVERBS_ABI_VERSION 1
struct mana_ib_create_cq {
__aligned_u64 buf_addr;
};
struct mana_ib_create_qp {
__aligned_u64 sq_buf_addr;
__u32 sq_buf_size;
__u32 port;
};
struct mana_ib_create_qp_resp {
__u32 sqid;
__u32 cqid;
__u32 tx_vp_offset;
__u32 reserved;
};
struct mana_ib_create_wq {
__aligned_u64 wq_buf_addr;
__u32 wq_buf_size;
__u32 reserved;
};
/* RX Hash function flags */
enum mana_ib_rx_hash_function_flags {
MANA_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
};
struct mana_ib_create_qp_rss {
__aligned_u64 rx_hash_fields_mask;
__u8 rx_hash_function;
__u8 reserved[7];
__u32 rx_hash_key_len;
__u8 rx_hash_key[40];
__u32 port;
};
struct rss_resp_entry {
__u32 cqid;
__u32 wqid;
};
struct mana_ib_create_qp_rss_resp {
__aligned_u64 num_entries;
struct rss_resp_entry entries[64];
};
#endif

View File

@ -82,6 +82,13 @@ struct rxe_send_wr {
__u32 invalidate_rkey;
} ex;
union {
struct {
__aligned_u64 remote_addr;
__u32 length;
__u32 rkey;
__u8 type;
__u8 level;
} flush;
struct {
__aligned_u64 remote_addr;
__u32 rkey;
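A hypothetical user-space sketch of filling the new flush fields to flush a remote 4 KiB range into the persistence domain; remote_va and rkey are assumed variables, and the opcode and placement enums come from the verbs header hunks above:
	struct rxe_send_wr wr = {};
	wr.opcode            = IB_UVERBS_WR_FLUSH;
	wr.flush.remote_addr = remote_va;
	wr.flush.length      = 4096;
	wr.flush.rkey        = rkey;
	wr.flush.type        = IB_FLUSH_PERSISTENT;	/* placement type */
	wr.flush.level       = IB_FLUSH_RANGE;		/* selectivity level */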
@ -146,6 +153,7 @@ struct rxe_dma_info {
__u32 reserved;
union {
__DECLARE_FLEX_ARRAY(__u8, inline_data);
__DECLARE_FLEX_ARRAY(__u8, atomic_wr);
__DECLARE_FLEX_ARRAY(struct rxe_sge, sge);
};
};