Round one of 4.8 code

- Updates/fixes for iw_cxgb4 driver
 - Updates/fixes for mlx5 driver
 - Add flow steering and RSS API
 - Add hardware stats to mlx4 and mlx5 drivers
 - Add firmware version API for RDMA driver use
 - Add the rxe driver (this is a software RoCE driver that makes any
   Ethernet device a RoCE device)
 - Fixes for i40iw driver
 - Support for send only multicast joins in the cma layer
 - Other minor fixes
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJXo1vCAAoJELgmozMOVy/d0HcQAJqMi7siD9cSaMViYbu812pq
 3kNkHZbLNB/947uShDPhhFAWFXU0nRxEnTNSvYxRo+nxnDE/9hEEXpx8OzzKLNU+
 GXyDeHsEEriSFcaSne5Tak/QuiFm3PJv73ttXQROCtHG7KxLG9ieVbfusz42Xwiu
 5R21qfp6PZEOC+j7L/fTZh/kEN3cfaDYrGnCgmU3z0ka9xG5Qe2/+uWGNkuioRA5
 phFUR4MS+1n/VrnxPHrLXTrqv3sw8YfCfRImaXSBrxFVMqhno+cDDtEJQCRnmNrq
 7KcJO2KqDMl/QqsjxdwqojNpUTh2t7SeOeQuzUsfXl15yyyetq2Zu7ZurkCGjNtQ
 NtTt6hv5eXq3mNuBmOPKYDDgakSYyYjS0zueoi8wFFqIeSYxRJv4wx4xoeJ/Bsz8
 2LplpaPMQaTM65FhzYXGhYNBKaRkqjL9ihbIl1OcLNvfXAqLElfONM17/Yc/hgVw
 xfDtvNFrZcl7/exIpBBNOnxwbs4h78vvXsXoBiVoN7V/hBnMzDhkiBHNxNCfZXA0
 REGs/cnyy6cpiJOnVCWs77NqL75oK/qb1mEwe1M+A2kaxe/tLixUdYXo/zclDPm8
 3DLTL9lCgJIBIEiZT4q/alxLK+yUKD+SHtQT3lmF2Bfsmv/I38Uy55SXAiFO4yOq
 kwy96TvYtT43SkyNmmBf
 =oZOO
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull base rdma updates from Doug Ledford:
 "Round one of 4.8 code: while this is mostly normal, there is a new
  driver in here (the driver was hosted outside the kernel for several
  years and is actually a fairly mature and well-coded driver).  It
  amounts to 13,000 of the 16,000 lines of added code in here.

  Summary:

   - Updates/fixes for iw_cxgb4 driver
   - Updates/fixes for mlx5 driver
   - Add flow steering and RSS API
   - Add hardware stats to mlx4 and mlx5 drivers
   - Add firmware version API for RDMA driver use
   - Add the rxe driver (this is a software RoCE driver that makes any
     Ethernet device a RoCE device)
   - Fixes for i40iw driver
   - Support for send only multicast joins in the cma layer
   - Other minor fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (72 commits)
  Soft RoCE driver
  IB/core: Support for CMA multicast join flags
  IB/sa: Add cached attribute containing SM information to SA port
  IB/uverbs: Fix race between uverbs_close and remove_one
  IB/mthca: Clean up error unwind flow in mthca_reset()
  IB/mthca: NULL arg to pci_dev_put is OK
  IB/hfi1: NULL arg to sc_return_credits is OK
  IB/mlx4: Add diagnostic hardware counters
  net/mlx4: Query performance and diagnostics counters
  net/mlx4: Add diagnostic counters capability bit
  Use smaller 512 byte messages for portmapper messages
  IB/ipoib: Report SG feature regardless of HW UD CSUM capability
  IB/mlx4: Don't use GFP_ATOMIC for CQ resize struct
  IB/hfi1: Disable by default
  IB/rdmavt: Disable by default
  IB/mlx5: Fix port counter ID association to QP offset
  IB/mlx5: Fix iteration overrun in GSI qps
  i40iw: Add NULL check for puda buffer
  i40iw: Change dup_ack_thresh to u8
  i40iw: Remove unnecessary check for moving CQ head
  ...
This commit is contained in:
Linus Torvalds 2016-08-04 20:10:31 -04:00
commit 0cda611386
107 changed files with 16536 additions and 660 deletions

View file

@ -7647,6 +7647,15 @@ W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlxsw/
SOFT-ROCE DRIVER (rxe)
M: Moni Shoua <monis@mellanox.com>
L: linux-rdma@vger.kernel.org
S: Supported
W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
Q: http://patchwork.kernel.org/project/linux-rdma/list/
F: drivers/infiniband/hw/rxe/
F: include/uapi/rdma/rdma_user_rxe.h
MEMBARRIER SUPPORT
M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>

View file

@ -84,6 +84,7 @@ source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"

View file

@ -68,6 +68,7 @@ MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
@ -162,6 +163,14 @@ struct rdma_bind_list {
unsigned short port;
};
/*
 * Per-call state shared between cma_query_sa_classport_info() and its
 * completion callback cma_query_sa_classport_info_cb().
 */
struct class_port_info_context {
/* Destination buffer; the callback copies the received record here. */
struct ib_class_port_info *class_port_info;
/* Device the query was issued on; its name is used in log messages. */
struct ib_device *device;
/* Completed by the callback; the issuer waits on it. */
struct completion done;
/* Handed to ib_sa_classport_info_rec_query() as its query out-parameter. */
struct ib_sa_query *sa_query;
/* Port the query was issued on; used in log messages. */
u8 port_num;
};
static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
@ -306,6 +315,7 @@ struct cma_multicast {
struct sockaddr_storage addr;
struct kref mcref;
bool igmp_joined;
u8 join_state;
};
struct cma_work {
@ -3752,10 +3762,63 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
}
}
/*
 * Completion callback for the ClassPortInfo query issued by
 * cma_query_sa_classport_info().
 *
 * @status:  query status; non-zero means the query failed
 * @rec:     received ClassPortInfo record, may be NULL
 * @context: the struct class_port_info_context supplied by the issuer
 *
 * On success the record is copied into the caller-provided buffer; on
 * failure only a debug message is logged.  The waiter is woken either way.
 */
static void cma_query_sa_classport_info_cb(int status,
					   struct ib_class_port_info *rec,
					   void *context)
{
	struct class_port_info_context *cb_ctx = context;

	WARN_ON(!context);

	if (!status && rec)
		memcpy(cb_ctx->class_port_info, rec,
		       sizeof(struct ib_class_port_info));
	else
		pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
			 cb_ctx->device->name, cb_ctx->port_num, status);

	complete(&cb_ctx->done);
}
/*
 * cma_query_sa_classport_info - synchronously fetch the SA ClassPortInfo.
 * @device:          device to query
 * @port_num:        port on @device to query
 * @class_port_info: caller-provided buffer that receives the record
 *
 * Issues a ClassPortInfo query and blocks until the callback
 * cma_query_sa_classport_info_cb() signals completion.
 *
 * Returns 0 once the query has completed, -ENOMEM if the context could not
 * be allocated, or the negative error returned when the query could not be
 * sent.  If the query completes with an error status the callback does not
 * fill @class_port_info; it is zeroed up front so that the caller then sees
 * an empty record rather than uninitialised memory.
 */
static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
				       struct ib_class_port_info *class_port_info)
{
	struct class_port_info_context *cb_ctx;
	int ret;

	cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
	if (!cb_ctx)
		return -ENOMEM;

	/*
	 * The callback leaves the buffer untouched on failure; make sure the
	 * caller never reads stale stack contents in that case.
	 */
	memset(class_port_info, 0, sizeof(*class_port_info));

	cb_ctx->device = device;
	cb_ctx->class_port_info = class_port_info;
	cb_ctx->port_num = port_num;
	init_completion(&cb_ctx->done);

	ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
					     CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
					     GFP_KERNEL, cma_query_sa_classport_info_cb,
					     cb_ctx, &cb_ctx->sa_query);
	if (ret < 0) {
		pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
		       device->name, port_num, ret);
		goto out;
	}

	wait_for_completion(&cb_ctx->done);

	/*
	 * Only negative values are errors.  A successful send may report a
	 * non-negative value (presumably the query id, per the SA query API
	 * convention - confirm), which the caller's "if (ret)" test would
	 * misread as a failure.  Normalise success to 0.
	 */
	ret = 0;

out:
	kfree(cb_ctx);
	return ret;
}
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
struct ib_class_port_info class_port_info;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret;
@ -3774,7 +3837,24 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.qkey = cpu_to_be32(id_priv->qkey);
rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
rec.join_state = mc->join_state;
if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
ret = cma_query_sa_classport_info(id_priv->id.device,
id_priv->id.port_num,
&class_port_info);
if (ret)
return ret;
if (!(ib_get_cpi_capmask2(&class_port_info) &
IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
"RDMA CM: SM doesn't support Send Only Full Member option\n",
id_priv->id.device->name, id_priv->id.port_num);
return -EOPNOTSUPP;
}
}
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
@ -3843,6 +3923,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
enum ib_gid_type gid_type;
bool send_only;
send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
if (cma_zero_addr((struct sockaddr *)&mc->addr))
return -EINVAL;
@ -3878,10 +3961,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
true);
if (!err)
mc->igmp_joined = true;
if (!send_only) {
err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
true);
if (!err)
mc->igmp_joined = true;
}
}
} else {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
@ -3911,7 +3996,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
void *context)
u8 join_state, void *context)
{
struct rdma_id_private *id_priv;
struct cma_multicast *mc;
@ -3930,6 +4015,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
mc->context = context;
mc->id_priv = id_priv;
mc->igmp_joined = false;
mc->join_state = join_state;
spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock);

View file

@ -311,6 +311,15 @@ static int read_port_immutable(struct ib_device *device)
return 0;
}
/*
 * ib_get_device_fw_str - fetch the firmware version string of @dev.
 * @dev:     device to query
 * @str:     output buffer
 * @str_len: size of @str, forwarded to the driver hook
 *
 * Delegates to the driver's get_dev_fw_str hook when one is set; otherwise
 * @str is made an empty string.
 */
void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
{
	if (!dev->get_dev_fw_str) {
		str[0] = '\0';
		return;
	}

	dev->get_dev_fw_str(dev, str, str_len);
}
EXPORT_SYMBOL(ib_get_device_fw_str);
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register

View file

@ -183,15 +183,14 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
/*
* Release a reference on cm_id. If the last reference is being
* released, enable the waiting thread (in iw_destroy_cm_id) to
* get woken up, and return 1 if a thread is already waiting.
* released, free the cm_id and return 1.
*/
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
if (atomic_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
complete(&cm_id_priv->destroy_comp);
free_cm_id(cm_id_priv);
return 1;
}
@ -208,19 +207,10 @@ static void add_ref(struct iw_cm_id *cm_id)
static void rem_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
int cb_destroy;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
/*
* Test bit before deref in case the cm_id gets freed on another
* thread.
*/
cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
}
(void)iwcm_deref_id(cm_id_priv);
}
static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
@ -370,6 +360,12 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
wait_event(cm_id_priv->connect_wait,
!test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
/*
* Since we're deleting the cm_id, drop any events that
* might arrive before the last dereference.
*/
set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
spin_lock_irqsave(&cm_id_priv->lock, flags);
switch (cm_id_priv->state) {
case IW_CM_STATE_LISTEN:
@ -433,13 +429,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
destroy_cm_id(cm_id);
wait_for_completion(&cm_id_priv->destroy_comp);
free_cm_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@ -809,10 +799,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
ret = cm_id->cm_handler(cm_id, iw_event);
if (ret) {
iw_cm_reject(cm_id, NULL, 0);
set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
destroy_cm_id(cm_id);
if (atomic_read(&cm_id_priv->refcount)==0)
free_cm_id(cm_id_priv);
iw_destroy_cm_id(cm_id);
}
out:
@ -1000,7 +987,6 @@ static void cm_work_handler(struct work_struct *_work)
unsigned long flags;
int empty;
int ret = 0;
int destroy_id;
spin_lock_irqsave(&cm_id_priv->lock, flags);
empty = list_empty(&cm_id_priv->work_list);
@ -1013,20 +999,14 @@ static void cm_work_handler(struct work_struct *_work)
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = process_event(cm_id_priv, &levent);
if (ret) {
set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
destroy_cm_id(&cm_id_priv->id);
}
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
if (iwcm_deref_id(cm_id_priv)) {
if (destroy_id) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
}
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
if (ret)
destroy_cm_id(&cm_id_priv->id);
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
return;
}
if (empty)
return;
spin_lock_irqsave(&cm_id_priv->lock, flags);

View file

@ -56,7 +56,7 @@ struct iwcm_id_private {
struct list_head work_free_list;
};
#define IWCM_F_CALLBACK_DESTROY 1
#define IWCM_F_DROP_EVENTS 1
#define IWCM_F_CONNECT_WAIT 2
#endif /* IWCM_H */

View file

@ -37,6 +37,7 @@
#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1)
#define IWPM_REMINFO_HASH_SIZE 64
#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1)
#define IWPM_MSG_SIZE 512
static LIST_HEAD(iwpm_nlmsg_req_list);
static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
@ -452,7 +453,7 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
{
struct sk_buff *skb = NULL;
skb = dev_alloc_skb(NLMSG_GOODSIZE);
skb = dev_alloc_skb(IWPM_MSG_SIZE);
if (!skb) {
pr_err("%s Unable to allocate skb\n", __func__);
goto create_nlmsg_exit;

View file

@ -93,18 +93,6 @@ enum {
struct mcast_member;
/*
* There are 4 types of join states:
* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
*/
enum {
FULLMEMBER_JOIN,
NONMEMBER_JOIN,
SENDONLY_NONMEBER_JOIN,
SENDONLY_FULLMEMBER_JOIN,
NUM_JOIN_MEMBERSHIP_TYPES,
};
struct mcast_group {
struct ib_sa_mcmember_rec rec;
struct rb_node node;

View file

@ -229,7 +229,10 @@ static void ibnl_rcv(struct sk_buff *skb)
int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
__u32 pid)
{
return nlmsg_unicast(nls, skb, pid);
int err;
err = netlink_unicast(nls, skb, pid, 0);
return (err < 0) ? err : 0;
}
EXPORT_SYMBOL(ibnl_unicast);
@ -252,6 +255,7 @@ int __init ibnl_init(void)
return -ENOMEM;
}
nls->sk_sndtimeo = 10 * HZ;
return 0;
}

View file

@ -65,10 +65,17 @@ struct ib_sa_sm_ah {
u8 src_path_mask;
};
/*
 * Cached copy of a port's ClassPortInfo record.  @data is only meaningful
 * while @valid is true.
 */
struct ib_sa_classport_cache {
bool valid;
struct ib_class_port_info data;
};
/*
 * Per-port SA state: the MAD agent, the SM address handle, the deferred
 * update work item and the cached ClassPortInfo record.
 */
struct ib_sa_port {
struct ib_mad_agent *agent;
struct ib_sa_sm_ah *sm_ah;
/* Queued on ib_wq from ib_sa_event(). */
struct work_struct update_task;
/* Cache consulted before a ClassPortInfo MAD is sent. */
struct ib_sa_classport_cache classport_info;
spinlock_t classport_lock; /* protects class port info set */
/* Held while sm_ah is cleared in ib_sa_event(). */
spinlock_t ah_lock;
u8 port_num;
};
@ -998,6 +1005,13 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
port->sm_ah = NULL;
spin_unlock_irqrestore(&port->ah_lock, flags);
if (event->event == IB_EVENT_SM_CHANGE ||
event->event == IB_EVENT_CLIENT_REREGISTER ||
event->event == IB_EVENT_LID_CHANGE) {
spin_lock_irqsave(&port->classport_lock, flags);
port->classport_info.valid = false;
spin_unlock_irqrestore(&port->classport_lock, flags);
}
queue_work(ib_wq, &sa_dev->port[event->element.port_num -
sa_dev->start_port].update_task);
}
@ -1719,6 +1733,7 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
unsigned long flags;
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
@ -1728,6 +1743,16 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
ib_unpack(classport_info_rec_table,
ARRAY_SIZE(classport_info_rec_table),
mad->data, &rec);
spin_lock_irqsave(&sa_query->port->classport_lock, flags);
if (!status && !sa_query->port->classport_info.valid) {
memcpy(&sa_query->port->classport_info.data, &rec,
sizeof(sa_query->port->classport_info.data));
sa_query->port->classport_info.valid = true;
}
spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
query->callback(status, &rec, query->context);
} else {
query->callback(status, NULL, query->context);
@ -1754,7 +1779,9 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
struct ib_class_port_info cached_class_port_info;
int ret;
unsigned long flags;
if (!sa_dev)
return -ENODEV;
@ -1762,6 +1789,17 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
/* Use cached ClassPortInfo attribute if valid instead of sending mad */
spin_lock_irqsave(&port->classport_lock, flags);
if (port->classport_info.valid && callback) {
memcpy(&cached_class_port_info, &port->classport_info.data,
sizeof(cached_class_port_info));
spin_unlock_irqrestore(&port->classport_lock, flags);
callback(0, &cached_class_port_info, context);
return 0;
}
spin_unlock_irqrestore(&port->classport_lock, flags);
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
@ -1885,6 +1923,9 @@ static void ib_sa_add_one(struct ib_device *device)
sa_dev->port[i].sm_ah = NULL;
sa_dev->port[i].port_num = i + s;
spin_lock_init(&sa_dev->port[i].classport_lock);
sa_dev->port[i].classport_info.valid = false;
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,

View file

@ -38,6 +38,7 @@
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
@ -1200,16 +1201,28 @@ static ssize_t set_node_desc(struct device *device,
return count;
}
/*
 * sysfs "fw_ver" show handler: writes the device firmware version string
 * followed by a newline into @buf and returns the resulting string length.
 */
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
			   char *buf)
{
	struct ib_device *ibdev;

	ibdev = container_of(device, struct ib_device, dev);

	ib_get_device_fw_str(ibdev, buf, PAGE_SIZE);
	strlcat(buf, "\n", PAGE_SIZE);

	return strlen(buf);
}
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_type,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
&dev_attr_node_desc
&dev_attr_node_desc,
&dev_attr_fw_ver,
};
static void free_port_list_attributes(struct ib_device *device)

View file

@ -106,6 +106,7 @@ struct ucma_multicast {
int events_reported;
u64 uid;
u8 join_state;
struct list_head list;
struct sockaddr_storage addr;
};
@ -1317,12 +1318,20 @@ static ssize_t ucma_process_join(struct ucma_file *file,
struct ucma_multicast *mc;
struct sockaddr *addr;
int ret;
u8 join_state;
if (out_len < sizeof(resp))
return -ENOSPC;
addr = (struct sockaddr *) &cmd->addr;
if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
return -EINVAL;
if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
join_state = BIT(FULLMEMBER_JOIN);
else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
else
return -EINVAL;
ctx = ucma_get_ctx(file, cmd->id);
@ -1335,10 +1344,11 @@ static ssize_t ucma_process_join(struct ucma_file *file,
ret = -ENOMEM;
goto err1;
}
mc->join_state = join_state;
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
if (ret)
goto err2;
@ -1382,7 +1392,7 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
join_cmd.uid = cmd.uid;
join_cmd.id = cmd.id;
join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
join_cmd.reserved = 0;
join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
return ucma_process_join(file, &join_cmd, out_len);

View file

@ -116,6 +116,7 @@ struct ib_uverbs_event_file {
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
struct mutex cleanup_mutex; /* protect cleanup */
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
@ -162,6 +163,10 @@ struct ib_uqp_object {
struct ib_uxrcd_object *uxrcd;
};
/*
 * Userspace-visible work queue (WQ) object: wraps an ib_uevent_object,
 * whose embedded uobject, event list and events_reported counter are used
 * by the WQ create/destroy commands.
 */
struct ib_uwq_object {
struct ib_uevent_object uevent;
};
struct ib_ucq_object {
struct ib_uobject uobject;
struct ib_uverbs_file *uverbs_file;
@ -181,6 +186,8 @@ extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
extern struct idr ib_uverbs_xrcd_idr;
extern struct idr ib_uverbs_rule_idr;
extern struct idr ib_uverbs_wq_idr;
extern struct idr ib_uverbs_rwq_ind_tbl_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
@ -199,6 +206,7 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
@ -219,6 +227,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
};
};
@ -275,5 +284,10 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
IB_UVERBS_DECLARE_EX_CMD(query_device);
IB_UVERBS_DECLARE_EX_CMD(create_cq);
IB_UVERBS_DECLARE_EX_CMD(create_qp);
IB_UVERBS_DECLARE_EX_CMD(create_wq);
IB_UVERBS_DECLARE_EX_CMD(modify_wq);
IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
#endif /* UVERBS_H */

View file

@ -57,6 +57,8 @@ static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
/*
* The ib_uobject locking scheme is as follows:
@ -243,6 +245,27 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
}
/*
 * Look up the WQ registered under @wq_handle in ib_uverbs_wq_idr for
 * @context via idr_read_obj().  Callers treat a NULL result as "not found"
 * and release a successful lookup with put_wq_read().
 */
static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
{
return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
}
/* Release a WQ obtained with idr_read_wq() by dropping the read hold on its uobject. */
static void put_wq_read(struct ib_wq *wq)
{
put_uobj_read(wq->uobject);
}
/*
 * Look up the receive-WQ indirection table registered under
 * @ind_table_handle in ib_uverbs_rwq_ind_tbl_idr for @context via
 * idr_read_obj().  Callers treat a NULL result as "not found" and release a
 * successful lookup with put_rwq_indirection_table_read().
 */
static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
struct ib_ucontext *context)
{
return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
}
/*
 * Release an indirection table obtained with
 * idr_read_rwq_indirection_table() by dropping the read hold on its uobject.
 */
static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
{
put_uobj_read(ind_table->uobject);
}
static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
{
struct ib_uobject *uobj;
@ -326,6 +349,8 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
INIT_LIST_HEAD(&ucontext->wq_list);
INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
INIT_LIST_HEAD(&ucontext->xrcd_list);
INIT_LIST_HEAD(&ucontext->rule_list);
rcu_read_lock();
@ -1750,6 +1775,8 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_qp_init_attr attr = {};
struct ib_uverbs_ex_create_qp_resp resp;
int ret;
struct ib_rwq_ind_table *ind_tbl = NULL;
bool has_sq = true;
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
@ -1761,6 +1788,32 @@ static int create_qp(struct ib_uverbs_file *file,
init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
&qp_lock_class);
down_write(&obj->uevent.uobject.mutex);
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
file->ucontext);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
}
attr.rwq_ind_tbl = ind_tbl;
}
if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
sizeof(cmd->reserved1)) && cmd->reserved1) {
ret = -EOPNOTSUPP;
goto err_put;
}
if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
ret = -EINVAL;
goto err_put;
}
if (ind_tbl && !cmd->max_send_wr)
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
@ -1784,20 +1837,24 @@ static int create_qp(struct ib_uverbs_file *file,
}
}
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
rcq = idr_read_cq(cmd->recv_cq_handle,
file->ucontext, 0);
if (!rcq) {
ret = -EINVAL;
goto err_put;
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
rcq = idr_read_cq(cmd->recv_cq_handle,
file->ucontext, 0);
if (!rcq) {
ret = -EINVAL;
goto err_put;
}
}
}
}
scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
rcq = rcq ?: scq;
if (has_sq)
scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
if (!ind_tbl)
rcq = rcq ?: scq;
pd = idr_read_pd(cmd->pd_handle, file->ucontext);
if (!pd || !scq) {
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
}
@ -1864,16 +1921,20 @@ static int create_qp(struct ib_uverbs_file *file,
qp->send_cq = attr.send_cq;
qp->recv_cq = attr.recv_cq;
qp->srq = attr.srq;
qp->rwq_ind_tbl = ind_tbl;
qp->event_handler = attr.event_handler;
qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
atomic_set(&qp->usecnt, 0);
atomic_inc(&pd->usecnt);
atomic_inc(&attr.send_cq->usecnt);
if (attr.send_cq)
atomic_inc(&attr.send_cq->usecnt);
if (attr.recv_cq)
atomic_inc(&attr.recv_cq->usecnt);
if (attr.srq)
atomic_inc(&attr.srq->usecnt);
if (ind_tbl)
atomic_inc(&ind_tbl->usecnt);
}
qp->uobject = &obj->uevent.uobject;
@ -1913,6 +1974,8 @@ static int create_qp(struct ib_uverbs_file *file,
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
if (ind_tbl)
put_rwq_indirection_table_read(ind_tbl);
mutex_lock(&file->mutex);
list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@ -1940,6 +2003,8 @@ static int create_qp(struct ib_uverbs_file *file,
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
if (ind_tbl)
put_rwq_indirection_table_read(ind_tbl);
put_uobj_write(&obj->uevent.uobject);
return ret;
@ -2033,7 +2098,7 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
if (err)
return err;
if (cmd.comp_mask)
if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
return -EINVAL;
if (cmd.reserved)
@ -3040,6 +3105,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
sizeof(struct ib_flow_ipv4_filter));
break;
case IB_FLOW_SPEC_IPV6:
ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6);
if (ib_spec->ipv6.size != kern_spec->ipv6.size)
return -EINVAL;
memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val,
sizeof(struct ib_flow_ipv6_filter));
memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask,
sizeof(struct ib_flow_ipv6_filter));
break;
case IB_FLOW_SPEC_TCP:
case IB_FLOW_SPEC_UDP:
ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
@ -3056,6 +3130,445 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
return 0;
}
/*
 * ib_uverbs_ex_create_wq - handle the extended "create WQ" uverbs command.
 * @file:   uverbs file the command arrived on
 * @ib_dev: device the command targets (not referenced in this function)
 * @ucore:  core command/response buffers
 * @uhw:    driver-specific buffers, passed through to the driver
 *
 * Validates the command size, looks up the PD and CQ named by the command,
 * asks the driver to create the WQ, registers the new uobject in
 * ib_uverbs_wq_idr, copies the response to userspace and finally links the
 * object into the ucontext's wq_list.
 *
 * Returns 0 on success.  Errors produced here are -EINVAL (command too
 * short, unknown PD or CQ handle), -ENOSPC (response buffer too small),
 * -EOPNOTSUPP (non-zero trailing input or comp_mask) and -ENOMEM; errors
 * from the copy helpers, the driver and idr_add_uobj() are passed through.
 */
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_create_wq cmd = {};
struct ib_uverbs_ex_create_wq_resp resp = {};
struct ib_uwq_object *obj;
int err = 0;
struct ib_cq *cq;
struct ib_pd *pd;
struct ib_wq *wq;
struct ib_wq_init_attr wq_init_attr = {};
size_t required_cmd_sz;
size_t required_resp_len;
/* Minimum sizes: everything up to and including the last mandatory field. */
required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
if (ucore->inlen < required_cmd_sz)
return -EINVAL;
if (ucore->outlen < required_resp_len)
return -ENOSPC;
/* Input beyond the struct we know about must be all zero. */
if (ucore->inlen > sizeof(cmd) &&
!ib_is_udata_cleared(ucore, sizeof(cmd),
ucore->inlen - sizeof(cmd)))
return -EOPNOTSUPP;
err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
if (err)
return err;
/* No comp_mask bits are accepted by this handler. */
if (cmd.comp_mask)
return -EOPNOTSUPP;
obj = kmalloc(sizeof(*obj), GFP_KERNEL);
if (!obj)
return -ENOMEM;
init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
&wq_lock_class);
/* Held until the object is marked live below, or until the error unwind. */
down_write(&obj->uevent.uobject.mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
}
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
goto err_put_cq;
}
wq->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = wq;
wq->wq_type = wq_init_attr.wq_type;
wq->cq = cq;
wq->pd = pd;
wq->device = pd->device;
wq->wq_context = wq_init_attr.wq_context;
atomic_set(&wq->usecnt, 0);
/* The new WQ counts as a user of its PD and CQ. */
atomic_inc(&pd->usecnt);
atomic_inc(&cq->usecnt);
/* NOTE(review): the next two assignments repeat the ones made right after create_wq(). */
wq->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = wq;
err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
if (err)
goto destroy_wq;
/* NOTE(review): resp was already zero-initialised at its declaration. */
memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
resp.response_length = required_resp_len;
err = ib_copy_to_udata(ucore,
&resp, resp.response_length);
if (err)
goto err_copy;
put_pd_read(pd);
put_cq_read(cq);
mutex_lock(&file->mutex);
list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
mutex_unlock(&file->mutex);
obj->uevent.uobject.live = 1;
up_write(&obj->uevent.uobject.mutex);
return 0;
/* Error unwind: each label undoes one more step, in reverse order of setup. */
err_copy:
idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
destroy_wq:
ib_destroy_wq(wq);
err_put_cq:
put_cq_read(cq);
err_put_pd:
put_pd_read(pd);
err_uobj:
put_uobj_write(&obj->uevent.uobject);
return err;
}
/*
 * ib_uverbs_ex_destroy_wq - handle the extended "destroy WQ" uverbs command.
 * @file:   uverbs file the command arrived on
 * @ib_dev: device the command targets (not referenced in this function)
 * @ucore:  core command/response buffers
 * @uhw:    driver-specific buffers (not referenced in this function)
 *
 * Validates the command, destroys the WQ named by the handle, removes its
 * uobject from the idr and from the file's list, releases its pending
 * events and reports the number of events delivered back to userspace.
 *
 * Returns 0 on success, -EINVAL for a short command or unknown handle,
 * -ENOSPC for a short response buffer, -EOPNOTSUPP for non-zero trailing
 * input or comp_mask, or the error returned by a helper.
 */
int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
			    struct ib_device *ib_dev,
			    struct ib_udata *ucore,
			    struct ib_udata *uhw)
{
	struct ib_uverbs_ex_destroy_wq cmd = {};
	struct ib_uverbs_ex_destroy_wq_resp resp = {};
	struct ib_uwq_object *wq_obj;
	struct ib_uobject *uobj;
	struct ib_wq *wq;
	size_t min_cmd_len;
	size_t resp_len;
	int ret;

	min_cmd_len = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
	resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);

	if (ucore->inlen < min_cmd_len)
		return -EINVAL;
	if (ucore->outlen < resp_len)
		return -ENOSPC;

	/* Anything past the known command layout has to be zeroed. */
	if (ucore->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(ucore, sizeof(cmd),
				 ucore->inlen - sizeof(cmd)))
		return -EOPNOTSUPP;

	ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
	if (ret)
		return ret;
	if (cmd.comp_mask)
		return -EOPNOTSUPP;

	resp.response_length = resp_len;

	uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
			      file->ucontext);
	if (!uobj)
		return -EINVAL;

	wq = uobj->object;
	wq_obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);

	ret = ib_destroy_wq(wq);
	if (ret) {
		/* Destroy failed: the object stays live, just drop the write hold. */
		put_uobj_write(uobj);
		return ret;
	}
	uobj->live = 0;
	put_uobj_write(uobj);

	idr_remove_uobj(&ib_uverbs_wq_idr, uobj);

	mutex_lock(&file->mutex);
	list_del(&uobj->list);
	mutex_unlock(&file->mutex);

	ib_uverbs_release_uevent(file, &wq_obj->uevent);
	/* Read the counter before put_uobj() below drops our reference. */
	resp.events_reported = wq_obj->uevent.events_reported;
	put_uobj(uobj);

	return ib_copy_to_udata(ucore, &resp, resp.response_length);
}
/*
 * ib_uverbs_ex_modify_wq - handle the extended "modify WQ" uverbs command.
 * @file:   uverbs file the command arrived on
 * @ib_dev: device the command targets (not referenced in this function)
 * @ucore:  core command buffer
 * @uhw:    driver-specific buffers, passed through to the driver
 *
 * Validates the command, looks up the WQ named by the handle and forwards
 * the requested state change to the driver's modify_wq hook.
 *
 * Returns the driver's result, -EINVAL for a short command, an empty or
 * unsupported attr_mask or an unknown handle, -EOPNOTSUPP for non-zero
 * trailing input, or the error from copying the command.
 */
int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
			   struct ib_device *ib_dev,
			   struct ib_udata *ucore,
			   struct ib_udata *uhw)
{
	struct ib_uverbs_ex_modify_wq cmd = {};
	struct ib_wq *wq;
	struct ib_wq_attr wq_attr = {};
	size_t required_cmd_sz;
	int ret;

	required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) +
			  sizeof(cmd.curr_wq_state);
	if (ucore->inlen < required_cmd_sz)
		return -EINVAL;

	/* Input beyond the struct we know about must be all zero. */
	if (ucore->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(ucore, sizeof(cmd),
				 ucore->inlen - sizeof(cmd)))
		return -EOPNOTSUPP;

	ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
	if (ret)
		return ret;

	if (!cmd.attr_mask)
		return -EINVAL;

	/*
	 * Reject any bit outside the supported set.  A bit test, unlike a
	 * magnitude comparison, stays correct even if the supported flags
	 * are not the lowest contiguous bits.
	 */
	if (cmd.attr_mask & ~(IB_WQ_STATE | IB_WQ_CUR_STATE))
		return -EINVAL;

	wq = idr_read_wq(cmd.wq_handle, file->ucontext);
	if (!wq)
		return -EINVAL;

	wq_attr.curr_wq_state = cmd.curr_wq_state;
	wq_attr.wq_state = cmd.wq_state;
	ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
	put_wq_read(wq);

	return ret;
}
/*
 * ib_uverbs_ex_create_rwq_ind_table - extended uverbs command handler that
 * creates a receive-WQ indirection table.
 *
 * The user buffer in @ucore holds a fixed header (up to and including
 * log_ind_tbl_size) followed by an array of (1 << log_ind_tbl_size) u32 WQ
 * handles.  Each handle is resolved with idr_read_wq(), the resulting WQ
 * pointer array is handed to the device's create_rwq_ind_table callback,
 * and the new table is published through ib_uverbs_rwq_ind_tbl_idr.  The
 * table handle and number are copied back to user space.
 *
 * Returns 0 on success or a negative errno; every error path unwinds
 * through the goto labels at the bottom, in reverse order of acquisition.
 */
int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_create_rwq_ind_table cmd = {};
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
struct ib_uobject *uobj;
int err = 0;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_wq **wqs = NULL;
u32 *wqs_handles = NULL;
struct ib_wq *wq = NULL;
int i, j, num_read_wqs;
u32 num_wq_handles;
u32 expected_in_size;
size_t required_cmd_sz_header;
size_t required_resp_len;
/* Minimum header and response sizes this handler requires. */
required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
if (ucore->inlen < required_cmd_sz_header)
return -EINVAL;
if (ucore->outlen < required_resp_len)
return -ENOSPC;
err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
if (err)
return err;
/* Advance the input cursor past the header so that the remaining
 * inbuf/inlen describe only the WQ handle array.
 */
ucore->inbuf += required_cmd_sz_header;
ucore->inlen -= required_cmd_sz_header;
if (cmd.comp_mask)
return -EOPNOTSUPP;
if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE)
return -EINVAL;
num_wq_handles = 1 << cmd.log_ind_tbl_size;
expected_in_size = num_wq_handles * sizeof(__u32);
if (num_wq_handles == 1)
/* input size for wq handles is u64 aligned */
expected_in_size += sizeof(__u32);
if (ucore->inlen < expected_in_size)
return -EINVAL;
/* Anything after the handle array must pass ib_is_udata_cleared(). */
if (ucore->inlen > expected_in_size &&
!ib_is_udata_cleared(ucore, expected_in_size,
ucore->inlen - expected_in_size))
return -EOPNOTSUPP;
/* Temporary copy of the user-supplied handles; freed on every path. */
wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
GFP_KERNEL);
if (!wqs_handles)
return -ENOMEM;
err = ib_copy_from_udata(wqs_handles, ucore,
num_wq_handles * sizeof(__u32));
if (err)
goto err_free;
/* WQ pointer array; stored in rwq_ind_tbl->ind_tbl on success and
 * therefore not freed on the success path of this function.
 */
wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL);
if (!wqs) {
err = -ENOMEM;
goto err_free;
}
/* Resolve every handle.  num_read_wqs always equals the number of WQs
 * successfully looked up so far, which is what put_wqs relies on.
 */
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
if (!wq) {
err = -EINVAL;
goto put_wqs;
}
wqs[num_read_wqs] = wq;
}
uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
if (!uobj) {
err = -ENOMEM;
goto put_wqs;
}
init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
/* Hold the uobject's write lock until it is marked live (or torn down). */
down_write(&uobj->mutex);
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
if (IS_ERR(rwq_ind_tbl)) {
err = PTR_ERR(rwq_ind_tbl);
goto err_uobj;
}
/* Fill in the fields of the new table that this layer owns. */
rwq_ind_tbl->ind_tbl = wqs;
rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
rwq_ind_tbl->uobject = uobj;
uobj->object = rwq_ind_tbl;
rwq_ind_tbl->device = ib_dev;
atomic_set(&rwq_ind_tbl->usecnt, 0);
/* Each WQ referenced by the table gets its use count bumped. */
for (i = 0; i < num_wq_handles; i++)
atomic_inc(&wqs[i]->usecnt);
err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
if (err)
goto destroy_ind_tbl;
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = required_resp_len;
err = ib_copy_to_udata(ucore,
&resp, resp.response_length);
if (err)
goto err_copy;
/* Success: drop the temporary handle copy and the per-WQ read
 * references taken by idr_read_wq(), then publish the uobject.
 */
kfree(wqs_handles);
for (j = 0; j < num_read_wqs; j++)
put_wq_read(wqs[j]);
mutex_lock(&file->mutex);
list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
mutex_unlock(&file->mutex);
uobj->live = 1;
up_write(&uobj->mutex);
return 0;
/* Error unwinding: each label undoes one step and falls through to the
 * labels below it.
 */
err_copy:
idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
destroy_ind_tbl:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
err_uobj:
put_uobj_write(uobj);
put_wqs:
/* Release only the WQs that were actually looked up. */
for (j = 0; j < num_read_wqs; j++)
put_wq_read(wqs[j]);
err_free:
kfree(wqs_handles);
kfree(wqs);
return err;
}
/*
 * ib_uverbs_ex_destroy_rwq_ind_table - extended uverbs command handler
 * that destroys the RWQ indirection table named by cmd.ind_tbl_handle.
 *
 * After the table has been destroyed through ib_destroy_rwq_ind_table(),
 * the uobject is removed from ib_uverbs_rwq_ind_tbl_idr and from the list
 * it is linked on, and the WQ pointer array that backed the table is
 * freed.
 *
 * Returns 0 on success or a negative errno.
 */
int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
				       struct ib_device *ib_dev,
				       struct ib_udata *ucore,
				       struct ib_udata *uhw)
{
	struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
	struct ib_rwq_ind_table *tbl;
	struct ib_wq **wq_array;
	struct ib_uobject *uobj;
	size_t min_cmd_sz;
	int rc;

	min_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) +
		     sizeof(cmd.ind_tbl_handle);
	if (ucore->inlen < min_cmd_sz)
		return -EINVAL;

	/* Input longer than cmd is only accepted if the excess passes
	 * ib_is_udata_cleared().
	 */
	if (ucore->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(ucore, sizeof(cmd),
				 ucore->inlen - sizeof(cmd)))
		return -EOPNOTSUPP;

	rc = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
	if (rc)
		return rc;

	if (cmd.comp_mask)
		return -EOPNOTSUPP;

	uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
			      file->ucontext);
	if (!uobj)
		return -EINVAL;

	tbl = uobj->object;
	/* Remember the array before the table object is destroyed. */
	wq_array = tbl->ind_tbl;

	rc = ib_destroy_rwq_ind_table(tbl);
	if (rc) {
		put_uobj_write(uobj);
		return rc;
	}

	uobj->live = 0;
	put_uobj_write(uobj);

	idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);

	mutex_lock(&file->mutex);
	list_del(&uobj->list);
	mutex_unlock(&file->mutex);

	put_uobj(uobj);
	kfree(wq_array);
	return 0;
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,

View file

@ -76,6 +76,8 @@ DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);
DEFINE_IDR(ib_uverbs_wq_idr);
DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@ -130,6 +132,11 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
[IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
[IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
[IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
[IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
[IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
};
static void ib_uverbs_add_one(struct ib_device *device);
@ -265,6 +272,27 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uqp);
}
list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
ib_destroy_rwq_ind_table(rwq_ind_tbl);
kfree(ind_tbl);
kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
struct ib_wq *wq = uobj->object;
struct ib_uwq_object *uwq =
container_of(uobj, struct ib_uwq_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
ib_destroy_wq(wq);
ib_uverbs_release_uevent(file, &uwq->uevent);
kfree(uwq);
}
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = uobj->object;
struct ib_uevent_object *uevent =
@ -568,6 +596,16 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
&uobj->events_reported);
}
/*
 * ib_uverbs_wq_event_handler - forward an asynchronous WQ event to
 * ib_uverbs_async_handler().
 * @event: the event; event->element.wq identifies the WQ it refers to.
 * @context_ptr: opaque pointer passed through to ib_uverbs_async_handler().
 */
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uobject *base = event->element.wq->uobject;
	struct ib_uevent_object *uevent;

	/* The WQ's uobject is embedded in an ib_uevent_object. */
	uevent = container_of(base, struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uevent->uobject.user_handle,
				event->event, &uevent->event_list,
				&uevent->events_reported);
}
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
struct ib_uevent_object *uobj;
@ -931,6 +969,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
file->async_file = NULL;
kref_init(&file->ref);
mutex_init(&file->mutex);
mutex_init(&file->cleanup_mutex);
filp->private_data = file;
kobject_get(&dev->kobj);
@ -956,18 +995,20 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_device *dev = file->device;
struct ib_ucontext *ucontext = NULL;
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
ib_uverbs_cleanup_ucontext(file, file->ucontext);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
mutex_lock(&file->device->lists_mutex);
ucontext = file->ucontext;
file->ucontext = NULL;
if (!file->is_closed) {
list_del(&file->list);
file->is_closed = 1;
}
mutex_unlock(&file->device->lists_mutex);
if (ucontext)
ib_uverbs_cleanup_ucontext(file, ucontext);
if (file->async_file)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
@ -1181,22 +1222,30 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
struct ib_ucontext *ucontext;
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
file->is_closed = 1;
ucontext = file->ucontext;
list_del(&file->list);
file->ucontext = NULL;
kref_get(&file->ref);
mutex_unlock(&uverbs_dev->lists_mutex);
/* We must release the mutex before going ahead and calling
* disassociate_ucontext. disassociate_ucontext might end up
* indirectly calling uverbs_close, for example due to freeing
* the resources (e.g mmput).
*/
ib_uverbs_event_handler(&file->event_handler, &event);
mutex_lock(&file->cleanup_mutex);
ucontext = file->ucontext;
file->ucontext = NULL;
mutex_unlock(&file->cleanup_mutex);
/* At this point ib_uverbs_close cannot be running
* ib_uverbs_cleanup_ucontext
*/
if (ucontext) {
/* We must release the mutex before going ahead and
* calling disassociate_ucontext. disassociate_ucontext
* might end up indirectly calling uverbs_close,
* for example due to freeing the resources
* (e.g mmput).
*/
ib_dev->disassociate_ucontext(ucontext);
ib_uverbs_cleanup_ucontext(file, ucontext);
}

View file

@ -758,6 +758,12 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp *qp;
int ret;
if (qp_init_attr->rwq_ind_tbl &&
(qp_init_attr->recv_cq ||
qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
qp_init_attr->cap.max_recv_sge))
return ERR_PTR(-EINVAL);
/*
* If the callers is using the RDMA API calculate the resources
* needed for the RDMA READ/WRITE operations.
@ -775,6 +781,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->real_qp = qp;
qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
atomic_set(&qp->usecnt, 0);
qp->mrs_used = 0;
@ -792,7 +799,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->srq = NULL;
} else {
qp->recv_cq = qp_init_attr->recv_cq;
atomic_inc(&qp_init_attr->recv_cq->usecnt);
if (qp_init_attr->recv_cq)
atomic_inc(&qp_init_attr->recv_cq->usecnt);
qp->srq = qp_init_attr->srq;
if (qp->srq)
atomic_inc(&qp_init_attr->srq->usecnt);
@ -803,7 +811,10 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->xrcd = NULL;
atomic_inc(&pd->usecnt);
atomic_inc(&qp_init_attr->send_cq->usecnt);
if (qp_init_attr->send_cq)
atomic_inc(&qp_init_attr->send_cq->usecnt);
if (qp_init_attr->rwq_ind_tbl)
atomic_inc(&qp->rwq_ind_tbl->usecnt);
if (qp_init_attr->cap.max_rdma_ctxs) {
ret = rdma_rw_init_mrs(qp, qp_init_attr);
@ -1283,6 +1294,7 @@ int ib_destroy_qp(struct ib_qp *qp)
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
struct ib_rwq_ind_table *ind_tbl;
int ret;
WARN_ON_ONCE(qp->mrs_used > 0);
@ -1297,6 +1309,7 @@ int ib_destroy_qp(struct ib_qp *qp)
scq = qp->send_cq;
rcq = qp->recv_cq;
srq = qp->srq;
ind_tbl = qp->rwq_ind_tbl;
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
@ -1311,6 +1324,8 @@ int ib_destroy_qp(struct ib_qp *qp)
atomic_dec(&rcq->usecnt);
if (srq)
atomic_dec(&srq->usecnt);
if (ind_tbl)
atomic_dec(&ind_tbl->usecnt);
}
return ret;
@ -1558,6 +1573,150 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
/**
 * ib_create_wq - Creates a WQ associated with the specified protection
 * domain.
 * @pd: The protection domain associated with the WQ.
 * @wq_attr: A list of initial attributes required to create the
 * WQ. If WQ creation succeeds, then the attributes are updated to
 * the actual capabilities of the created WQ.
 *
 * wq_attr->max_wr and wq_attr->max_sge determine the requested size of
 * the WQ, and are set to the actual values allocated on return.
 * If ib_create_wq() succeeds, then max_wr and max_sge will always be
 * at least as large as the requested values.
 *
 * Returns the new WQ, ERR_PTR(-ENOSYS) if the device has no create_wq
 * callback, or the error pointer produced by that callback.
 */
struct ib_wq *ib_create_wq(struct ib_pd *pd,
			   struct ib_wq_init_attr *wq_attr)
{
	struct ib_wq *new_wq;

	if (!pd->device->create_wq)
		return ERR_PTR(-ENOSYS);

	new_wq = pd->device->create_wq(pd, wq_attr, NULL);
	if (IS_ERR(new_wq))
		return new_wq;

	/* Fill in the fields owned by the core layer. */
	new_wq->event_handler = wq_attr->event_handler;
	new_wq->wq_context = wq_attr->wq_context;
	new_wq->wq_type = wq_attr->wq_type;
	new_wq->cq = wq_attr->cq;
	new_wq->device = pd->device;
	new_wq->pd = pd;
	new_wq->uobject = NULL;

	/* The WQ holds a use count on its PD and its CQ. */
	atomic_inc(&pd->usecnt);
	atomic_inc(&wq_attr->cq->usecnt);
	atomic_set(&new_wq->usecnt, 0);

	return new_wq;
}
EXPORT_SYMBOL(ib_create_wq);
/**
 * ib_destroy_wq - Destroys the specified WQ.
 * @wq: The WQ to destroy.
 *
 * Returns -EBUSY while the WQ's use count is non-zero, otherwise the
 * result of the device's destroy_wq callback.  On success the use counts
 * of the WQ's PD and CQ are dropped.
 */
int ib_destroy_wq(struct ib_wq *wq)
{
	/* Captured up front: @wq is not dereferenced after destroy_wq(). */
	struct ib_pd *owner_pd = wq->pd;
	struct ib_cq *owner_cq = wq->cq;
	int rc;

	if (atomic_read(&wq->usecnt))
		return -EBUSY;

	rc = wq->device->destroy_wq(wq);
	if (rc)
		return rc;

	atomic_dec(&owner_pd->usecnt);
	atomic_dec(&owner_cq->usecnt);
	return 0;
}
EXPORT_SYMBOL(ib_destroy_wq);
/**
 * ib_modify_wq - Modifies the specified WQ.
 * @wq: The WQ to modify.
 * @wq_attr: On input, specifies the WQ attributes to modify.
 * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ
 *   are being modified.
 * On output, the current values of selected WQ attributes are returned.
 *
 * Returns -ENOSYS if the device has no modify_wq callback, otherwise the
 * callback's result.
 */
int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
		 u32 wq_attr_mask)
{
	if (!wq->device->modify_wq)
		return -ENOSYS;

	/* Kernel callers have no user data, hence the NULL udata argument. */
	return wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_wq);
/*
 * ib_create_rwq_ind_table - Creates a RQ Indirection Table.
 * @device: The device on which to create the rwq indirection table.
 * @init_attr: A list of initial attributes required to create the
 * Indirection Table.
 *
 * Note: The life time of init_attr->ind_tbl is not less than the created
 * ib_rwq_ind_table object and the caller is responsible for its memory
 * allocation/free.
 *
 * Returns the new table, ERR_PTR(-ENOSYS) if the device has no
 * create_rwq_ind_table callback, or the callback's error pointer.
 */
struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
						 struct ib_rwq_ind_table_init_attr *init_attr)
{
	struct ib_rwq_ind_table *tbl;
	u32 nr_entries;
	int idx;

	if (!device->create_rwq_ind_table)
		return ERR_PTR(-ENOSYS);

	/* Number of WQ slots, taken before the driver sees init_attr. */
	nr_entries = (1 << init_attr->log_ind_tbl_size);

	tbl = device->create_rwq_ind_table(device, init_attr, NULL);
	if (IS_ERR(tbl))
		return tbl;

	tbl->ind_tbl = init_attr->ind_tbl;
	tbl->log_ind_tbl_size = init_attr->log_ind_tbl_size;
	tbl->device = device;
	tbl->uobject = NULL;
	atomic_set(&tbl->usecnt, 0);

	/* Every WQ referenced by the table gets its use count bumped. */
	for (idx = 0; idx < nr_entries; idx++)
		atomic_inc(&tbl->ind_tbl[idx]->usecnt);

	return tbl;
}
EXPORT_SYMBOL(ib_create_rwq_ind_table);
/*
 * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
 * @rwq_ind_table: The Indirection Table to destroy.
 *
 * Returns -EBUSY while the table's use count is non-zero, otherwise the
 * result of the device's destroy_rwq_ind_table callback.  On success the
 * use count of every WQ referenced by the table is dropped.
 */
int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
{
	/* Captured up front: the table is not dereferenced after the
	 * device's destroy callback has run.
	 */
	struct ib_wq **wq_array = rwq_ind_table->ind_tbl;
	u32 nr_entries = (1 << rwq_ind_table->log_ind_tbl_size);
	int rc, idx;

	if (atomic_read(&rwq_ind_table->usecnt))
		return -EBUSY;

	rc = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table);
	if (rc)
		return rc;

	for (idx = 0; idx < nr_entries; idx++)
		atomic_dec(&wq_array[idx]->usecnt);

	return 0;
}
EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
struct ib_flow *ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)

View file

@ -1396,10 +1396,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
state_set(&child_ep->com, CONNECTING);
child_ep->com.tdev = tdev;
child_ep->com.cm_id = NULL;
child_ep->com.local_addr.sin_family = PF_INET;
child_ep->com.local_addr.sin_family = AF_INET;
child_ep->com.local_addr.sin_port = req->local_port;
child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
child_ep->com.remote_addr.sin_family = PF_INET;
child_ep->com.remote_addr.sin_family = AF_INET;
child_ep->com.remote_addr.sin_port = req->peer_port;
child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
get_ep(&parent_ep->com);

View file

@ -1183,18 +1183,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
PDBG("%s dev 0x%p\n", __func__, dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.fw_version);
}
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
{
@ -1334,13 +1322,11 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *iwch_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
};
@ -1362,6 +1348,18 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
/*
 * get_dev_fw_ver_str - ib_device get_dev_fw_str callback.
 * @ibdev: the IB device, embedded in an iwch_dev.
 * @str: output buffer for the firmware version string.
 * @str_len: size of @str in bytes.
 *
 * The version string is obtained from the lower-level net device's
 * ethtool get_drvinfo callback and copied, NUL-terminated and truncated to
 * @str_len, into @str.
 */
static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
			       size_t str_len)
{
	struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
	/*
	 * Zero-initialise: the callback is opaque here, and fw_version is
	 * formatted with %s below, so it must never hold stale stack bytes
	 * if the callback leaves it untouched.
	 */
	struct ethtool_drvinfo info = {};
	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;

	PDBG("%s dev 0x%p\n", __func__, iwch_dev);
	lldev->ethtool_ops->get_drvinfo(lldev, &info);
	snprintf(str, str_len, "%s", info.fw_version);
}
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
@ -1437,6 +1435,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.get_hw_stats = iwch_get_mib;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = iwch_port_immutable;
dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)

View file

@ -294,6 +294,25 @@ static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
return;
}
/*
 * alloc_ep_skb_list - append @size freshly allocated skbs to @ep_skb_list.
 * @ep_skb_list: queue that receives the skbs.
 * @size: number of skbs to allocate; values <= 0 allocate nothing.
 *
 * Each skb is allocated with GFP_KERNEL and is sizeof(union cpl_wr_size)
 * rounded up to a multiple of 16 bytes long.
 *
 * Returns 0 on success.  If any allocation fails the whole queue is purged
 * (including skbs that were queued before this call) and -ENOMEM is
 * returned.
 */
static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
{
	struct sk_buff *skb;
	size_t len;
	/*
	 * Signed on purpose: @size is an int and some callers compute it by
	 * subtraction.  With an unsigned counter a negative @size would be
	 * converted to a huge bound and the loop would allocate until
	 * memory ran out.
	 */
	int i;

	len = roundup(sizeof(union cpl_wr_size), 16);
	for (i = 0; i < size; i++) {
		skb = alloc_skb(len, GFP_KERNEL);
		if (!skb)
			goto fail;
		skb_queue_tail(ep_skb_list, skb);
	}
	return 0;
fail:
	skb_queue_purge(ep_skb_list);
	return -ENOMEM;
}
static void *alloc_ep(int size, gfp_t gfp)
{
struct c4iw_ep_common *epc;
@ -384,6 +403,8 @@ void _c4iw_free_ep(struct kref *kref)
if (ep->mpa_skb)
kfree_skb(ep->mpa_skb);
}
if (!skb_queue_empty(&ep->com.ep_skb_list))
skb_queue_purge(&ep->com.ep_skb_list);
kfree(ep);
}
@ -620,25 +641,27 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
}
}
static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
static int send_flowc(struct c4iw_ep *ep)
{
unsigned int flowclen = 80;
struct fw_flowc_wr *flowc;
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
int i;
u16 vlan = ep->l2t->vlan;
int nparams;
if (WARN_ON(!skb))
return -ENOMEM;
if (vlan == CPL_L2T_VLAN_NONE)
nparams = 8;
else
nparams = 9;
skb = get_skb(skb, flowclen, GFP_KERNEL);
flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN);
flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
FW_FLOWC_WR_NPARAMS_V(nparams));
flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen,
flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN,
16)) | FW_WR_FLOWID_V(ep->hwtid));
flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
@ -679,18 +702,16 @@ static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
return c4iw_ofld_send(&ep->com.dev->rdev, skb);
}
static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
static int send_halfclose(struct c4iw_ep *ep)
{
struct cpl_close_con_req *req;
struct sk_buff *skb;
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
int wrlen = roundup(sizeof *req, 16);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
skb = get_skb(NULL, wrlen, gfp);
if (!skb) {
printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
if (WARN_ON(!skb))
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
@ -701,26 +722,24 @@ static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
static int send_abort(struct c4iw_ep *ep)
{
struct cpl_abort_req *req;
int wrlen = roundup(sizeof *req, 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
skb = get_skb(skb, wrlen, gfp);
if (!skb) {
printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
__func__);
if (WARN_ON(!req_skb))
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(skb, ep, abort_arp_failure);
req = (struct cpl_abort_req *) skb_put(skb, wrlen);
set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
req->cmd = CPL_ABORT_SEND_RST;
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
static void best_mtu(const unsigned short *mtus, unsigned short mtu,
@ -992,9 +1011,19 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
mpa = (struct mpa_message *)(req + 1);
memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
mpa->flags = (crc_enabled ? MPA_CRC : 0) |
(markers_enabled ? MPA_MARKERS : 0) |
(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
mpa->flags = 0;
if (crc_enabled)
mpa->flags |= MPA_CRC;
if (markers_enabled) {
mpa->flags |= MPA_MARKERS;
ep->mpa_attr.recv_marker_enabled = 1;
} else {
ep->mpa_attr.recv_marker_enabled = 0;
}
if (mpa_rev_to_use == 2)
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
mpa->private_data_size = htons(ep->plen);
mpa->revision = mpa_rev_to_use;
if (mpa_rev_to_use == 1) {
@ -1169,8 +1198,11 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
mpa = (struct mpa_message *)(req + 1);
memset(mpa, 0, sizeof(*mpa));
memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
(markers_enabled ? MPA_MARKERS : 0);
mpa->flags = 0;
if (ep->mpa_attr.crc_enabled)
mpa->flags |= MPA_CRC;
if (ep->mpa_attr.recv_marker_enabled)
mpa->flags |= MPA_MARKERS;
mpa->revision = ep->mpa_attr.version;
mpa->private_data_size = htons(plen);
@ -1248,7 +1280,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
set_bit(ACT_ESTAB, &ep->com.history);
/* start MPA negotiation */
ret = send_flowc(ep, NULL);
ret = send_flowc(ep);
if (ret)
goto err;
if (ep->retry_with_mpa_v1)
@ -1555,7 +1587,6 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
*/
__state_set(&ep->com, FPDU_MODE);
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa->revision;
ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
@ -2004,12 +2035,17 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
}
/*
* Return whether a failed active open has allocated a TID
* Some of the error codes above implicitly indicate that there is no TID
* allocated with the result of an ACT_OPEN. We use this predicate to make
* that explicit.
*/
static inline int act_open_has_tid(int status)
{
return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
status != CPL_ERR_ARP_MISS;
return (status != CPL_ERR_TCAM_PARITY &&
status != CPL_ERR_TCAM_MISS &&
status != CPL_ERR_TCAM_FULL &&
status != CPL_ERR_CONN_EXIST_SYNRECV &&
status != CPL_ERR_CONN_EXIST);
}
/* Returns whether a CPL status conveys negative advice.
@ -2130,6 +2166,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
static int c4iw_reconnect(struct c4iw_ep *ep)
{
int err = 0;
int size = 0;
struct sockaddr_in *laddr = (struct sockaddr_in *)
&ep->com.cm_id->m_local_addr;
struct sockaddr_in *raddr = (struct sockaddr_in *)
@ -2145,6 +2182,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
init_timer(&ep->timer);
c4iw_init_wr_wait(&ep->com.wr_wait);
/* When the two nodes use different MPA revisions, the node with
* MPA_rev=2 retries the connection with MPA_rev 1 on the same EP through
* c4iw_reconnect(), where that EP is assigned a new tid for further
* connection establishment. Because the same EP pointer is reused, the
* skbs consumed during the previous c4iw_connect() leave the EP with too
* few skbs for c4iw_reconnect(), which in turn triggers an assert
* BUG_ON() on an empty skb_list() during peer_abort(). Replenish the
* skbs that have already been used.
*/
size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
err = -ENOMEM;
goto fail1;
}
/*
* Allocate an active TID to initiate a TCP connection.
*/
@ -2210,6 +2262,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
* response of 1st connect request.
*/
connect_reply_upcall(ep, -ECONNRESET);
fail1:
c4iw_put_ep(&ep->com);
out:
return err;
@ -2576,6 +2629,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
child_ep->mtu = peer_mss + hdrs;
skb_queue_head_init(&child_ep->com.ep_skb_list);
if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
goto fail;
state_set(&child_ep->com, CONNECTING);
child_ep->com.dev = dev;
child_ep->com.cm_id = NULL;
@ -2640,6 +2697,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
goto out;
fail:
c4iw_put_ep(&child_ep->com);
reject:
reject_cr(dev, hwtid, skb);
if (parent_ep)
@ -2670,7 +2729,7 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
ep->com.state = MPA_REQ_WAIT;
start_ep_timer(ep);
set_bit(PASS_ESTAB, &ep->com.history);
ret = send_flowc(ep, skb);
ret = send_flowc(ep);
mutex_unlock(&ep->com.mutex);
if (ret)
c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
@ -2871,10 +2930,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
}
mutex_unlock(&ep->com.mutex);
rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
if (!rpl_skb) {
printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
__func__);
rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
if (WARN_ON(!rpl_skb)) {
release = 1;
goto out;
}
@ -3011,9 +3068,9 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
PDBG("%s last streaming msg ack ep %p tid %u state %u "
"initiator %u freeing skb\n", __func__, ep, ep->hwtid,
state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
mutex_lock(&ep->com.mutex);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
mutex_lock(&ep->com.mutex);
if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
stop_ep_timer(ep);
mutex_unlock(&ep->com.mutex);
@ -3025,9 +3082,9 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
int err = 0;
int disconnect = 0;
int abort;
struct c4iw_ep *ep = to_ep(cm_id);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
@ -3038,16 +3095,13 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
}
set_bit(ULP_REJECT, &ep->com.history);
if (mpa_rev == 0)
disconnect = 2;
else {
err = send_mpa_reject(ep, pdata, pdata_len);
disconnect = 1;
}
abort = 1;
else
abort = send_mpa_reject(ep, pdata, pdata_len);
mutex_unlock(&ep->com.mutex);
if (disconnect) {
stop_ep_timer(ep);
err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
}
stop_ep_timer(ep);
c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
c4iw_put_ep(&ep->com);
return 0;
}
@ -3248,6 +3302,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = -ENOMEM;
goto out;
}
skb_queue_head_init(&ep->com.ep_skb_list);
if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
err = -ENOMEM;
goto fail1;
}
init_timer(&ep->timer);
ep->plen = conn_param->private_data_len;
if (ep->plen)
@ -3266,7 +3327,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!ep->com.qp) {
PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
goto fail1;
goto fail2;
}
ref_qp(ep);
PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
@ -3279,7 +3340,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ep->atid == -1) {
printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
err = -ENOMEM;
goto fail1;
goto fail2;
}
insert_handle(dev, &dev->atid_idr, ep, ep->atid);
@ -3303,7 +3364,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
goto fail1;
goto fail2;
}
/* find a route */
@ -3323,7 +3384,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
err = pick_local_ip6addrs(dev, cm_id);
if (err)
goto fail1;
goto fail2;
}
/* find a route */
@ -3339,14 +3400,14 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
goto fail2;
goto fail3;
}
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
if (err) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
goto fail3;
goto fail4;
}
PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
@ -3362,13 +3423,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto out;
cxgb4_l2t_release(ep->l2t);
fail3:
fail4:
dst_release(ep->dst);
fail2:
fail3:
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail1:
fail2:
skb_queue_purge(&ep->com.ep_skb_list);
deref_cm_id(&ep->com);
fail1:
c4iw_put_ep(&ep->com);
out:
return err;
@ -3461,6 +3524,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
err = -ENOMEM;
goto fail1;
}
skb_queue_head_init(&ep->com.ep_skb_list);
PDBG("%s ep %p\n", __func__, ep);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
@ -3577,11 +3641,22 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
case MPA_REQ_RCVD:
case MPA_REP_SENT:
case FPDU_MODE:
case CONNECTING:
close = 1;
if (abrupt)
ep->com.state = ABORTING;
else {
ep->com.state = CLOSING;
/*
* if we close before we see the fw4_ack() then we fix
* up the timer state since we're reusing it.
*/
if (ep->mpa_skb &&
test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
clear_bit(STOP_MPA_TIMER, &ep->com.flags);
stop_ep_timer(ep);
}
start_ep_timer(ep);
}
set_bit(CLOSE_SENT, &ep->com.flags);
@ -3611,10 +3686,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
close_complete_upcall(ep, -ECONNRESET);
ret = send_abort(ep, NULL, gfp);
ret = send_abort(ep);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
ret = send_halfclose(ep, gfp);
ret = send_halfclose(ep);
}
if (ret) {
set_bit(EP_DISC_FAIL, &ep->com.history);

View file

@ -33,19 +33,15 @@
#include "iw_cxgb4.h"
static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
struct c4iw_dev_ucontext *uctx)
struct c4iw_dev_ucontext *uctx, struct sk_buff *skb)
{
struct fw_ri_res_wr *res_wr;
struct fw_ri_res *res;
int wr_len;
struct c4iw_wr_wait wr_wait;
struct sk_buff *skb;
int ret;
wr_len = sizeof *res_wr + sizeof *res;
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb)
return -ENOMEM;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
@ -863,7 +859,9 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
: NULL;
destroy_cq(&chp->rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
chp->destroy_skb);
chp->destroy_skb = NULL;
kfree(chp);
return 0;
}
@ -879,7 +877,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
struct c4iw_cq *chp;
struct c4iw_create_cq_resp uresp;
struct c4iw_ucontext *ucontext = NULL;
int ret;
int ret, wr_len;
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
@ -896,6 +894,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
if (!chp)
return ERR_PTR(-ENOMEM);
wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
if (!chp->destroy_skb) {
ret = -ENOMEM;
goto err1;
}
if (ib_context)
ucontext = to_c4iw_ucontext(ib_context);
@ -936,7 +941,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
ret = create_cq(&rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
if (ret)
goto err1;
goto err2;
chp->rhp = rhp;
chp->cq.size--; /* status page */
@ -947,15 +952,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
init_waitqueue_head(&chp->wait);
ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
if (ret)
goto err2;
goto err3;
if (ucontext) {
mm = kmalloc(sizeof *mm, GFP_KERNEL);
if (!mm)
goto err3;
goto err4;
mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
if (!mm2)
goto err4;
goto err5;
uresp.qid_mask = rhp->rdev.cqmask;
uresp.cqid = chp->cq.cqid;
@ -970,7 +975,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
ret = ib_copy_to_udata(udata, &uresp,
sizeof(uresp) - sizeof(uresp.reserved));
if (ret)
goto err5;
goto err6;
mm->key = uresp.key;
mm->addr = virt_to_phys(chp->cq.queue);
@ -986,15 +991,18 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
__func__, chp->cq.cqid, chp, chp->cq.size,
chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
return &chp->ibcq;
err5:
err6:
kfree(mm2);
err4:
err5:
kfree(mm);
err3:
err4:
remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err2:
err3:
destroy_cq(&chp->rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
chp->destroy_skb);
err2:
kfree_skb(chp->destroy_skb);
err1:
kfree(chp);
return ERR_PTR(ret);

View file

@ -317,7 +317,7 @@ static int qp_open(struct inode *inode, struct file *file)
idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
spin_unlock_irq(&qpd->devp->lock);
qpd->bufsize = count * 128;
qpd->bufsize = count * 180;
qpd->buf = vmalloc(qpd->bufsize);
if (!qpd->buf) {
kfree(qpd);

View file

@ -384,6 +384,7 @@ struct c4iw_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct c4iw_dev *rhp;
struct sk_buff *dereg_skb;
u64 kva;
struct tpt_attributes attr;
u64 *mpl;
@ -400,6 +401,7 @@ static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
struct c4iw_mw {
struct ib_mw ibmw;
struct c4iw_dev *rhp;
struct sk_buff *dereg_skb;
u64 kva;
struct tpt_attributes attr;
};
@ -412,6 +414,7 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
struct c4iw_cq {
struct ib_cq ibcq;
struct c4iw_dev *rhp;
struct sk_buff *destroy_skb;
struct t4_cq cq;
spinlock_t lock;
spinlock_t comp_handler_lock;
@ -472,7 +475,7 @@ struct c4iw_qp {
struct t4_wq wq;
spinlock_t lock;
struct mutex mutex;
atomic_t refcnt;
struct kref kref;
wait_queue_head_t wait;
struct timer_list timer;
int sq_sig_all;
@ -789,10 +792,29 @@ enum c4iw_ep_history {
CM_ID_DEREFED = 28,
};
/*
 * Identifiers for the kinds of connection buffers named below.
 * NOTE(review): presumably one skb of each kind is preallocated per
 * endpoint (see ep_skb_list in struct c4iw_ep_common) -- confirm against
 * the allocation code, which is not visible here.
 */
enum conn_pre_alloc_buffers {
CN_ABORT_REQ_BUF,
CN_ABORT_RPL_BUF,
CN_CLOSE_CON_REQ_BUF,
CN_DESTROY_BUF,
CN_FLOWC_BUF,
CN_MAX_CON_BUF /* last enumerator: equals the number of kinds listed above */
};
/* Size in bytes of the flowc_buf member of union cpl_wr_size. */
#define FLOWC_LEN 80
/*
 * Union of the message layouts listed below; its size is therefore the
 * size of the largest member.
 * NOTE(review): looks intended for sizing a buffer that can hold any of
 * these messages -- confirm at the point of use.
 */
union cpl_wr_size {
struct cpl_abort_req abrt_req;
struct cpl_abort_rpl abrt_rpl;
struct fw_ri_wr ri_req;
struct cpl_close_con_req close_req;
char flowc_buf[FLOWC_LEN];
};
struct c4iw_ep_common {
struct iw_cm_id *cm_id;
struct c4iw_qp *qp;
struct c4iw_dev *dev;
struct sk_buff_head ep_skb_list;
enum c4iw_ep_state state;
struct kref kref;
struct mutex mutex;

View file

@ -59,9 +59,9 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
}
static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
u32 len, dma_addr_t data, int wait)
u32 len, dma_addr_t data,
int wait, struct sk_buff *skb)
{
struct sk_buff *skb;
struct ulp_mem_io *req;
struct ulptx_sgl *sgl;
u8 wr_len;
@ -74,9 +74,11 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
c4iw_init_wr_wait(&wr_wait);
wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16);
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb)
return -ENOMEM;
if (!skb) {
skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
if (!skb)
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
@ -108,9 +110,8 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
}
static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
void *data)
void *data, struct sk_buff *skb)
{
struct sk_buff *skb;
struct ulp_mem_io *req;
struct ulptx_idata *sc;
u8 wr_len, *to_dp, *from_dp;
@ -134,9 +135,11 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
wr_len = roundup(sizeof *req + sizeof *sc +
roundup(copy_len, T4_ULPTX_MIN_IO), 16);
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb)
return -ENOMEM;
if (!skb) {
skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
if (!skb)
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
@ -173,6 +176,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO -
(copy_len % T4_ULPTX_MIN_IO));
ret = c4iw_ofld_send(rdev, skb);
skb = NULL;
if (ret)
return ret;
len -= C4IW_MAX_INLINE_SIZE;
@ -182,7 +186,8 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
return ret;
}
static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len,
void *data, struct sk_buff *skb)
{
u32 remain = len;
u32 dmalen;
@ -205,7 +210,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *
dmalen = T4_ULPTX_MAX_DMA;
remain -= dmalen;
ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr,
!remain);
!remain, skb);
if (ret)
goto out;
addr += dmalen >> 5;
@ -213,7 +218,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *
daddr += dmalen;
}
if (remain)
ret = _c4iw_write_mem_inline(rdev, addr, remain, data);
ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb);
out:
dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE);
return ret;
@ -224,23 +229,25 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *
* If data is NULL, clear len byte of memory to zero.
*/
static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
void *data)
void *data, struct sk_buff *skb)
{
if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
if (len > inline_threshold) {
if (_c4iw_write_mem_dma(rdev, addr, len, data)) {
if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) {
printk_ratelimited(KERN_WARNING
"%s: dma map"
" failure (non fatal)\n",
pci_name(rdev->lldi.pdev));
return _c4iw_write_mem_inline(rdev, addr, len,
data);
} else
data, skb);
} else {
return 0;
}
} else
return _c4iw_write_mem_inline(rdev, addr, len, data);
return _c4iw_write_mem_inline(rdev, addr,
len, data, skb);
} else
return _c4iw_write_mem_inline(rdev, addr, len, data);
return _c4iw_write_mem_inline(rdev, addr, len, data, skb);
}
/*
@ -253,7 +260,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
u32 *stag, u8 stag_state, u32 pdid,
enum fw_ri_stag_type type, enum fw_ri_mem_perms perm,
int bind_enabled, u32 zbva, u64 to,
u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr)
u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr,
struct sk_buff *skb)
{
int err;
struct fw_ri_tpte tpt;
@ -307,7 +315,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
}
err = write_adapter_mem(rdev, stag_idx +
(rdev->lldi.vr->stag.start >> 5),
sizeof(tpt), &tpt);
sizeof(tpt), &tpt, skb);
if (reset_tpt_entry) {
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@ -327,28 +335,29 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
__func__, pbl_addr, rdev->lldi.vr->pbl.start,
pbl_size);
err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl);
err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL);
return err;
}
static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
u32 pbl_addr)
u32 pbl_addr, struct sk_buff *skb)
{
return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0,
pbl_size, pbl_addr);
pbl_size, pbl_addr, skb);
}
static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid)
{
*stag = T4_STAG_UNSET;
return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
0UL, 0, 0, 0, 0);
0UL, 0, 0, 0, 0, NULL);
}
static int deallocate_window(struct c4iw_rdev *rdev, u32 stag)
static int deallocate_window(struct c4iw_rdev *rdev, u32 stag,
struct sk_buff *skb)
{
return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
0);
0, skb);
}
static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
@ -356,7 +365,7 @@ static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
{
*stag = T4_STAG_UNSET;
return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0,
0UL, 0, 0, pbl_size, pbl_addr);
0UL, 0, 0, pbl_size, pbl_addr, NULL);
}
static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
@ -383,14 +392,16 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
mhp->attr.mw_bind_enable, mhp->attr.zbva,
mhp->attr.va_fbo, mhp->attr.len ?
mhp->attr.len : -1, shift - 12,
mhp->attr.pbl_size, mhp->attr.pbl_addr);
mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL);
if (ret)
return ret;
ret = finish_mem_reg(mhp, stag);
if (ret)
if (ret) {
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
mhp->attr.pbl_addr, mhp->dereg_skb);
mhp->dereg_skb = NULL;
}
return ret;
}
@ -423,6 +434,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
if (!mhp)
return ERR_PTR(-ENOMEM);
mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
if (!mhp->dereg_skb) {
ret = -ENOMEM;
goto err0;
}
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
@ -435,7 +452,8 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
FW_RI_STAG_NSMR, mhp->attr.perms,
mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0);
mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0,
NULL);
if (ret)
goto err1;
@ -445,8 +463,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
return &mhp->ibmr;
err2:
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
mhp->attr.pbl_addr, mhp->dereg_skb);
err1:
kfree_skb(mhp->dereg_skb);
err0:
kfree(mhp);
return ERR_PTR(ret);
}
@ -481,11 +501,18 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mhp)
return ERR_PTR(-ENOMEM);
mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
if (!mhp->dereg_skb) {
kfree(mhp);
return ERR_PTR(-ENOMEM);
}
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree_skb(mhp->dereg_skb);
kfree(mhp);
return ERR_PTR(err);
}
@ -550,6 +577,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
err:
ib_umem_release(mhp->umem);
kfree_skb(mhp->dereg_skb);
kfree(mhp);
return ERR_PTR(err);
}
@ -572,11 +600,16 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
return ERR_PTR(-ENOMEM);
ret = allocate_window(&rhp->rdev, &stag, php->pdid);
if (ret) {
kfree(mhp);
return ERR_PTR(ret);
mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
if (!mhp->dereg_skb) {
ret = -ENOMEM;
goto free_mhp;
}
ret = allocate_window(&rhp->rdev, &stag, php->pdid);
if (ret)
goto free_skb;
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.type = FW_RI_STAG_MW;
@ -584,12 +617,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
mmid = (stag) >> 8;
mhp->ibmw.rkey = stag;
if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
deallocate_window(&rhp->rdev, mhp->attr.stag);
kfree(mhp);
return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
goto dealloc_win;
}
PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
dealloc_win:
deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
free_skb:
kfree_skb(mhp->dereg_skb);
free_mhp:
kfree(mhp);
return ERR_PTR(ret);
}
int c4iw_dealloc_mw(struct ib_mw *mw)
@ -602,7 +642,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
rhp = mhp->rhp;
mmid = (mw->rkey) >> 8;
remove_handle(rhp, &rhp->mmidr, mmid);
deallocate_window(&rhp->rdev, mhp->attr.stag);
deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
kfree_skb(mhp->dereg_skb);
kfree(mhp);
PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
return 0;
@ -666,7 +707,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
return &(mhp->ibmr);
err3:
dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
mhp->attr.pbl_addr, mhp->dereg_skb);
err2:
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
@ -717,7 +758,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
mhp->attr.pbl_addr, mhp->dereg_skb);
if (mhp->attr.pbl_size)
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);

View file

@ -409,20 +409,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
/*
 * Device-attribute "show" handler: writes the firmware version into @buf
 * as "major.minor.micro.build\n" and returns sprintf()'s result.
 *
 * @dev is the struct device embedded at ibdev.dev inside a struct c4iw_dev;
 * the version word is read from that c4iw_dev's rdev.lldi.fw_vers.
 * @attr is not used.
 */
static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct c4iw_dev *rhp;

	rhp = container_of(dev, struct c4iw_dev, ibdev.dev);
	PDBG("%s dev 0x%p\n", __func__, dev);

	return sprintf(buf, "%u.%u.%u.%u\n",
		       FW_HDR_FW_VER_MAJOR_G(rhp->rdev.lldi.fw_vers),
		       FW_HDR_FW_VER_MINOR_G(rhp->rdev.lldi.fw_vers),
		       FW_HDR_FW_VER_MICRO_G(rhp->rdev.lldi.fw_vers),
		       FW_HDR_FW_VER_BUILD_G(rhp->rdev.lldi.fw_vers));
}
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
{
@ -502,13 +488,11 @@ static int c4iw_get_mib(struct ib_device *ibdev,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *c4iw_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
};
@ -530,6 +514,20 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
/*
 * Format the adapter firmware version into @str as
 * "major.minor.micro.build" (no trailing newline), writing at most
 * @str_len bytes via snprintf().
 *
 * @dev is the ib_device embedded as the ibdev member of a struct c4iw_dev;
 * the version word is read from that c4iw_dev's rdev.lldi.fw_vers.
 */
static void get_dev_fw_str(struct ib_device *dev, char *str,
			   size_t str_len)
{
	struct c4iw_dev *rhp;

	rhp = container_of(dev, struct c4iw_dev, ibdev);
	PDBG("%s dev 0x%p\n", __func__, dev);

	snprintf(str, str_len, "%u.%u.%u.%u",
		 FW_HDR_FW_VER_MAJOR_G(rhp->rdev.lldi.fw_vers),
		 FW_HDR_FW_VER_MINOR_G(rhp->rdev.lldi.fw_vers),
		 FW_HDR_FW_VER_MICRO_G(rhp->rdev.lldi.fw_vers),
		 FW_HDR_FW_VER_BUILD_G(rhp->rdev.lldi.fw_vers));
}
int c4iw_register_device(struct c4iw_dev *dev)
{
int ret;
@ -605,6 +603,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.get_hw_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
dev->ibdev.get_dev_fw_str = get_dev_fw_str;
dev->ibdev.drain_sq = c4iw_drain_sq;
dev->ibdev.drain_rq = c4iw_drain_rq;

View file

@ -683,17 +683,25 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
return 0;
}
/*
 * kref release function for a c4iw_qp: recovers the QP that embeds @kref,
 * logs it, and frees it with kfree().
 */
void _free_qp(struct kref *kref)
{
	struct c4iw_qp *qp = container_of(kref, struct c4iw_qp, kref);

	PDBG("%s qhp %p\n", __func__, qp);
	kfree(qp);
}
void c4iw_qp_add_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
atomic_inc(&(to_c4iw_qp(qp)->refcnt));
kref_get(&to_c4iw_qp(qp)->kref);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt)))
wake_up(&(to_c4iw_qp(qp)->wait));
kref_put(&to_c4iw_qp(qp)->kref, _free_qp);
}
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@ -1081,9 +1089,10 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
qhp->ep->hwtid);
skb = alloc_skb(sizeof *wqe, gfp);
if (!skb)
skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
if (WARN_ON(!skb))
return;
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
@ -1202,9 +1211,10 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
ep->hwtid);
skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
if (!skb)
skb = skb_dequeue(&ep->com.ep_skb_list);
if (WARN_ON(!skb))
return -ENOMEM;
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
@ -1592,8 +1602,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
wait_event(qhp->wait, !qhp->ep);
remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
atomic_dec(&qhp->refcnt);
wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
spin_lock_irq(&rhp->lock);
if (!list_empty(&qhp->db_fc_entry))
@ -1606,8 +1614,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
c4iw_qp_rem_ref(ib_qp);
PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
kfree(qhp);
return 0;
}
@ -1704,7 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
kref_init(&qhp->kref);
ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
if (ret)
@ -1896,12 +1905,20 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
return 0;
}
static void move_qp_to_err(struct c4iw_qp *qp)
{
struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR };
(void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
void c4iw_drain_sq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
unsigned long flag;
bool need_to_wait;
move_qp_to_err(qp);
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_sq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
@ -1916,6 +1933,7 @@ void c4iw_drain_rq(struct ib_qp *ibqp)
unsigned long flag;
bool need_to_wait;
move_qp_to_err(qp);
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_rq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);

View file

@ -3,7 +3,6 @@ config INFINIBAND_HFI1
depends on X86_64 && INFINIBAND_RDMAVT
select MMU_NOTIFIER
select CRC32
default m
---help---
This is a low-level driver for Intel OPA Gen1 adapter.
config HFI1_DEBUG_SDMA_ORDER

View file

@ -228,7 +228,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(struct hfi1_base_info));
break;
case HFI1_IOCTL_CREDIT_UPD:
if (uctxt && uctxt->sc)
if (uctxt)
sc_return_credits(uctxt->sc);
break;

View file

@ -1174,6 +1174,8 @@ struct hfi1_devdata {
/*
 * 8051 firmware version helpers.
 *
 * dc8051_ver(a, b)   packs major @a into bits 15:8 and minor @b into the
 *                    low bits of a single value.
 * dc8051_ver_maj(a)  extracts bits 15:8 (the major part) of a packed value.
 * dc8051_ver_min(a)  extracts bits 7:0 (the minor part) of a packed value.
 *
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. "x | y") is masked as a whole instead of being split by operator
 * precedence.
 */
#define dc8051_ver(a, b) (((a) << 8) | (b))
#define dc8051_ver_maj(a) (((a) & 0xff00) >> 8)
#define dc8051_ver_min(a) ((a) & 0x00ff)
/* f_put_tid types */
#define PT_EXPECTED 0

View file

@ -1291,9 +1291,12 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u16 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
(u64)dc8051_ver_min(ver);
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
@ -1567,6 +1570,17 @@ static void init_ibport(struct hfi1_pportdata *ppd)
RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
/*
 * Format the 8051 firmware version into @str as "major.minor" (no trailing
 * newline), writing at most @str_len bytes via snprintf().
 *
 * The packed version is the dc8051_ver field of the hfi1_devdata reached
 * from @ibdev through ib_to_rvt(), dev_from_rdi() and dd_from_dev().
 */
static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
				size_t str_len)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(ib_to_rvt(ibdev));
	u16 fw = dd_from_dev(verbs_dev)->dc8051_ver;

	snprintf(str, str_len, "%u.%u", dc8051_ver_maj(fw), dc8051_ver_min(fw));
}
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@ -1613,6 +1627,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
strncpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));

View file

@ -1567,12 +1567,12 @@ static enum i40iw_status_code i40iw_del_multiple_qhash(
ret = i40iw_manage_qhash(iwdev, cm_info,
I40IW_QHASH_TYPE_TCP_SYN,
I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
kfree(child_listen_node);
cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
"freed pointer = %p\n",
child_listen_node);
kfree(child_listen_node);
cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
}
spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);

View file

@ -1557,6 +1557,9 @@ enum i40iw_alignment {
/*
 * Ring index helpers. Each expands to a bare assignment (no do/while
 * wrapper) and evaluates _ring more than once, so pass a side-effect-free
 * lvalue and use the macro as a full statement.
 */
/* Advance the ring tail by one slot, wrapping modulo the ring size. */
#define I40IW_RING_MOVE_TAIL(_ring) \
(_ring).tail = ((_ring).tail + 1) % (_ring).size
/*
 * Advance the ring head by one slot, wrapping modulo the ring size.
 * Unlike I40IW_RING_MOVE_HEAD(ring, status) this takes no status argument
 * and does nothing besides the increment.
 */
#define I40IW_RING_MOVE_HEAD_NOCHECK(_ring) \
(_ring).head = ((_ring).head + 1) % (_ring).size
/* Advance the ring tail by _count slots, wrapping modulo the ring size. */
#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
(_ring).tail = ((_ring).tail + (_count)) % (_ring).size

View file

@ -1025,6 +1025,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
u16 txoffset, bufoffset;
buf = i40iw_puda_get_listbuf(pbufl);
if (!buf)
return;
nextseqnum = buf->seqnum + fpdu_len;
txbuf->totallen = buf->hdrlen + fpdu_len;
txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
@ -1048,6 +1050,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
fpdu_len -= buf->datalen;
i40iw_puda_ret_bufpool(ieq, buf);
buf = i40iw_puda_get_listbuf(pbufl);
if (!buf)
return;
bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
} while (1);

View file

@ -667,7 +667,7 @@ struct i40iw_tcp_offload_info {
bool time_stamp;
u8 cwnd_inc_limit;
bool drop_ooo_seg;
bool dup_ack_thresh;
u8 dup_ack_thresh;
u8 ttl;
u8 src_mac_addr_idx;
bool avoid_stretch_ack;

View file

@ -291,9 +291,9 @@ static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
for (i = 1; i < op_info->num_lo_sges; i++) {
byte_off = 32 + (i - 1) * 16;
for (i = 1, byte_off = 32; i < op_info->num_lo_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
@ -401,9 +401,9 @@ static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
i40iw_set_fragment(wqe, 0, op_info->sg_list);
for (i = 1; i < op_info->num_sges; i++) {
byte_off = 32 + (i - 1) * 16;
for (i = 1, byte_off = 32; i < op_info->num_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
@ -685,9 +685,9 @@ static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp,
i40iw_set_fragment(wqe, 0, info->sg_list);
for (i = 1; i < info->num_sges; i++) {
byte_off = 32 + (i - 1) * 16;
for (i = 1, byte_off = 32; i < info->num_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
@ -753,8 +753,7 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
* @post_cq: update cq tail
*/
static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
struct i40iw_cq_poll_info *info,
bool post_cq)
struct i40iw_cq_poll_info *info)
{
u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
u64 *cqe, *sw_wqe;
@ -762,7 +761,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
struct i40iw_ring *pring = NULL;
u32 wqe_idx, q_type, array_idx = 0;
enum i40iw_status_code ret_code = 0;
enum i40iw_status_code ret_code2 = 0;
bool move_cq_head = true;
u8 polarity;
u8 addl_wqes = 0;
@ -870,19 +868,14 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
move_cq_head = false;
if (move_cq_head) {
I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2);
if (ret_code2 && !ret_code)
ret_code = ret_code2;
I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
cq->polarity ^= 1;
if (post_cq) {
I40IW_RING_MOVE_TAIL(cq->cq_ring);
set_64bit_val(cq->shadow_area, 0,
I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
}
I40IW_RING_MOVE_TAIL(cq->cq_ring);
set_64bit_val(cq->shadow_area, 0,
I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
} else {
if (info->is_srq)
return ret_code;

View file

@ -327,7 +327,7 @@ struct i40iw_cq_ops {
void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
enum i40iw_completion_notify);
enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
struct i40iw_cq_poll_info *, bool);
struct i40iw_cq_poll_info *);
enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
};

View file

@ -529,7 +529,7 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift);
if (status)
return -ENOSYS;
return -ENOMEM;
sqdepth = sq_size << sqshift;
rqdepth = rq_size << rqshift;
@ -671,7 +671,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
if (init_attr->qp_type != IB_QPT_RC) {
err_code = -ENOSYS;
err_code = -EINVAL;
goto error;
}
if (iwdev->push_mode)
@ -1840,6 +1840,7 @@ struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
iwmr->ibmr.lkey = stag;
iwmr->page_cnt = 1;
iwmr->pgaddrmem[0] = addr;
iwmr->length = size;
status = i40iw_hwreg_mr(iwdev, iwmr, access);
if (status) {
i40iw_free_stag(iwdev, stag);
@ -1863,7 +1864,7 @@ static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
{
u64 kva = 0;
return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva);
}
/**
@ -1974,18 +1975,6 @@ static ssize_t i40iw_show_rev(struct device *dev,
return sprintf(buf, "%x\n", hw_rev);
}
/**
 * i40iw_show_fw_ver - device-attribute handler printing the firmware version
 * @dev: not used
 * @attr: not used
 * @buf: output buffer
 *
 * Writes "<I40IW_FW_VERSION>.<low byte of I40IW_FW_VERSION>\n" into @buf
 * and returns sprintf()'s result.
 */
static ssize_t i40iw_show_fw_ver(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	u32 fw_ver = I40IW_FW_VERSION;
	u32 low_byte = fw_ver & 0x000000ff;

	return sprintf(buf, "%u.%u\n", fw_ver, low_byte);
}
/**
* i40iw_show_hca
*/
@ -2006,13 +1995,11 @@ static ssize_t i40iw_show_board(struct device *dev,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
static struct device_attribute *i40iw_dev_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
@ -2091,8 +2078,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
}
if (ret)
err = -EIO;
if (ret) {
if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
err = -ENOMEM;
else
err = -EINVAL;
}
break;
case IB_WR_RDMA_WRITE:
info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
@ -2113,8 +2104,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
}
if (ret)
err = -EIO;
if (ret) {
if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
err = -ENOMEM;
else
err = -EINVAL;
}
break;
case IB_WR_RDMA_READ_WITH_INV:
inv_stag = true;
@ -2132,15 +2127,19 @@ static int i40iw_post_send(struct ib_qp *ibqp,
info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
if (ret)
err = -EIO;
if (ret) {
if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
err = -ENOMEM;
else
err = -EINVAL;
}
break;
case IB_WR_LOCAL_INV:
info.op_type = I40IW_OP_TYPE_INV_STAG;
info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
if (ret)
err = -EIO;
err = -ENOMEM;
break;
case IB_WR_REG_MR:
{
@ -2174,7 +2173,7 @@ static int i40iw_post_send(struct ib_qp *ibqp,
ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
if (ret)
err = -EIO;
err = -ENOMEM;
break;
}
default:
@ -2214,6 +2213,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
enum i40iw_status_code ret = 0;
unsigned long flags;
int err = 0;
iwqp = (struct i40iw_qp *)ibqp;
ukqp = &iwqp->sc_qp.qp_uk;
@ -2228,6 +2228,10 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
if (ret) {
i40iw_pr_err(" post_recv err %d\n", ret);
if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
err = -ENOMEM;
else
err = -EINVAL;
*bad_wr = ib_wr;
goto out;
}
@ -2235,9 +2239,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
}
out:
spin_unlock_irqrestore(&iwqp->lock, flags);
if (ret)
return -ENOSYS;
return 0;
return err;
}
/**
@ -2264,7 +2266,7 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
spin_lock_irqsave(&iwcq->lock, flags);
while (cqe_count < num_entries) {
ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info);
if (ret == I40IW_ERR_QUEUE_EMPTY) {
break;
} else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
@ -2437,6 +2439,15 @@ static const char * const i40iw_hw_stat_names[] = {
"iwRdmaInv"
};
/*
 * Format the firmware version into @str (at most @str_len bytes, snprintf
 * semantics) as "<I40IW_FW_VERSION>.<low byte of I40IW_FW_VERSION>", with
 * no trailing newline.  @dev is not used.
 *
 * NOTE(review): the first field prints the whole version word rather than
 * a shifted/masked major part -- confirm this matches the intended
 * encoding of I40IW_FW_VERSION.
 */
static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str,
				 size_t str_len)
{
	u32 fw_ver = I40IW_FW_VERSION;
	u32 low_byte = fw_ver & 0x000000ff;

	snprintf(str, str_len, "%u.%u", fw_ver, low_byte);
}
/**
* i40iw_alloc_hw_stats - Allocate a hw stats structure
* @ibdev: device pointer from stack
@ -2528,7 +2539,7 @@ static int i40iw_modify_port(struct ib_device *ibdev,
int port_modify_mask,
struct ib_port_modify *props)
{
return 0;
return -ENOSYS;
}
/**
@ -2660,6 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(iwibdev->ibdev.iwcm->ifname));
iwibdev->ibdev.get_port_immutable = i40iw_port_immutable;
iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str;
iwibdev->ibdev.poll_cq = i40iw_poll_cq;
iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
iwibdev->ibdev.post_send = i40iw_post_send;
@ -2723,7 +2735,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
if (!iwdev->iwibdev)
return -ENOSYS;
return -ENOMEM;
iwibdev = iwdev->iwibdev;
ret = ib_register_device(&iwibdev->ibdev, NULL);
@ -2748,5 +2760,5 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
kfree(iwdev->iwibdev->ibdev.iwcm);
iwdev->iwibdev->ibdev.iwcm = NULL;
ib_dealloc_device(&iwdev->iwibdev->ibdev);
return -ENOSYS;
return ret;
}

View file

@ -288,7 +288,7 @@ static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
if (cq->resize_buf)
return -EBUSY;
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;
@ -316,7 +316,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return -EFAULT;
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;

View file

@ -2025,16 +2025,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
/*
 * sysfs show callback for "fw_ver": prints caps.fw_ver as three
 * dot-separated decimal fields (value >> 32, bits 31..16, bits 15..0)
 * followed by a newline.
 */
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
			   char *buf)
{
	struct mlx4_ib_dev *mdev =
		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
	int major = (int) (mdev->dev->caps.fw_ver >> 32);
	int minor = (int) (mdev->dev->caps.fw_ver >> 16) & 0xffff;
	int subminor = (int) mdev->dev->caps.fw_ver & 0xffff;

	return sprintf(buf, "%d.%d.%d\n", major, minor, subminor);
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
@ -2053,17 +2043,204 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
/* Table of the device attributes declared above with DEVICE_ATTR(). */
static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
/*
 * One diagnostic counter description: the name exposed for it and the
 * offset value that is later passed to mlx4_query_diag_counters().
 */
struct diag_counter {
const char *name;
u32 offset;
};
/*
 * Initializer for a struct diag_counter: .name is the stringified _name
 * token, .offset is _offset.
 */
#define DIAG_COUNTER(_name, _offset) \
{ .name = #_name, .offset = _offset }
/*
 * Counters that mlx4_ib_fill_diag_counters() always copies first,
 * regardless of device capabilities or port/device scope.
 */
static const struct diag_counter diag_basic[] = {
DIAG_COUNTER(rq_num_lle, 0x00),
DIAG_COUNTER(sq_num_lle, 0x04),
DIAG_COUNTER(rq_num_lqpoe, 0x08),
DIAG_COUNTER(sq_num_lqpoe, 0x0C),
DIAG_COUNTER(rq_num_lpe, 0x18),
DIAG_COUNTER(sq_num_lpe, 0x1C),
DIAG_COUNTER(rq_num_wrfe, 0x20),
DIAG_COUNTER(sq_num_wrfe, 0x24),
DIAG_COUNTER(sq_num_mwbe, 0x2C),
DIAG_COUNTER(sq_num_bre, 0x34),
DIAG_COUNTER(sq_num_rire, 0x44),
DIAG_COUNTER(rq_num_rire, 0x48),
DIAG_COUNTER(sq_num_rae, 0x4C),
DIAG_COUNTER(rq_num_rae, 0x50),
DIAG_COUNTER(sq_num_roe, 0x54),
DIAG_COUNTER(sq_num_tree, 0x5C),
DIAG_COUNTER(sq_num_rree, 0x64),
DIAG_COUNTER(rq_num_rnr, 0x68),
DIAG_COUNTER(sq_num_rnr, 0x6C),
DIAG_COUNTER(rq_num_oos, 0x100),
DIAG_COUNTER(sq_num_oos, 0x104),
};
/*
 * Extra counters, included only when caps.flags2 has
 * MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT set.
 */
static const struct diag_counter diag_ext[] = {
DIAG_COUNTER(rq_num_dup, 0x130),
DIAG_COUNTER(sq_num_to, 0x134),
};
/* Counters appended only to the device-wide (non per-port) counter set. */
static const struct diag_counter diag_device_only[] = {
DIAG_COUNTER(num_cqovf, 0x1A0),
DIAG_COUNTER(rq_num_udsdprd, 0x118),
};
/*
 * Allocate the rdma_hw_stats structure for the device (port_num == 0) or
 * for a port (port_num != 0). Returns NULL when the matching counter set
 * has no name table.
 */
static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
						    u8 port_num)
{
	/* Slot 0 serves port_num == 0, slot 1 serves every non-zero port. */
	struct mlx4_ib_diag_counters *set =
		&to_mdev(ibdev)->diag_counters[port_num ? 1 : 0];

	if (set->name == NULL)
		return NULL;

	return rdma_alloc_hw_stats_struct(set->name, set->num_counters,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
/*
 * Query the diagnostic counters for the device (port == 0) or a port
 * (port != 0) and copy them into stats->value[].
 *
 * Returns the number of counters copied, or the error returned by
 * mlx4_query_diag_counters().
 */
static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u8 port, int index)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_diag_counters *set = &dev->diag_counters[port ? 1 : 0];
	/* Sized for the largest possible counter set. */
	u32 hw_value[ARRAY_SIZE(diag_device_only) +
		     ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
	int err;
	int n;

	err = mlx4_query_diag_counters(dev->dev,
				       MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
				       set->offset, hw_value,
				       set->num_counters, port);
	if (err)
		return err;

	n = 0;
	while (n < set->num_counters) {
		stats->value[n] = hw_value[n];
		n++;
	}

	return set->num_counters;
}
/*
 * Allocate zeroed name and offset arrays for one counter set.
 *
 * The set always has room for diag_basic, plus diag_ext when the device
 * reports MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT, plus diag_device_only when
 * @port is false.
 *
 * On success stores the arrays in *name / *offset, the element count in
 * *num, and returns 0. Returns -ENOMEM on allocation failure; in that
 * case *num is untouched and *name is not reset after being freed.
 */
static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
					 const char ***name,
					 u32 **offset,
					 u32 *num,
					 bool port)
{
	u32 total = ARRAY_SIZE(diag_basic);

	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
		total += ARRAY_SIZE(diag_ext);
	if (!port)
		total += ARRAY_SIZE(diag_device_only);

	*name = kcalloc(total, sizeof(**name), GFP_KERNEL);
	if (*name == NULL)
		return -ENOMEM;

	*offset = kcalloc(total, sizeof(**offset), GFP_KERNEL);
	if (*offset == NULL) {
		kfree(*name);
		return -ENOMEM;
	}

	*num = total;
	return 0;
}
/*
 * Populate the name/offset arrays of one counter set, in this order:
 * diag_basic, then diag_ext (only with MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT),
 * then diag_device_only (only when @port is false).
 */
static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
				       const char **name,
				       u32 *offset,
				       bool port)
{
	int out = 0;	/* next free slot in name[] / offset[] */
	int k;

	for (k = 0; k < ARRAY_SIZE(diag_basic); k++, out++) {
		name[out] = diag_basic[k].name;
		offset[out] = diag_basic[k].offset;
	}

	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
		for (k = 0; k < ARRAY_SIZE(diag_ext); k++, out++) {
			name[out] = diag_ext[k].name;
			offset[out] = diag_ext[k].offset;
		}
	}

	if (port)
		return;

	for (k = 0; k < ARRAY_SIZE(diag_device_only); k++, out++) {
		name[out] = diag_device_only[k].name;
		offset[out] = diag_device_only[k].offset;
	}
}
/*
 * Build the diagnostic counter sets of @ibdev and hook up the hw-stats
 * callbacks. Set 0 is always built; set 1 (port counters) is built only
 * when the device reports MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT.
 *
 * Returns 0 on success or the error from __mlx4_ib_alloc_diag_counters();
 * on failure the arrays of the preceding set, if any, are freed.
 */
static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
{
	struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
	const bool per_port = (ibdev->dev->caps.flags2 &
			       MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) != 0;
	int type;
	int err;

	for (type = 0; type < MLX4_DIAG_COUNTERS_TYPES; type++) {
		/* type == 1 means we are building port counters */
		if (type != 0 && !per_port)
			continue;

		err = __mlx4_ib_alloc_diag_counters(ibdev, &diag[type].name,
						    &diag[type].offset,
						    &diag[type].num_counters,
						    type);
		if (err) {
			/* Undo the previous set, if there was one. */
			if (type != 0) {
				kfree(diag[type - 1].name);
				kfree(diag[type - 1].offset);
			}
			return err;
		}

		mlx4_ib_fill_diag_counters(ibdev, diag[type].name,
					   diag[type].offset, type);
	}

	ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
	ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;

	return 0;
}
static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
{
int i;
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
kfree(ibdev->diag_counters[i].offset);
kfree(ibdev->diag_counters[i].name);
}
}
#define MLX4_IB_INVALID_MAC ((u64)-1)
static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
@ -2280,6 +2457,17 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
/*
 * get_dev_fw_str callback: writes caps.fw_ver into @str (at most @str_len
 * bytes) as three dot-separated decimal fields: value >> 32, bits 31..16
 * and bits 15..0.
 */
static void get_fw_ver_str(struct ib_device *device, char *str,
			   size_t str_len)
{
	struct mlx4_ib_dev *mdev =
		container_of(device, struct mlx4_ib_dev, ib_dev);
	int major = (int) (mdev->dev->caps.fw_ver >> 32);
	int minor = (int) (mdev->dev->caps.fw_ver >> 16) & 0xffff;
	int subminor = (int) mdev->dev->caps.fw_ver & 0xffff;

	snprintf(str, str_len, "%d.%d.%d", major, minor, subminor);
}
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
struct mlx4_ib_dev *ibdev;
@ -2413,6 +2601,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
if (!mlx4_is_slave(ibdev->dev)) {
@ -2555,9 +2744,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
if (ib_register_device(&ibdev->ib_dev, NULL))
if (mlx4_ib_alloc_diag_counters(ibdev))
goto err_steer_free_bitmap;
if (ib_register_device(&ibdev->ib_dev, NULL))
goto err_diag_counters;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
@ -2623,6 +2815,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
err_reg:
ib_unregister_device(&ibdev->ib_dev);
err_diag_counters:
mlx4_ib_diag_cleanup(ibdev);
err_steer_free_bitmap:
kfree(ibdev->ib_uc_qpns_bitmap);
@ -2726,6 +2921,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");

View file

@ -549,6 +549,14 @@ struct mlx4_ib_counters {
u32 default_counter;
};
/* Number of diagnostic counter sets kept per device (size of diag_counters[]). */
#define MLX4_DIAG_COUNTERS_TYPES 2
/* One diagnostic counter set: parallel name/offset arrays and their length. */
struct mlx4_ib_diag_counters {
const char **name;
u32 *offset;
u32 num_counters;
};
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@ -585,6 +593,7 @@ struct mlx4_ib_dev {
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
};
struct ib_event_work {

View file

@ -424,6 +424,83 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
item->key = be32_to_cpu(cqe->mkey);
}
/*
 * Generate flush-error completions in software for the outstanding
 * entries of @qp's send queue.
 *
 * Fills @wc starting at its first element, stopping once the running
 * total in *npolled reaches @num_entries or the queue (head - tail) is
 * drained. *npolled is updated with the new total.
 */
static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
			 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_wq *sq = &qp->sq;
	unsigned int pending = sq->head - sq->tail;
	unsigned int slot;
	int done = *npolled;
	int n;

	if (!pending)
		return;

	for (n = 0; n < pending && done < num_entries; n++, wc++, done++) {
		slot = sq->last_poll & (sq->wqe_cnt - 1);

		wc->wr_id = sq->wrid[slot];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wc->qp = &qp->ibqp;

		sq->tail++;
		/* Advance to the entry linked after the one just reported. */
		sq->last_poll = sq->w_list[slot].next;
	}

	*npolled = done;
}
/*
 * Generate flush-error completions in software for the outstanding
 * entries of @qp's receive queue.
 *
 * Fills @wc starting at its first element, stopping once the running
 * total in *npolled reaches @num_entries or the queue (head - tail) is
 * drained. *npolled is updated with the new total.
 */
static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
			 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_wq *rq = &qp->rq;
	unsigned int pending = rq->head - rq->tail;
	int done = *npolled;
	int n;

	if (!pending)
		return;

	for (n = 0; n < pending && done < num_entries; n++, wc++, done++) {
		wc->wr_id = rq->wrid[rq->tail & (rq->wqe_cnt - 1)];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wc->qp = &qp->ibqp;

		rq->tail++;
	}

	*npolled = done;
}
/*
 * Produce software-generated completions for @cq: first for the send
 * queues of the QPs on cq->list_send_qp, then for the receive queues of
 * the QPs on cq->list_recv_qp. Stops as soon as @num_entries completions
 * have been written to @wc. *npolled is set to the number written.
 */
static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
				 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_qp *qp;

	*npolled = 0;

	/* Find uncompleted WQEs belonging to this CQ and return mimicked
	 * completions for them.
	 */
	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
		sw_send_comp(qp, num_entries, &wc[*npolled], npolled);
		if (num_entries <= *npolled)
			return;
	}

	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
		sw_recv_comp(qp, num_entries, &wc[*npolled], npolled);
		if (num_entries <= *npolled)
			return;
	}
}
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_ib_qp **cur_qp,
struct ib_wc *wc)
@ -594,12 +671,18 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct mlx5_ib_cq *cq = to_mcq(ibcq);
struct mlx5_ib_qp *cur_qp = NULL;
struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int soft_polled = 0;
int npolled;
int err = 0;
spin_lock_irqsave(&cq->lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
goto out;
}
if (unlikely(!list_empty(&cq->wc_list)))
soft_polled = poll_soft_wc(cq, num_entries, wc);
@ -612,7 +695,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
if (npolled)
mlx5_cq_set_ci(&cq->mcq);
out:
spin_unlock_irqrestore(&cq->lock, flags);
if (err == 0 || err == -EAGAIN)
@ -843,6 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
cq->resize_buf = NULL;
cq->resize_umem = NULL;
cq->create_flags = attr->flags;
INIT_LIST_HEAD(&cq->list_send_qp);
INIT_LIST_HEAD(&cq->list_recv_qp);
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,

View file

@ -69,15 +69,6 @@ static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}
/* Return the index following @index, wrapping at gsi->cap.max_send_wr. */
static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
{
	u32 next = index + 1;

	return next % gsi->cap.max_send_wr;
}
/*
 * Iterate @index from gsi->outstanding_ci up to (not including)
 * gsi->outstanding_pi, stepping with next_outstanding().
 */
#define for_each_outstanding_wr(gsi, index) \
for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
index = next_outstanding(gsi, index))
/* Call with gsi->lock locked */
static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
{
@ -85,8 +76,9 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
struct mlx5_ib_gsi_wr *wr;
u32 index;
for_each_outstanding_wr(gsi, index) {
wr = &gsi->outstanding_wrs[index];
for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
index++) {
wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];
if (!wr->completed)
break;
@ -430,8 +422,9 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
return -ENOMEM;
}
gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
gsi->cap.max_send_wr];
gsi->outstanding_pi++;
if (!wc) {
memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));

View file

@ -42,11 +42,13 @@
#include <asm/pat.h>
#endif
#include <linux/sched.h>
#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
@ -457,8 +459,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
struct mlx5_ib_query_device_resp resp = {};
size_t resp_len;
u64 max_tso;
if (uhw->inlen || uhw->outlen)
resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
if (uhw->outlen && uhw->outlen < resp_len)
return -EINVAL;
else
resp.response_length = resp_len;
if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
return -EINVAL;
memset(props, 0, sizeof(*props));
@ -511,10 +522,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
(MLX5_CAP_ETH(dev->mdev, csum_cap)))
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
if (MLX5_CAP_ETH(mdev, csum_cap))
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
if (max_tso) {
resp.tso_caps.max_tso = 1 << max_tso;
resp.tso_caps.supported_qpts |=
1 << IB_QPT_RAW_PACKET;
resp.response_length += sizeof(resp.tso_caps);
}
}
}
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
props->device_cap_flags |= IB_DEVICE_UD_TSO;
@ -576,6 +598,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (!mlx5_core_is_pf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
return err;
}
return 0;
}
@ -983,6 +1012,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
goto out_uars;
}
INIT_LIST_HEAD(&context->vma_private_list);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@ -992,6 +1022,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (field_avail(typeof(resp), cqe_version, udata->outlen))
resp.response_length += sizeof(resp.cqe_version);
if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
resp.response_length += sizeof(resp.cmds_supp_uhw);
}
/*
* We don't want to expose information from the PCI bar that is located
* after 4096 bytes, so if the arch only supports larger pages, let's
@ -1006,8 +1041,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
offsetof(struct mlx5_init_seg, internal_timer_h) %
PAGE_SIZE;
resp.response_length += sizeof(resp.hca_core_clock_offset) +
sizeof(resp.reserved2) +
sizeof(resp.reserved3);
sizeof(resp.reserved2);
}
err = ib_copy_to_udata(udata, &resp, resp.response_length);
@ -1086,6 +1120,125 @@ static int get_index(unsigned long offset)
return get_arg(offset);
}
/*
 * vm_ops open hook.
 *
 * Called when a new VMA is created on top of ours, either through the
 * mremap flow or split_vma (usually due to mlock, madvise, munmap, etc.).
 * A clone of this VMA is not supported because the mapping is strongly
 * hardware related. The original VMA is assumed to be exactly one page,
 * so no splitting operation is expected to happen to it.
 */
static void mlx5_ib_vma_open(struct vm_area_struct *area)
{
	/* Drop the ops on the new/cloned VMA so it never calls back into
	 * this driver and attempts incorrect actions.
	 */
	area->vm_ops = NULL;
}
/*
 * vm_ops close hook: unlink and free the private data attached to @area.
 *
 * All VMAs opened on a FD are guaranteed to be closed before the file
 * itself is closed, so no sync is needed with the regular closing flow
 * (e.g. mlx5 ib_dealloc_ucontext). Synchronization is still needed with
 * the VMA access done by mlx5_ib_disassociate_ucontext().
 * The close operation is usually called under mm->mmap_sem, except when
 * the process is exiting; that case is handled explicitly as part of
 * mlx5_ib_disassociate_ucontext().
 */
static void mlx5_ib_vma_close(struct vm_area_struct *area)
{
	struct mlx5_ib_vma_private_data *priv = area->vm_private_data;

	/* Clear the VMA pointer held in the driver's private data to
	 * protect against a race in mlx5_ib_disassociate_ucontext().
	 */
	priv->vma = NULL;

	list_del(&priv->list);
	kfree(priv);
}
/* VMA operations installed on mappings by mlx5_ib_set_vma_data(). */
static const struct vm_operations_struct mlx5_ib_vm_ops = {
.open = mlx5_ib_vma_open,
.close = mlx5_ib_vma_close
};
/*
 * Attach driver-private tracking data to @vma, install mlx5_ib_vm_ops on
 * it and add the tracking entry to ctx->vma_private_list.
 *
 * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
 */
static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
				struct mlx5_ib_ucontext *ctx)
{
	struct mlx5_ib_vma_private_data *priv;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (priv == NULL)
		return -ENOMEM;

	priv->vma = vma;
	vma->vm_private_data = priv;
	vma->vm_ops = &mlx5_ib_vm_ops;

	list_add(&priv->list, &ctx->vma_private_list);

	return 0;
}
/*
 * Tear down every mapping tracked on the ucontext's vma_private_list:
 * zap the first page of each tracked VMA, clear its vm_ops, and free the
 * tracking entry.
 *
 * If the owning task no longer exists the function returns immediately.
 * If the task exists but has no mm, it polls (sleeping 1-2 ms per round)
 * until the task is gone or in TASK_DEAD state, then returns without
 * touching the list.
 */
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
int ret;
struct vm_area_struct *vma;
struct mlx5_ib_vma_private_data *vma_private, *n;
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
/* Reference on the owning task; released on every exit path below. */
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
if (!owning_process)
return;
owning_mm = get_task_mm(owning_process);
if (!owning_mm) {
pr_info("no mm, disassociate ucontext is pending task termination\n");
while (1) {
/* Drop the reference before sleeping, re-acquire after. */
put_task_struct(owning_process);
usleep_range(1000, 2000);
owning_process = get_pid_task(ibcontext->tgid,
PIDTYPE_PID);
if (!owning_process ||
owning_process->state == TASK_DEAD) {
pr_info("disassociate ucontext done, task was terminated\n");
/* in case task was dead need to release the
* task struct.
*/
if (owning_process)
put_task_struct(owning_process);
return;
}
}
}
/* need to protect from a race on closing the vma as part of
* mlx5_ib_vma_close.
*/
down_read(&owning_mm->mmap_sem);
/* _safe variant: each entry is unlinked and freed inside the loop. */
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
vma = vma_private->vma;
ret = zap_vma_ptes(vma, vma->vm_start,
PAGE_SIZE);
WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
/* context going to be destroyed, should
* not access ops any more.
*/
vma->vm_ops = NULL;
list_del(&vma_private->list);
kfree(vma_private);
}
up_read(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
{
switch (cmd) {
@ -1101,8 +1254,10 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
}
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
struct mlx5_uuar_info *uuari = &context->uuari;
int err;
unsigned long idx;
phys_addr_t pfn, pa;
@ -1152,14 +1307,13 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
vma->vm_start, &pa);
return 0;
return mlx5_ib_set_vma_data(vma, context);
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_uuar_info *uuari = &context->uuari;
unsigned long command;
phys_addr_t pfn;
@ -1168,7 +1322,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
return uar_mmap(dev, command, vma, uuari);
return uar_mmap(dev, command, vma, context);
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
@ -1331,6 +1485,32 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
&ib_spec->ipv4.val.dst_ip,
sizeof(ib_spec->ipv4.val.dst_ip));
break;
case IB_FLOW_SPEC_IPV6:
if (ib_spec->size != sizeof(ib_spec->ipv6))
return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
ethertype, 0xffff);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
ethertype, ETH_P_IPV6);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.mask.src_ip,
sizeof(ib_spec->ipv6.mask.src_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.val.src_ip,
sizeof(ib_spec->ipv6.val.src_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.mask.dst_ip,
sizeof(ib_spec->ipv6.mask.dst_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.val.dst_ip,
sizeof(ib_spec->ipv6.val.dst_ip));
break;
case IB_FLOW_SPEC_TCP:
if (ib_spec->size != sizeof(ib_spec->tcp_udp))
return -EINVAL;
@ -1801,15 +1981,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
/*
 * sysfs show callback for "fw_ver": prints the firmware revision as
 * "<major>.<minor>.<sub, zero-padded to 4 digits>\n".
 */
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
			   char *buf)
{
	struct mlx5_ib_dev *ibdev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
	struct mlx5_core_dev *mdev = ibdev->mdev;

	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(mdev),
		       fw_rev_min(mdev), fw_rev_sub(mdev));
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
@ -1828,7 +1999,6 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
@ -1836,7 +2006,6 @@ static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
static struct device_attribute *mlx5_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
&dev_attr_fw_pages,
@ -1854,6 +2023,65 @@ static void pkey_change_handler(struct work_struct *work)
mutex_unlock(&ports->devr->mutex);
}
/*
 * Walk every QP on ibdev->qp_list and collect the CQs that still have
 * outstanding work (queue tail != head) and a completion handler, then
 * invoke the completion callback of each collected CQ once.
 *
 * Lock nesting used below: reset_flow_resource_lock (outermost), then the
 * QP's sq.lock or rq.lock, then the CQ's lock.
 */
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
struct mlx5_ib_qp *mqp;
struct mlx5_ib_cq *send_mcq, *recv_mcq;
struct mlx5_core_cq *mcq;
struct list_head cq_armed_list;
unsigned long flags_qp;
unsigned long flags_cq;
unsigned long flags;
INIT_LIST_HEAD(&cq_armed_list);
/* Go over the QP list residing on this ibdev; sync with create/destroy QP. */
spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
spin_lock_irqsave(&mqp->sq.lock, flags_qp);
if (mqp->sq.tail != mqp->sq.head) {
send_mcq = to_mcq(mqp->ibqp.send_cq);
spin_lock_irqsave(&send_mcq->lock, flags_cq);
if (send_mcq->mcq.comp &&
mqp->ibqp.send_cq->comp_handler) {
/* reset_notify_added keeps each CQ on the list at most once. */
if (!send_mcq->mcq.reset_notify_added) {
send_mcq->mcq.reset_notify_added = 1;
list_add_tail(&send_mcq->mcq.reset_notify,
&cq_armed_list);
}
}
spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
}
spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
spin_lock_irqsave(&mqp->rq.lock, flags_qp);
/* no handling is needed for SRQ */
if (!mqp->ibqp.srq) {
if (mqp->rq.tail != mqp->rq.head) {
recv_mcq = to_mcq(mqp->ibqp.recv_cq);
spin_lock_irqsave(&recv_mcq->lock, flags_cq);
if (recv_mcq->mcq.comp &&
mqp->ibqp.recv_cq->comp_handler) {
if (!recv_mcq->mcq.reset_notify_added) {
recv_mcq->mcq.reset_notify_added = 1;
list_add_tail(&recv_mcq->mcq.reset_notify,
&cq_armed_list);
}
}
spin_unlock_irqrestore(&recv_mcq->lock,
flags_cq);
}
}
spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
}
/* At this point all in-flight post-send operations have been flushed,
* because the locks above were taken and released. Now arm all the
* involved CQs.
*/
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
mcq->comp(mcq);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
@ -1866,6 +2094,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
case MLX5_DEV_EVENT_SYS_ERROR:
ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
break;
case MLX5_DEV_EVENT_PORT_UP:
@ -2272,6 +2501,15 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
/*
 * get_dev_fw_str callback: writes the firmware revision into @str (at
 * most @str_len bytes) as "<major>.<minor>.<sub, zero-padded to 4 digits>".
 */
static void get_dev_fw_str(struct ib_device *ibdev, char *str,
			   size_t str_len)
{
	struct mlx5_ib_dev *dev =
		container_of(ibdev, struct mlx5_ib_dev, ib_dev);
	struct mlx5_core_dev *mdev = dev->mdev;

	snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(mdev),
		 fw_rev_min(mdev), fw_rev_sub(mdev));
}
static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
{
int err;
@ -2298,6 +2536,113 @@ static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
unregister_netdevice_notifier(&dev->roce.nb);
}
/* Release the queue counter of every port of @dev. */
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
	unsigned int port = 0;

	while (port < dev->num_ports) {
		mlx5_core_dealloc_q_counter(dev->mdev,
					    dev->port[port].q_cnt_id);
		port++;
	}
}
/*
 * Allocate one queue counter per port of @dev.
 *
 * Returns 0 on success. If an allocation fails, a warning is logged, the
 * counters allocated so far are released, and the error is returned.
 */
static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
{
	int port;
	int err;

	for (port = 0; port < dev->num_ports; port++) {
		err = mlx5_core_alloc_q_counter(dev->mdev,
						&dev->port[port].q_cnt_id);
		if (!err)
			continue;

		mlx5_ib_warn(dev,
			     "couldn't allocate queue counter for port %d, err %d\n",
			     port + 1, err);

		/* Roll back the ports that were already set up. */
		for (port--; port >= 0; port--)
			mlx5_core_dealloc_q_counter(dev->mdev,
						    dev->port[port].q_cnt_id);
		return err;
	}

	return 0;
}
/*
 * Names of the per-port hardware counters. The order must match
 * stats_offsets[]: entry i here is read from stats_offsets[i].
 */
static const char * const names[] = {
	"rx_write_requests",		/* 0 */
	"rx_read_requests",		/* 1 */
	"rx_atomic_requests",		/* 2 */
	"out_of_buffer",		/* 3 */
	"out_of_sequence",		/* 4 */
	"duplicate_request",		/* 5 */
	"rnr_nak_retry_err",		/* 6 */
	"packet_seq_err",		/* 7 */
	"implied_nak_seq_err",		/* 8 */
	"local_ack_timeout_err",	/* 9 */
};
/*
 * Byte offset of each counter inside the query_q_counter_out buffer.
 * Entry i is the field read for names[i] in mlx5_ib_get_hw_stats().
 */
static const size_t stats_offsets[] = {
MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
};
/*
 * Allocate the rdma_hw_stats structure for a port. Only per-port stats
 * are supported, so NULL is returned for port_num == 0.
 */
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
						    u8 port_num)
{
	/* names[] and stats_offsets[] are indexed in lock-step. */
	BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));

	if (!port_num)
		return NULL;

	return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
/*
 * mlx5_ib_get_hw_stats - read the queue counters of a port into @stats
 * @ibdev: device to query
 * @stats: destination; stats->value[i] receives the counter names[i]
 * @port:  1-based port number; 0 (device-wide) is not supported
 * @index: not used by this implementation
 *
 * Return: the number of counters written (ARRAY_SIZE(names)) on success,
 * -ENOSYS if @port is 0 or @stats is NULL, -ENOMEM if the query buffer
 * cannot be allocated, or the error returned by
 * mlx5_core_query_q_counter(). On error @stats is left unmodified.
 */
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u8 port, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
	void *out;
	__be32 val;
	int ret;
	int i;

	if (!port || !stats)
		return -ENOSYS;

	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	ret = mlx5_core_query_q_counter(dev->mdev,
					dev->port[port - 1].q_cnt_id, 0,
					out, outlen);
	/* Propagate the failure instead of reporting a successful read. */
	if (ret)
		goto free;

	for (i = 0; i < ARRAY_SIZE(names); i++) {
		/* Counters are stored as 32-bit big-endian fields. */
		val = *(__be32 *)(out + stats_offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}
	ret = ARRAY_SIZE(names);

free:
	kvfree(out);
	return ret;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
@ -2320,10 +2665,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->mdev = mdev;
dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port)
goto err_dealloc;
rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
goto err_dealloc;
goto err_free_port;
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
@ -2418,6 +2768,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
@ -2425,6 +2776,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
}
dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
mlx5_ib_internal_fill_odp_caps(dev);
if (MLX5_CAP_GEN(mdev, imaicl)) {
@ -2435,6 +2788,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
}
if (MLX5_CAP_GEN(mdev, xrc)) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@ -2447,9 +2806,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
}
err = init_node_data(dev);
if (err)
@ -2457,6 +2826,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
if (ll == IB_LINK_LAYER_ETHERNET) {
err = mlx5_enable_roce(dev);
@ -2472,10 +2843,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_rsrc;
err = ib_register_device(&dev->ib_dev, NULL);
err = mlx5_ib_alloc_q_counters(dev);
if (err)
goto err_odp;
err = ib_register_device(&dev->ib_dev, NULL);
if (err)
goto err_q_cnt;
err = create_umr_res(dev);
if (err)
goto err_dev;
@ -2497,6 +2872,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
err_dev:
ib_unregister_device(&dev->ib_dev);
err_q_cnt:
mlx5_ib_dealloc_q_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
@ -2507,6 +2885,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (ll == IB_LINK_LAYER_ETHERNET)
mlx5_disable_roce(dev);
err_free_port:
kfree(dev->port);
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
@ -2519,11 +2900,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
ib_unregister_device(&dev->ib_dev);
mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
if (ll == IB_LINK_LAYER_ETHERNET)
mlx5_disable_roce(dev);
kfree(dev->port);
ib_dealloc_device(&dev->ib_dev);
}

View file

@ -105,6 +105,11 @@ enum {
MLX5_CQE_VERSION_V1,
};
/*
 * List node that associates one vm_area_struct with a list.
 * NOTE(review): presumably linked on mlx5_ib_ucontext.vma_private_list so the
 * driver can find a context's mappings later - confirm against the mmap code.
 */
struct mlx5_ib_vma_private_data {
struct list_head list; /* linkage into the owning list */
struct vm_area_struct *vma; /* the VMA this node refers to */
};
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@ -116,6 +121,7 @@ struct mlx5_ib_ucontext {
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
struct list_head vma_private_list;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@ -217,12 +223,41 @@ struct mlx5_ib_wq {
void *qend;
};
/*
 * Driver-private state of a receive work queue (WQ).  Embeds the core ib_wq
 * so that to_mrwq() can recover this structure with container_of().
 */
struct mlx5_ib_rwq {
struct ib_wq ibwq; /* core object handed back to the caller of create_wq */
u32 rqn; /* RQ number filled in by mlx5_core_create_rq() */
u32 rq_num_pas; /* number of page addresses (ncont from mlx5_ib_cont_pages) */
u32 log_rq_stride; /* set equal to wqe_shift in set_user_rq_size() */
u32 log_rq_size; /* ilog2(wqe_count) */
u32 rq_page_offset; /* filled in by mlx5_ib_get_buf_offset() */
u32 log_page_size; /* page_shift - MLX5_ADAPTER_PAGE_SHIFT */
struct ib_umem *umem; /* user buffer obtained with ib_umem_get() */
size_t buf_size; /* wqe_count << wqe_shift, in bytes */
unsigned int page_shift; /* page shift reported by mlx5_ib_cont_pages() */
int create_type; /* MLX5_WQ_USER or MLX5_WQ_KERNEL */
struct mlx5_db db; /* doorbell record mapped by mlx5_ib_db_map_user() */
u32 user_index; /* copied from the user create command */
u32 wqe_count; /* copied from the user command's rq_wqe_count */
u32 wqe_shift; /* copied from the user command's rq_wqe_shift */
int wq_sig; /* 1 when the user passed MLX5_WQ_FLAG_SIGNATURE, else 0 */
};
/*
 * Values stored in mlx5_ib_qp.create_type; destroy_qp_common() only cleans
 * the CQs when the type is MLX5_QP_KERNEL.
 * NOTE(review): USER/KERNEL/EMPTY presumably describe who owns the QP buffer
 * (user space, kernel, none) - confirm against create_qp_common().
 */
enum {
MLX5_QP_USER,
MLX5_QP_KERNEL,
MLX5_QP_EMPTY
};
/*
 * Values stored in mlx5_ib_rwq.create_type.  create_user_rq() records
 * MLX5_WQ_USER once the user buffer and doorbell have been set up.
 */
enum {
MLX5_WQ_USER,
MLX5_WQ_KERNEL
};
/*
 * Driver-private receive WQ indirection table.  Embeds the core object so
 * that to_mrwq_ind_table() can recover this structure with container_of().
 */
struct mlx5_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_rwq_ind_tbl; /* core object returned to the caller */
u32 rqtn; /* RQT number returned by mlx5_core_create_rqt() */
};
/*
* Connect-IB can trigger up to four concurrent pagefaults
* per-QP.
@ -266,6 +301,10 @@ struct mlx5_ib_qp_trans {
u8 resp_depth;
};
/*
 * Per-QP state of an RSS raw-packet QP (member of the union in mlx5_ib_qp).
 */
struct mlx5_ib_rss_qp {
u32 tirn; /* TIR number: set by create_rss_raw_qp_tir(), freed by destroy_rss_raw_qp_tir() */
};
struct mlx5_ib_rq {
struct mlx5_ib_qp_base base;
struct mlx5_ib_wq *rq;
@ -294,6 +333,7 @@ struct mlx5_ib_qp {
union {
struct mlx5_ib_qp_trans trans_qp;
struct mlx5_ib_raw_packet_qp raw_packet_qp;
struct mlx5_ib_rss_qp rss_qp;
};
struct mlx5_buf buf;
@ -340,6 +380,9 @@ struct mlx5_ib_qp {
spinlock_t disable_page_faults_lock;
struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
#endif
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
};
struct mlx5_ib_cq_buf {
@ -401,6 +444,8 @@ struct mlx5_ib_cq {
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
struct list_head list_send_qp;
struct list_head list_recv_qp;
u32 create_flags;
struct list_head wc_list;
enum ib_cq_notify_flags notify_flags;
@ -546,6 +591,10 @@ struct mlx5_ib_resources {
struct mutex mutex;
};
/*
 * Per-port driver data; mlx5_ib_dev.port points at an array with num_ports
 * elements.
 */
struct mlx5_ib_port {
/*
 * Q counter id.  __mlx5_ib_modify_qp() ORs it, shifted left by 24, into
 * qp_counter_set_usr_page on the RESET->INIT transition.
 * NOTE(review): presumably allocated by mlx5_ib_alloc_q_counters() - confirm.
 */
u16 q_cnt_id;
};
struct mlx5_roce {
/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
* netdev pointer
@ -581,6 +630,11 @@ struct mlx5_ib_dev {
struct srcu_struct mr_srcu;
#endif
struct mlx5_ib_flow_db flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@ -628,6 +682,16 @@ static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
return container_of(ibqp, struct mlx5_ib_qp, ibqp);
}
/* Recover the mlx5_ib_rwq that embeds the given core ib_wq. */
static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq)
{
	struct mlx5_ib_rwq *rwq;

	rwq = container_of(ibwq, struct mlx5_ib_rwq, ibwq);
	return rwq;
}
/* Recover the mlx5_ib_rwq_ind_table that embeds the given core object. */
static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
{
	struct mlx5_ib_rwq_ind_table *tbl;

	tbl = container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table,
			   ib_rwq_ind_tbl);
	return tbl;
}
static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
{
return container_of(msrq, struct mlx5_ib_srq, msrq);
@ -762,6 +826,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
/*
 * Receive WQ and RWQ indirection table verbs.  mlx5_ib_add() installs these
 * into dev->ib_dev when the port link layer is Ethernet.
 */
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_destroy_wq(struct ib_wq *wq);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
extern struct workqueue_struct *mlx5_ib_page_fault_wq;

View file

@ -1193,12 +1193,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_core_dev *mdev = dev->mdev;
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
struct mlx5_umr_wr umrwr = {};
struct ib_send_wr *bad;
int err;
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
mlx5_ib_init_umr_context(&umr_context);
umrwr.wr.wr_cqe = &umr_context.cqe;

View file

@ -77,6 +77,10 @@ struct mlx5_wqe_eth_pad {
u8 rsvd0[16];
};
/*
 * Forward declaration: get_cqs() is defined further down in this file but is
 * already needed by create_qp_common().
 */
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
static int is_qp0(enum ib_qp_type qp_type)
{
return qp_type == IB_QPT_SMI;
@ -609,6 +613,11 @@ static int to_mlx5_st(enum ib_qp_type type)
}
}
/*
 * Forward declarations: the CQ lock helpers are defined after
 * create_qp_common(), which takes both CQ locks while linking a new QP into
 * the per-device and per-CQ lists.
 */
static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
{
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
@ -649,6 +658,71 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
return err;
}
/*
 * Tear down the user-space resources of a receive WQ: unmap its doorbell
 * record from the owning user context and release the buffer umem when one
 * is attached.
 */
static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
{
	struct mlx5_ib_ucontext *uctx = to_mucontext(pd->uobject->context);

	mlx5_ib_db_unmap_user(uctx, &rwq->db);

	if (!rwq->umem)
		return;

	ib_umem_release(rwq->umem);
}
static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_rwq *rwq,
struct mlx5_ib_create_wq *ucmd)
{
struct mlx5_ib_ucontext *context;
int page_shift = 0;
int npages;
u32 offset = 0;
int ncont = 0;
int err;
if (!ucmd->buf_addr)
return -EINVAL;
context = to_mucontext(pd->uobject->context);
rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
rwq->buf_size, 0, 0);
if (IS_ERR(rwq->umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
err = PTR_ERR(rwq->umem);
return err;
}
mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift,
&ncont, NULL);
err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
&rwq->rq_page_offset);
if (err) {
mlx5_ib_warn(dev, "bad offset\n");
goto err_umem;
}
rwq->rq_num_pas = ncont;
rwq->page_shift = page_shift;
rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
(unsigned long long)ucmd->buf_addr, rwq->buf_size,
npages, page_shift, ncont, offset);
err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
if (err) {
mlx5_ib_dbg(dev, "map failed\n");
goto err_umem;
}
rwq->create_type = MLX5_WQ_USER;
return 0;
err_umem:
ib_umem_release(rwq->umem);
return err;
}
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
struct ib_qp_init_attr *attr,
@ -1201,6 +1275,187 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
rq->doorbell = &qp->db;
}
static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
}
/*
 * Create the TIR that backs an RSS raw-packet QP.
 *
 * Validates the user command carried in @udata, builds a create_tir_in
 * command that dispatches indirectly through init_attr->rwq_ind_tbl, programs
 * the RX hash function and hashed fields requested by the user, and stores
 * the resulting TIR number in qp->rss_qp.tirn.
 *
 * Returns 0 on success or a negative errno (-EOPNOTSUPP, -EINVAL, -EFAULT,
 * -ENOMEM, or the error from mlx5_core_create_tir()).
 */
static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct ib_uobject *uobj = pd->uobject;
struct ib_ucontext *ucontext = uobj->context;
struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
struct mlx5_ib_create_qp_resp resp = {};
int inlen;
int err;
u32 *in;
void *tirc;
void *hfso;
u32 selected_fields = 0;
size_t min_resp_len;
u32 tdn = mucontext->tdn;
struct mlx5_ib_create_qp_rss ucmd = {};
size_t required_cmd_sz;
/* Only raw-packet QPs may be attached to an indirection table. */
if (init_attr->qp_type != IB_QPT_RAW_PACKET)
return -EOPNOTSUPP;
/* No create flags and no send CQ are accepted for this QP flavour. */
if (init_attr->create_flags || init_attr->send_cq)
return -EINVAL;
/* The response buffer must reach up to and including uuar_index. */
min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
if (udata->outlen < min_resp_len)
return -EINVAL;
/* The command must reach up to and including reserved1. */
required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1);
if (udata->inlen < required_cmd_sz) {
mlx5_ib_dbg(dev, "invalid inlen\n");
return -EINVAL;
}
/* Bytes beyond the structure known to this driver must all be zero. */
if (udata->inlen > sizeof(ucmd) &&
!ib_is_udata_cleared(udata, sizeof(ucmd),
udata->inlen - sizeof(ucmd))) {
mlx5_ib_dbg(dev, "inlen is not supported\n");
return -EOPNOTSUPP;
}
if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
mlx5_ib_dbg(dev, "copy failed\n");
return -EFAULT;
}
if (ucmd.comp_mask) {
mlx5_ib_dbg(dev, "invalid comp mask\n");
return -EOPNOTSUPP;
}
if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) {
mlx5_ib_dbg(dev, "invalid reserved\n");
return -EOPNOTSUPP;
}
/* resp is still all zeroes here; it is copied out before the TIR exists. */
err = ib_copy_to_udata(udata, &resp, min_resp_len);
if (err) {
mlx5_ib_dbg(dev, "copy failed\n");
return -EINVAL;
}
inlen = MLX5_ST_SZ_BYTES(create_tir_in);
in = mlx5_vzalloc(inlen);
if (!in)
return -ENOMEM;
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table,
init_attr->rwq_ind_tbl->ind_tbl_num);
MLX5_SET(tirc, tirc, transport_domain, tdn);
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
/* Toeplitz is the only hash function accepted; anything else is rejected. */
switch (ucmd.rx_hash_function) {
case MLX5_RX_HASH_FUNC_TOEPLITZ:
{
void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
/* The user key must be exactly as long as the TIR context field. */
if (len != ucmd.rx_key_len) {
err = -EINVAL;
goto err;
}
MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
memcpy(rss_key, ucmd.rx_hash_key, len);
break;
}
default:
err = -EOPNOTSUPP;
goto err;
}
if (!ucmd.rx_hash_fields_mask) {
/* special case when this TIR serves as steering entry without hashing */
if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
goto create_tir;
err = -EINVAL;
goto err;
}
/* IPv4 and IPv6 address fields cannot be requested together. */
if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
err = -EINVAL;
goto err;
}
/* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
/* TCP and UDP port fields cannot be requested together either. */
if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) &&
((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) {
err = -EINVAL;
goto err;
}
/* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_TCP);
else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_UDP);
/* Translate the user field mask into the device's selected_fields bits. */
if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
if (err)
goto err;
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
return 0;
err:
/* Shared failure exit: only the command buffer needs freeing. */
kvfree(in);
return err;
}
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
@ -1211,6 +1466,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_cq *send_cq;
struct mlx5_ib_cq *recv_cq;
unsigned long flags;
int inlen = sizeof(*in);
int err;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
@ -1227,6 +1485,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
if (init_attr->rwq_ind_tbl) {
if (!udata)
return -ENOSYS;
err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata);
return err;
}
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
@ -1460,6 +1726,23 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
base->container_mibqp = qp;
base->mqp.event = mlx5_ib_qp_event;
get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
&send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
/* Maintain device to QPs access, needed for further handling via reset
* flow
*/
list_add_tail(&qp->qps_list, &dev->qp_list);
/* Maintain CQ to QPs access, needed for further handling via reset flow
*/
if (send_cq)
list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
if (recv_cq)
list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
return 0;
err_create:
@ -1478,23 +1761,23 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv
if (send_cq) {
if (recv_cq) {
if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock,
SINGLE_DEPTH_NESTING);
} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else {
spin_lock_irq(&recv_cq->lock);
spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock,
SINGLE_DEPTH_NESTING);
}
} else {
spin_lock_irq(&send_cq->lock);
spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
}
} else if (recv_cq) {
spin_lock_irq(&recv_cq->lock);
spin_lock(&recv_cq->lock);
__acquire(&send_cq->lock);
} else {
__acquire(&send_cq->lock);
@ -1509,21 +1792,21 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re
if (recv_cq) {
if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
spin_unlock(&send_cq->lock);
} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
__release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
spin_unlock_irq(&recv_cq->lock);
spin_unlock(&recv_cq->lock);
}
} else {
__release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
spin_unlock(&send_cq->lock);
}
} else if (recv_cq) {
__release(&send_cq->lock);
spin_unlock_irq(&recv_cq->lock);
spin_unlock(&recv_cq->lock);
} else {
__release(&recv_cq->lock);
__release(&send_cq->lock);
@ -1535,17 +1818,18 @@ static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
return to_mpd(qp->ibqp.pd);
}
static void get_cqs(struct mlx5_ib_qp *qp,
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
{
switch (qp->ibqp.qp_type) {
switch (qp_type) {
case IB_QPT_XRC_TGT:
*send_cq = NULL;
*recv_cq = NULL;
break;
case MLX5_IB_QPT_REG_UMR:
case IB_QPT_XRC_INI:
*send_cq = to_mcq(qp->ibqp.send_cq);
*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = NULL;
break;
@ -1557,8 +1841,8 @@ static void get_cqs(struct mlx5_ib_qp *qp,
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
case IB_QPT_RAW_PACKET:
*send_cq = to_mcq(qp->ibqp.send_cq);
*recv_cq = to_mcq(qp->ibqp.recv_cq);
*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
break;
case IB_QPT_MAX:
@ -1577,8 +1861,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_modify_qp_mbox_in *in;
unsigned long flags;
int err;
if (qp->ibqp.rwq_ind_tbl) {
destroy_rss_raw_qp_tir(dev, qp);
return;
}
base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
@ -1602,17 +1892,28 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
base->mqp.qpn);
}
get_cqs(qp, &send_cq, &recv_cq);
get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
&send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
/* del from lists under both locks above to protect reset flow paths */
list_del(&qp->qps_list);
if (send_cq)
list_del(&qp->cq_send_list);
if (recv_cq)
list_del(&qp->cq_recv_list);
if (qp->create_type == MLX5_QP_KERNEL) {
mlx5_ib_lock_cqs(send_cq, recv_cq);
__mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
__mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
NULL);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
}
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
destroy_raw_packet_qp(dev, qp);
@ -2300,7 +2601,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
pd = get_pd(qp);
get_cqs(qp, &send_cq, &recv_cq);
get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
&send_cq, &recv_cq);
context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
@ -2349,6 +2651,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
else
sqd_event = 0;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
struct mlx5_ib_port *mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->sq_crq_size |= cpu_to_be16(1 << 4);
@ -2439,6 +2750,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int port;
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
@ -3397,6 +3711,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
@ -3424,6 +3739,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
goto out;
}
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "\n");
@ -3725,6 +4047,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_data_seg *scat;
struct mlx5_rwqe_sig *sig;
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int err = 0;
int nreq;
@ -3736,6 +4060,13 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qp->rq.lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
goto out;
}
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
@ -4055,6 +4386,9 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int err = 0;
u8 raw_packet_qp_state;
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
@ -4164,3 +4498,322 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
return 0;
}
static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
struct mlx5_ib_dev *dev;
__be64 *rq_pas0;
void *in;
void *rqc;
void *wq;
int inlen;
int err;
dev = to_mdev(pd->device);
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
in = mlx5_vzalloc(inlen);
if (!in)
return -ENOMEM;
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, mem_rq_type,
MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, user_index, rwq->user_index);
MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn);
kvfree(in);
return err;
}
static int set_user_rq_size(struct mlx5_ib_dev *dev,
struct ib_wq_init_attr *wq_init_attr,
struct mlx5_ib_create_wq *ucmd,
struct mlx5_ib_rwq *rwq)
{
/* Sanity check RQ size before proceeding */
if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz)))
return -EINVAL;
if (!ucmd->rq_wqe_count)
return -EINVAL;
rwq->wqe_count = ucmd->rq_wqe_count;
rwq->wqe_shift = ucmd->rq_wqe_shift;
rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
rwq->log_rq_stride = rwq->wqe_shift;
rwq->log_rq_size = ilog2(rwq->wqe_count);
return 0;
}
/*
 * Read and validate the user's create-WQ command, then set up the user-side
 * resources of the receive WQ.
 *
 * The command must be at least long enough to include the `reserved` field;
 * any bytes beyond the structure known to this driver must be zero, and
 * comp_mask and reserved must be zero.  On success the WQ geometry, umem,
 * doorbell mapping and user_index are recorded in @rwq.
 *
 * Returns 0 on success or a negative errno (-EINVAL, -EOPNOTSUPP, -EFAULT,
 * or the error from set_user_rq_size() / create_user_rq()).
 */
static int prepare_user_rq(struct ib_pd *pd,
			   struct ib_wq_init_attr *init_attr,
			   struct ib_udata *udata,
			   struct mlx5_ib_rwq *rwq)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_create_wq ucmd = {};
	int err;
	size_t required_cmd_sz;

	required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
	if (udata->inlen < required_cmd_sz) {
		mlx5_ib_dbg(dev, "invalid inlen\n");
		return -EINVAL;
	}

	/* A longer command is accepted only if the extra bytes are zero. */
	if (udata->inlen > sizeof(ucmd) &&
	    !ib_is_udata_cleared(udata, sizeof(ucmd),
				 udata->inlen - sizeof(ucmd))) {
		mlx5_ib_dbg(dev, "inlen is not supported\n");
		return -EOPNOTSUPP;
	}

	if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
		mlx5_ib_dbg(dev, "copy failed\n");
		return -EFAULT;
	}

	if (ucmd.comp_mask) {
		mlx5_ib_dbg(dev, "invalid comp mask\n");
		return -EOPNOTSUPP;
	}

	if (ucmd.reserved) {
		mlx5_ib_dbg(dev, "invalid reserved\n");
		return -EOPNOTSUPP;
	}

	err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
	if (err) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	err = create_user_rq(dev, pd, rwq, &ucmd);
	if (err) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	rwq->user_index = ucmd.user_index;
	return 0;
}
/*
 * Create a receive work queue on behalf of user space.
 *
 * Only IB_WQT_RQ is supported and a udata is mandatory (-ENOSYS without
 * one).  The new WQ starts in IB_WQS_RESET.  When the caller supplied a
 * response buffer, a response holding only response_length is copied back.
 *
 * Returns the embedded ib_wq on success or an ERR_PTR() on failure; on
 * failure everything created here has been released again.
 */
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
				struct ib_wq_init_attr *init_attr,
				struct ib_udata *udata)
{
	struct mlx5_ib_create_wq_resp resp = {};
	struct mlx5_ib_rwq *rwq;
	struct mlx5_ib_dev *dev;
	size_t min_resp_len;
	int err;

	if (!udata)
		return ERR_PTR(-ENOSYS);

	/* A response buffer is optional, but if present it must be big enough. */
	min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
	if (udata->outlen && udata->outlen < min_resp_len)
		return ERR_PTR(-EINVAL);

	dev = to_mdev(pd->device);
	if (init_attr->wq_type != IB_WQT_RQ) {
		mlx5_ib_dbg(dev, "unsupported wq type %d\n",
			    init_attr->wq_type);
		return ERR_PTR(-EINVAL);
	}

	rwq = kzalloc(sizeof(*rwq), GFP_KERNEL);
	if (!rwq)
		return ERR_PTR(-ENOMEM);

	err = prepare_user_rq(pd, init_attr, udata, rwq);
	if (err)
		goto free_rwq;

	err = create_rq(rwq, pd, init_attr);
	if (err)
		goto undo_user_rq;

	rwq->ibwq.wq_num = rwq->rqn;
	rwq->ibwq.state = IB_WQS_RESET;

	/* Nothing to report back when the caller gave no response buffer. */
	if (!udata->outlen)
		return &rwq->ibwq;

	resp.response_length = offsetof(typeof(resp), response_length) +
			       sizeof(resp.response_length);
	err = ib_copy_to_udata(udata, &resp, resp.response_length);
	if (!err)
		return &rwq->ibwq;

	/* Copy-out failed: unwind in reverse order of creation. */
	mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
undo_user_rq:
	destroy_user_rq(pd, rwq);
free_rwq:
	kfree(rwq);
	return ERR_PTR(err);
}
/*
 * Destroy a receive WQ: destroy the device RQ, release the user-side
 * resources, then free the driver object.  Always returns 0.
 */
int mlx5_ib_destroy_wq(struct ib_wq *wq)
{
	struct mlx5_ib_rwq *mrwq = to_mrwq(wq);
	struct mlx5_ib_dev *mdev_ib = to_mdev(wq->device);

	mlx5_core_destroy_rq(mdev_ib->mdev, mrwq->rqn);
	destroy_user_rq(wq->pd, mrwq);
	kfree(mrwq);

	return 0;
}
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
int sz = 1 << init_attr->log_ind_tbl_size;
struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
size_t min_resp_len;
int inlen;
int err;
int i;
u32 *in;
void *rqtc;
if (udata->inlen > 0 &&
!ib_is_udata_cleared(udata, 0,
udata->inlen))
return ERR_PTR(-EOPNOTSUPP);
min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
if (!rwq_ind_tbl)
return ERR_PTR(-ENOMEM);
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
in = mlx5_vzalloc(inlen);
if (!in) {
err = -ENOMEM;
goto err;
}
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
for (i = 0; i < sz; i++)
MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
kvfree(in);
if (err)
goto err;
rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
if (udata->outlen) {
resp.response_length = offsetof(typeof(resp), response_length) +
sizeof(resp.response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
}
return &rwq_ind_tbl->ib_rwq_ind_tbl;
err_copy:
mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
err:
kfree(rwq_ind_tbl);
return ERR_PTR(err);
}
/*
 * Destroy a receive WQ indirection table: destroy the device RQT and free
 * the driver object.  Always returns 0.
 */
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
{
	struct mlx5_ib_dev *ibdev = to_mdev(ib_rwq_ind_tbl->device);
	struct mlx5_ib_rwq_ind_table *tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);

	mlx5_core_destroy_rqt(ibdev->mdev, tbl->rqtn);
	kfree(tbl);

	return 0;
}
/*
 * Move a receive WQ to a new state.
 *
 * The current state is taken from @wq_attr when IB_WQ_CUR_STATE is set in
 * @wq_attr_mask, otherwise from wq->state; the target state is taken from
 * @wq_attr when IB_WQ_STATE is set, otherwise it equals the current state.
 * IB_WQS_ERR is translated to MLX5_RQC_STATE_ERR; other state values are
 * passed to the device unchanged.
 *
 * The user command must include the `reserved` field, must be zero beyond
 * the structure known to this driver, and must have comp_mask and reserved
 * cleared.
 *
 * Returns 0 on success (and updates the cached WQ state), or a negative
 * errno: -ENOSYS without a udata, -EINVAL, -EOPNOTSUPP, -EFAULT, -ENOMEM, or
 * the error from mlx5_core_modify_rq().
 */
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
		      u32 wq_attr_mask, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(wq->device);
	struct mlx5_ib_rwq *rwq = to_mrwq(wq);
	struct mlx5_ib_modify_wq ucmd = {};
	size_t required_cmd_sz;
	int curr_wq_state;
	int wq_state;
	int inlen;
	int err;
	void *rqc;
	void *in;

	/*
	 * udata is dereferenced below; refuse callers that have none, the
	 * same way mlx5_ib_create_wq() does, instead of faulting on NULL.
	 */
	if (!udata)
		return -ENOSYS;

	required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
	if (udata->inlen < required_cmd_sz)
		return -EINVAL;

	/* A longer command is accepted only if the extra bytes are zero. */
	if (udata->inlen > sizeof(ucmd) &&
	    !ib_is_udata_cleared(udata, sizeof(ucmd),
				 udata->inlen - sizeof(ucmd)))
		return -EOPNOTSUPP;

	if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
		return -EFAULT;

	if (ucmd.comp_mask || ucmd.reserved)
		return -EOPNOTSUPP;

	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
	in = mlx5_vzalloc(inlen);
	if (!in)
		return -ENOMEM;

	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

	curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ?
		wq_attr->curr_wq_state : wq->state;
	wq_state = (wq_attr_mask & IB_WQ_STATE) ?
		wq_attr->wq_state : curr_wq_state;

	/* Only the error state needs translating to the device encoding. */
	if (curr_wq_state == IB_WQS_ERR)
		curr_wq_state = MLX5_RQC_STATE_ERR;
	if (wq_state == IB_WQS_ERR)
		wq_state = MLX5_RQC_STATE_ERR;
	MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
	MLX5_SET(rqc, rqc, state, wq_state);

	err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen);
	kvfree(in);

	/* Cache the new state using the IB encoding, not the device one. */
	if (!err)
		rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;

	return err;
}

View file

@ -74,14 +74,12 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
}
static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in,
struct ib_udata *udata, int buf_size, int *inlen,
int is_xrc)
struct mlx5_srq_attr *in,
struct ib_udata *udata, int buf_size)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd = {};
size_t ucmdlen;
void *xsrqc;
int err;
int npages;
int page_shift;
@ -104,7 +102,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
udata->inlen - sizeof(ucmd)))
return -EINVAL;
if (is_xrc) {
if (in->type == IB_SRQT_XRC) {
err = get_srq_user_index(to_mucontext(pd->uobject->context),
&ucmd, udata->inlen, &uidx);
if (err)
@ -130,14 +128,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
goto err_umem;
}
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
*in = mlx5_vzalloc(*inlen);
if (!(*in)) {
in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont);
if (!in->pas) {
err = -ENOMEM;
goto err_umem;
}
mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &srq->db);
@ -146,20 +143,16 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
goto err_in;
}
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
is_xrc){
xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
xrc_srq_context_entry);
MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
}
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->page_offset = offset;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type == IB_SRQT_XRC)
in->user_index = uidx;
return 0;
err_in:
kvfree(*in);
kvfree(in->pas);
err_umem:
ib_umem_release(srq->umem);
@ -168,15 +161,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
}
static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in, int buf_size,
int *inlen, int is_xrc)
struct mlx5_srq_attr *in, int buf_size)
{
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
void *xsrqc;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@ -204,13 +195,12 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
buf_size, page_shift, srq->buf.npages, npages);
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
if (!in->pas) {
err = -ENOMEM;
goto err_buf;
}
mlx5_fill_page_array(&srq->buf, (*in)->pas);
mlx5_fill_page_array(&srq->buf, in->pas);
srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
@ -221,20 +211,15 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
is_xrc){
xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
xrc_srq_context_entry);
/* 0xffffff means we ask to work with cqe version 0 */
MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
}
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type == IB_SRQT_XRC)
in->user_index = MLX5_IB_DEFAULT_UIDX;
return 0;
err_in:
kvfree(*in);
kvfree(in->pas);
err_buf:
mlx5_buf_free(dev->mdev, &srq->buf);
@ -267,10 +252,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
int desc_size;
int buf_size;
int err;
struct mlx5_create_srq_mbox_in *uninitialized_var(in);
int uninitialized_var(inlen);
int is_xrc;
u32 flgs, xrcdn;
struct mlx5_srq_attr in = {0};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
/* Sanity check SRQ size before proceeding */
@ -302,14 +284,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
if (pd->uobject)
err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
is_xrc);
err = create_srq_user(pd, srq, &in, udata, buf_size);
else
err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
is_xrc);
err = create_srq_kernel(dev, srq, &in, buf_size);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@ -317,23 +295,23 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
goto err_srq;
}
in->ctx.state_log_sz = ilog2(srq->msrq.max);
flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
xrcdn = 0;
if (is_xrc) {
xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
in.type = init_attr->srq_type;
in.log_size = ilog2(srq->msrq.max);
in.wqe_shift = srq->msrq.wqe_shift - 4;
if (srq->wq_sig)
in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
if (init_attr->srq_type == IB_SRQT_XRC) {
in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn;
} else if (init_attr->srq_type == IB_SRQT_BASIC) {
xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
}
in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
in->ctx.db_record = cpu_to_be64(srq->db.dma);
err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc);
kvfree(in);
in.pd = to_mpd(pd)->pdn;
in.db_record = srq->db.dma;
err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
kvfree(in.pas);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
goto err_usr_kern_srq;
@ -401,7 +379,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
struct mlx5_ib_srq *srq = to_msrq(ibsrq);
int ret;
struct mlx5_query_srq_mbox_out *out;
struct mlx5_srq_attr *out;
out = kzalloc(sizeof(*out), GFP_KERNEL);
if (!out)
@ -411,7 +389,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
if (ret)
goto out_box;
srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
srq_attr->srq_limit = out->lwm;
srq_attr->max_wr = srq->msrq.max - 1;
srq_attr->max_sge = srq->msrq.max_gs;
@ -458,6 +436,8 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct mlx5_ib_srq *srq = to_msrq(ibsrq);
struct mlx5_wqe_srq_next_seg *next;
struct mlx5_wqe_data_seg *scat;
struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int err = 0;
int nreq;
@ -465,6 +445,12 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
spin_lock_irqsave(&srq->lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
goto out;
}
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
err = -EINVAL;
@ -507,7 +493,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
*srq->db.db = cpu_to_be32(srq->wqe_ctr);
}
out:
spin_unlock_irqrestore(&srq->lock, flags);
return err;

View file

@ -46,6 +46,10 @@ enum {
MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
};
enum {
MLX5_WQ_FLAG_SIGNATURE = 1 << 0,
};
/* Increment this value if any changes that break userspace ABI
* compatibility are made.
@ -79,6 +83,10 @@ enum mlx5_ib_alloc_ucontext_resp_mask {
MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};
enum mlx5_user_cmds_supp_uhw {
MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
};
struct mlx5_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 bf_reg_size;
@ -94,8 +102,8 @@ struct mlx5_ib_alloc_ucontext_resp {
__u32 comp_mask;
__u32 response_length;
__u8 cqe_version;
__u8 reserved2;
__u16 reserved3;
__u8 cmds_supp_uhw;
__u16 reserved2;
__u64 hca_core_clock_offset;
};
@ -103,6 +111,22 @@ struct mlx5_ib_alloc_pd_resp {
__u32 pdn;
};
struct mlx5_ib_tso_caps {
__u32 max_tso; /* Maximum tso payload size in bytes */
/* Corresponding bit will be set if qp type from
* 'enum ib_qp_type' is supported, e.g.
* supported_qpts |= 1 << IB_QPT_UD
*/
__u32 supported_qpts;
};
struct mlx5_ib_query_device_resp {
__u32 comp_mask;
__u32 response_length;
struct mlx5_ib_tso_caps tso_caps;
};
struct mlx5_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
@ -148,6 +172,40 @@ struct mlx5_ib_create_qp {
__u64 sq_buf_addr;
};
/* RX Hash function flags */
enum mlx5_rx_hash_function_flags {
MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
};
/*
 * RX Hash flags. These flags select which fields of an incoming packet
 * participate in the RX Hash. Each flag represents a certain packet field;
 * when the flag is set, the field it represents participates in the
 * RX Hash calculation.
 * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP
 * and *TCP and *UDP flags can't be enabled together on the same QP.
 */
enum mlx5_rx_hash_fields {
MLX5_RX_HASH_SRC_IPV4 = 1 << 0,
MLX5_RX_HASH_DST_IPV4 = 1 << 1,
MLX5_RX_HASH_SRC_IPV6 = 1 << 2,
MLX5_RX_HASH_DST_IPV6 = 1 << 3,
MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4,
MLX5_RX_HASH_DST_PORT_TCP = 1 << 5,
MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6,
MLX5_RX_HASH_DST_PORT_UDP = 1 << 7
};
struct mlx5_ib_create_qp_rss {
__u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
__u8 rx_key_len; /* valid only for Toeplitz */
__u8 reserved[6];
__u8 rx_hash_key[128]; /* valid only for Toeplitz */
__u32 comp_mask;
__u32 reserved1;
};
struct mlx5_ib_create_qp_resp {
__u32 uuar_index;
};
@ -159,6 +217,32 @@ struct mlx5_ib_alloc_mw {
__u16 reserved2;
};
struct mlx5_ib_create_wq {
__u64 buf_addr;
__u64 db_addr;
__u32 rq_wqe_count;
__u32 rq_wqe_shift;
__u32 user_index;
__u32 flags;
__u32 comp_mask;
__u32 reserved;
};
struct mlx5_ib_create_wq_resp {
__u32 response_length;
__u32 reserved;
};
struct mlx5_ib_create_rwq_ind_tbl_resp {
__u32 response_length;
__u32 reserved;
};
struct mlx5_ib_modify_wq {
__u32 comp_mask;
__u32 reserved;
};
static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
struct mlx5_ib_create_qp *ucmd,
int inlen,

View file

@ -1081,16 +1081,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
return sprintf(buf, "%x\n", dev->rev_id);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
(int) (dev->fw_ver >> 16) & 0xffff,
(int) dev->fw_ver & 0xffff);
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
@ -1120,13 +1110,11 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *mthca_dev_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
@ -1187,6 +1175,17 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
/* Write the firmware version of @device into @str (at most @str_len
 * bytes, always NUL-terminated by snprintf) as three dot-separated
 * decimal fields taken from dev->fw_ver: the bits above 32, then bits
 * 16..31, then bits 0..15.
 */
static void get_dev_fw_str(struct ib_device *device, char *str,
			   size_t str_len)
{
	struct mthca_dev *dev =
		container_of(device, struct mthca_dev, ib_dev);
	int ver_hi = (int) (dev->fw_ver >> 32);
	int ver_mid = (int) (dev->fw_ver >> 16) & 0xffff;
	int ver_lo = (int) dev->fw_ver & 0xffff;

	snprintf(str, str_len, "%d.%d.%d", ver_hi, ver_mid, ver_lo);
}
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
@ -1266,6 +1265,7 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
dev->ib_dev.get_port_immutable = mthca_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
dev->ib_dev.alloc_fmr = mthca_alloc_fmr;

View file

@ -98,7 +98,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"PCI header, aborting.\n");
goto out;
goto put_dev;
}
for (i = 0; i < 64; ++i) {
@ -108,7 +108,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA "
"PCI header, aborting.\n");
goto out;
goto free_hca;
}
}
@ -121,7 +121,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"bridge PCI header, aborting.\n");
goto out;
goto free_hca;
}
for (i = 0; i < 64; ++i) {
@ -131,7 +131,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA bridge "
"PCI header, aborting.\n");
goto out;
goto free_bh;
}
}
bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
@ -139,7 +139,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't locate HCA bridge "
"PCI-X capability, aborting.\n");
goto out;
goto free_bh;
}
}
@ -152,7 +152,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENOMEM;
mthca_err(mdev, "Couldn't map HCA reset register, "
"aborting.\n");
goto out;
goto free_bh;
}
writel(MTHCA_RESET_VALUE, reset);
@ -172,7 +172,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't access HCA after reset, "
"aborting.\n");
goto out;
goto free_bh;
}
if (v != 0xffffffff)
@ -184,7 +184,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "PCI device did not come back after reset, "
"aborting.\n");
goto out;
goto free_bh;
}
good:
@ -195,14 +195,14 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
"split transaction control, aborting.\n");
goto out;
goto free_bh;
}
if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
"split transaction control, aborting.\n");
goto out;
goto free_bh;
}
/*
* Bridge control register is at 0x3e, so we'll
@ -216,7 +216,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
"aborting.\n", i);
goto out;
goto free_bh;
}
}
@ -225,7 +225,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
"aborting.\n");
goto out;
goto free_bh;
}
}
@ -235,7 +235,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI-X "
"command register, aborting.\n");
goto out;
goto free_bh;
}
}
@ -246,7 +246,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
goto out;
goto free_bh;
}
linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL,
@ -254,7 +254,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
goto out;
goto free_bh;
}
}
@ -266,7 +266,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA reg %x, "
"aborting.\n", i);
goto out;
goto free_bh;
}
}
@ -275,14 +275,12 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA COMMAND, "
"aborting.\n");
goto out;
}
out:
if (bridge)
pci_dev_put(bridge);
free_bh:
kfree(bridge_header);
free_hca:
kfree(hca_header);
put_dev:
pci_dev_put(bridge);
return err;
}

View file

@ -2605,23 +2605,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
}
/**
* show_fw_ver
*/
static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nes_ib_device *nesibdev =
container_of(dev, struct nes_ib_device, ibdev.dev);
struct nes_vnic *nesvnic = nesibdev->nesvnic;
nes_debug(NES_DBG_INIT, "\n");
return sprintf(buf, "%u.%u\n",
(nesvnic->nesdev->nesadapter->firmware_version >> 16),
(nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
}
/**
* show_hca
*/
@ -2645,13 +2628,11 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *nes_dev_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
@ -3703,6 +3684,19 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
/* Write the adapter firmware version into @str (bounded by @str_len) as
 * two dot-separated unsigned fields.
 */
static void get_dev_fw_str(struct ib_device *dev, char *str,
size_t str_len)
{
struct nes_ib_device *nesibdev =
container_of(dev, struct nes_ib_device, ibdev);
struct nes_vnic *nesvnic = nesibdev->nesvnic;
nes_debug(NES_DBG_INIT, "\n");
/* first field: firmware_version >> 16; second field: its low 8 bits only */
snprintf(str, str_len, "%u.%u",
(nesvnic->nesdev->nesadapter->firmware_version >> 16),
(nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
}
/**
* nes_init_ofa_device
*/
@ -3802,6 +3796,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
nesibdev->ibdev.get_port_immutable = nes_port_immutable;
nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str;
memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(nesibdev->ibdev.iwcm->ifname));

View file

@ -107,6 +107,14 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
/* Copy the firmware version string kept in dev->attr.fw_ver into @str,
 * truncated to @str_len bytes (snprintf always NUL-terminates).
 */
static void get_dev_fw_str(struct ib_device *device, char *str,
size_t str_len)
{
struct ocrdma_dev *dev = get_ocrdma_dev(device);
snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]);
}
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
@ -193,6 +201,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.process_mad = ocrdma_process_mad;
dev->ibdev.get_port_immutable = ocrdma_port_immutable;
dev->ibdev.get_dev_fw_str = get_dev_fw_str;
if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
dev->ibdev.uverbs_cmd_mask |=
@ -262,14 +271,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ocrdma_dev *dev = dev_get_drvdata(device);
return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
}
static ssize_t show_hca_type(struct device *device,
struct device_attribute *attr, char *buf)
{
@ -279,12 +280,10 @@ static ssize_t show_hca_type(struct device *device,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
static struct device_attribute *ocrdma_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type
};

View file

@ -331,6 +331,21 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
/* Write the firmware version reported by the underlying netdev's ethtool
 * get_drvinfo() callback into @str, truncated to @str_len bytes.
 *
 * The callback is invoked with usdev_lock held.
 */
static void usnic_get_dev_fw_str(struct ib_device *device,
				 char *str,
				 size_t str_len)
{
	struct usnic_ib_dev *us_ibdev =
		container_of(device, struct usnic_ib_dev, ib_dev);
	/* Zeroed up front: if get_drvinfo() returns without filling
	 * fw_version, @str becomes an empty string instead of whatever
	 * bytes happened to be on the stack.
	 */
	struct ethtool_drvinfo info = {0};

	mutex_lock(&us_ibdev->usdev_lock);
	us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
	mutex_unlock(&us_ibdev->usdev_lock);

	snprintf(str, str_len, "%s", info.fw_version);
}
/* Start of PF discovery section */
static void *usnic_ib_device_add(struct pci_dev *dev)
{
@ -414,6 +429,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable;
us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str;
if (ib_register_device(&us_ibdev->ib_dev, NULL))

View file

@ -45,21 +45,6 @@
#include "usnic_ib_verbs.h"
#include "usnic_log.h"
static ssize_t usnic_ib_show_fw_ver(struct device *device,
struct device_attribute *attr,
char *buf)
{
struct usnic_ib_dev *us_ibdev =
container_of(device, struct usnic_ib_dev, ib_dev.dev);
struct ethtool_drvinfo info;
mutex_lock(&us_ibdev->usdev_lock);
us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
mutex_unlock(&us_ibdev->usdev_lock);
return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
}
static ssize_t usnic_ib_show_board(struct device *device,
struct device_attribute *attr,
char *buf)
@ -192,7 +177,6 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
}
static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
@ -201,7 +185,6 @@ static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
static struct device_attribute *usnic_class_attributes[] = {
&dev_attr_fw_ver,
&dev_attr_board_id,
&dev_attr_config,
&dev_attr_iface,

View file

@ -1 +1,2 @@
obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/
obj-$(CONFIG_RDMA_RXE) += rxe/

View file

@ -1,6 +1,5 @@
config INFINIBAND_RDMAVT
tristate "RDMA verbs transport library"
depends on 64BIT
default m
---help---
This is a common software verbs provider for RDMA networks.

View file

@ -0,0 +1,24 @@
config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
depends on INET && PCI && INFINIBAND
depends on NET_UDP_TUNNEL
---help---
This driver implements the InfiniBand RDMA transport over
the Linux network stack. It enables a system with a
standard Ethernet adapter to interoperate with a RoCE
adapter or with another system running the RXE driver.
Documentation on InfiniBand and RoCE can be downloaded at
www.infinibandta.org and www.openfabrics.org. (See also
siw which is a similar software driver for iWARP.)
The driver is split into two layers, one interfaces with the
Linux RDMA stack and implements a kernel or user space
verbs API. The user space verbs API requires a support
library named librxe which is loaded by the generic user
space verbs API, libibverbs. The other layer interfaces
with the Linux network stack at layer 3.
To configure and work with soft-RoCE driver please use the
following wiki page under "configure Soft-RoCE (RXE)" section:
https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home

View file

@ -0,0 +1,24 @@
obj-$(CONFIG_RDMA_RXE) += rdma_rxe.o
rdma_rxe-y := \
rxe.o \
rxe_comp.o \
rxe_req.o \
rxe_resp.o \
rxe_recv.o \
rxe_pool.o \
rxe_queue.o \
rxe_verbs.o \
rxe_av.o \
rxe_srq.o \
rxe_qp.o \
rxe_cq.o \
rxe_mr.o \
rxe_dma.o \
rxe_opcode.o \
rxe_mmap.o \
rxe_icrc.o \
rxe_mcast.o \
rxe_task.o \
rxe_net.o \
rxe_sysfs.o

View file

@ -0,0 +1,386 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
MODULE_DESCRIPTION("Soft RDMA transport");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION("0.2");
/* free resources for all ports on a device */
static void rxe_cleanup_ports(struct rxe_dev *rxe)
{
kfree(rxe->port.pkey_tbl);
rxe->port.pkey_tbl = NULL;
}
/* Free the resources owned by an rxe device: each of its object pools,
 * then the per-port data.  All objects created for this device must
 * already have been destroyed.
 */
static void rxe_cleanup(struct rxe_dev *rxe)
{
rxe_pool_cleanup(&rxe->uc_pool);
rxe_pool_cleanup(&rxe->pd_pool);
rxe_pool_cleanup(&rxe->ah_pool);
rxe_pool_cleanup(&rxe->srq_pool);
rxe_pool_cleanup(&rxe->qp_pool);
rxe_pool_cleanup(&rxe->cq_pool);
rxe_pool_cleanup(&rxe->mr_pool);
rxe_pool_cleanup(&rxe->mw_pool);
rxe_pool_cleanup(&rxe->mc_grp_pool);
rxe_pool_cleanup(&rxe->mc_elem_pool);
/* ports last: releases the pkey table */
rxe_cleanup_ports(rxe);
}
/* kref release callback, invoked once the last reference to the device
 * has been dropped.
 */
void rxe_release(struct kref *kref)
{
	struct rxe_dev *rxe;

	rxe = container_of(kref, struct rxe_dev, ref_cnt);

	/* tear down driver state first, then hand the device back */
	rxe_cleanup(rxe);
	ib_dealloc_device(&rxe->ib_dev);
}
/* Drop one reference on the device; rxe_release() is the kref release
 * function and runs when the last reference goes away.
 */
void rxe_dev_put(struct rxe_dev *rxe)
{
kref_put(&rxe->ref_cnt, rxe_release);
}
EXPORT_SYMBOL_GPL(rxe_dev_put);
/* Initialize the rxe device parameters: fill rxe->attr and the driver's
 * own limits (max_inline_data, max_ucontext) from the RXE_* constants.
 * Always returns 0.
 */
static int rxe_init_device_param(struct rxe_dev *rxe)
{
rxe->max_inline_data = RXE_MAX_INLINE_DATA;
rxe->attr.fw_ver = RXE_FW_VER;
rxe->attr.max_mr_size = RXE_MAX_MR_SIZE;
rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP;
rxe->attr.vendor_id = RXE_VENDOR_ID;
rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID;
rxe->attr.hw_ver = RXE_HW_VER;
rxe->attr.max_qp = RXE_MAX_QP;
rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
rxe->attr.max_sge = RXE_MAX_SGE;
rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;
rxe->attr.max_cq = RXE_MAX_CQ;
/* the limit is given as a log2; advertise one less than 2^RXE_MAX_LOG_CQE */
rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1;
rxe->attr.max_mr = RXE_MAX_MR;
rxe->attr.max_pd = RXE_MAX_PD;
rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM;
rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM;
rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;
rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM;
rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM;
rxe->attr.atomic_cap = RXE_ATOMIC_CAP;
rxe->attr.max_ee = RXE_MAX_EE;
rxe->attr.max_rdd = RXE_MAX_RDD;
rxe->attr.max_mw = RXE_MAX_MW;
rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP;
rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP;
rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP;
rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH;
rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH;
rxe->attr.max_ah = RXE_MAX_AH;
rxe->attr.max_fmr = RXE_MAX_FMR;
rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR;
rxe->attr.max_srq = RXE_MAX_SRQ;
rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR;
rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE;
rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN;
rxe->attr.max_pkeys = RXE_MAX_PKEYS;
rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;
rxe->max_ucontext = RXE_MAX_UCONTEXT;
return 0;
}
/* Initialize port attributes: fill port->attr from the RXE_PORT_*
 * constants and derive mtu_cap and subnet_prefix.  Always returns 0.
 */
static int rxe_init_port_param(struct rxe_port *port)
{
port->attr.state = RXE_PORT_STATE;
port->attr.max_mtu = RXE_PORT_MAX_MTU;
port->attr.active_mtu = RXE_PORT_ACTIVE_MTU;
port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN;
port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS;
port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ;
port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR;
port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR;
port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN;
port->attr.lid = RXE_PORT_LID;
port->attr.sm_lid = RXE_PORT_SM_LID;
port->attr.lmc = RXE_PORT_LMC;
port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM;
port->attr.sm_sl = RXE_PORT_SM_SL;
port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT;
port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY;
port->attr.active_width = RXE_PORT_ACTIVE_WIDTH;
port->attr.active_speed = RXE_PORT_ACTIVE_SPEED;
port->attr.phys_state = RXE_PORT_PHYS_STATE;
/* integer form of the active MTU enum set above */
port->mtu_cap =
ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);
/* stored big-endian */
port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);
return 0;
}
/* Set up the state of the device's port.  (Note the IB convention that
 * HCA ports are always numbered from 1.)
 *
 * Returns 0 on success, -EINVAL if either the pkey or the gid table
 * length is zero, or -ENOMEM if the pkey table cannot be allocated.
 */
static int rxe_init_ports(struct rxe_dev *rxe)
{
	struct rxe_port *p = &rxe->port;

	rxe_init_port_param(p);

	/* both tables must have at least one entry */
	if (!(p->attr.pkey_tbl_len && p->attr.gid_tbl_len))
		return -EINVAL;

	p->pkey_tbl = kcalloc(p->attr.pkey_tbl_len, sizeof(*p->pkey_tbl),
			      GFP_KERNEL);
	if (!p->pkey_tbl)
		return -ENOMEM;

	/* entry 0 is seeded with 0xffff; the rest stay zero from kcalloc */
	p->pkey_tbl[0] = 0xffff;

	/* the port GUID comes from the interface layer */
	p->port_guid = rxe->ifc_ops->port_guid(rxe);

	spin_lock_init(&p->port_lock);

	return 0;
}
/* Create the pools of managed objects, one per object type, each sized
 * from the matching device limit.
 *
 * Returns 0 on success.  If any pool fails to initialize, the pools
 * created before it are cleaned up in reverse order and the error from
 * rxe_pool_init() is returned.
 */
static int rxe_init_pools(struct rxe_dev *rxe)
{
	int ret;

	ret = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC,
			    rxe->max_ucontext);
	if (ret)
		return ret;

	ret = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD,
			    rxe->attr.max_pd);
	if (ret)
		goto undo_uc;

	ret = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH,
			    rxe->attr.max_ah);
	if (ret)
		goto undo_pd;

	ret = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ,
			    rxe->attr.max_srq);
	if (ret)
		goto undo_ah;

	ret = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP,
			    rxe->attr.max_qp);
	if (ret)
		goto undo_srq;

	ret = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ,
			    rxe->attr.max_cq);
	if (ret)
		goto undo_qp;

	ret = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR,
			    rxe->attr.max_mr);
	if (ret)
		goto undo_cq;

	ret = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW,
			    rxe->attr.max_mw);
	if (ret)
		goto undo_mr;

	ret = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP,
			    rxe->attr.max_mcast_grp);
	if (ret)
		goto undo_mw;

	ret = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
			    rxe->attr.max_total_mcast_qp_attach);
	if (ret)
		goto undo_mc_grp;

	return 0;

	/* each label undoes the pool it names, then falls through */
undo_mc_grp:
	rxe_pool_cleanup(&rxe->mc_grp_pool);
undo_mw:
	rxe_pool_cleanup(&rxe->mw_pool);
undo_mr:
	rxe_pool_cleanup(&rxe->mr_pool);
undo_cq:
	rxe_pool_cleanup(&rxe->cq_pool);
undo_qp:
	rxe_pool_cleanup(&rxe->qp_pool);
undo_srq:
	rxe_pool_cleanup(&rxe->srq_pool);
undo_ah:
	rxe_pool_cleanup(&rxe->ah_pool);
undo_pd:
	rxe_pool_cleanup(&rxe->pd_pool);
undo_uc:
	rxe_pool_cleanup(&rxe->uc_pool);
	return ret;
}
/* Bring up the software state of an rxe device: default parameters,
 * port state, object pools, and the locks/lists used for pending mmaps.
 *
 * Returns 0 on success or the error from port/pool initialization; on
 * failure anything set up here has been released again.
 */
static int rxe_init(struct rxe_dev *rxe)
{
	int ret;

	/* default device parameters */
	rxe_init_device_param(rxe);

	ret = rxe_init_ports(rxe);
	if (ret)
		return ret;

	ret = rxe_init_pools(rxe);
	if (ret) {
		/* pools unwound themselves; only the ports remain */
		rxe_cleanup_ports(rxe);
		return ret;
	}

	/* pending mmap list and its locks */
	spin_lock_init(&rxe->mmap_offset_lock);
	spin_lock_init(&rxe->pending_lock);
	INIT_LIST_HEAD(&rxe->pending_mmaps);
	INIT_LIST_HEAD(&rxe->list);

	mutex_init(&rxe->usdev_lock);

	return 0;
}
int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
{
struct rxe_port *port = &rxe->port;
enum ib_mtu mtu;
mtu = eth_mtu_int_to_enum(ndev_mtu);
/* Make sure that new MTU in range */
mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256;
port->attr.active_mtu = mtu;
port->mtu_cap = ib_mtu_enum_to_int(mtu);
return 0;
}
EXPORT_SYMBOL(rxe_set_mtu);
/* Called by the ifc layer to create a new rxe device.
 * The caller should allocate memory for rxe by calling ib_alloc_device.
 *
 * Takes the initial reference, initializes the device state, applies
 * @mtu and registers the device.  Returns 0 on success; on any failure
 * the initial reference is dropped again and the error is returned.
 *
 * NOTE(review): when rxe_init() fails it has already undone its own
 * work, yet the rxe_dev_put() below ends in rxe_release() ->
 * rxe_cleanup(), which walks the same pools once more - confirm that
 * rxe_pool_cleanup() tolerates this, and that callers do not free the
 * device a second time on error.
 */
int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
{
	int ret;

	kref_init(&rxe->ref_cnt);

	/* each step runs only if everything before it succeeded */
	ret = rxe_init(rxe);
	if (!ret)
		ret = rxe_set_mtu(rxe, mtu);
	if (!ret)
		ret = rxe_register_device(rxe);

	if (ret)
		rxe_dev_put(rxe);

	return ret;
}
EXPORT_SYMBOL(rxe_add);
/* Called by the ifc layer to remove a device: unregister it, then drop
 * a reference on it.
 */
void rxe_remove(struct rxe_dev *rxe)
{
rxe_unregister_device(rxe);
rxe_dev_put(rxe);
}
EXPORT_SYMBOL(rxe_remove);
/* Module entry point: set up the object caches, then the network layer.
 * Returns 0 on success or the error of whichever step failed; a failed
 * network init tears the caches down again.
 */
static int __init rxe_module_init(void)
{
	int ret;

	/* initialize slab caches for managed objects */
	ret = rxe_cache_init();
	if (ret) {
		pr_err("rxe: unable to init object pools\n");
		goto out;
	}

	ret = rxe_net_init();
	if (ret) {
		pr_err("rxe: unable to init\n");
		goto out_cache;
	}

	pr_info("rxe: loaded\n");
	return 0;

out_cache:
	rxe_cache_exit();
out:
	return ret;
}
/* Module exit point: remove all rxe devices, then shut down the network
 * layer and the object caches (the latter two in the reverse order of
 * rxe_module_init()).
 */
static void __exit rxe_module_exit(void)
{
rxe_remove_all();
rxe_net_exit();
rxe_cache_exit();
pr_info("rxe: unloaded\n");
}
module_init(rxe_module_init);
module_exit(rxe_module_exit);

View file

@ -0,0 +1,77 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_H
#define RXE_H
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/crc32.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
#include "rxe_net.h"
#include "rxe_opcode.h"
#include "rxe_hdr.h"
#include "rxe_param.h"
#include "rxe_verbs.h"
#define RXE_UVERBS_ABI_VERSION (1)
#define IB_PHYS_STATE_LINK_UP (5)
#define IB_PHYS_STATE_LINK_DOWN (3)
#define RXE_ROCE_V2_SPORT (0xc000)
/* update the port's active MTU from a netdev MTU in bytes; returns 0 */
int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
/* initialize and register a device allocated with ib_alloc_device */
int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
/* unregister a device and drop a reference on it */
void rxe_remove(struct rxe_dev *rxe);
void rxe_remove_all(void);
int rxe_rcv(struct sk_buff *skb);
/* drop a reference; the device is released with the last one */
void rxe_dev_put(struct rxe_dev *rxe);
struct rxe_dev *net_to_rxe(struct net_device *ndev);
struct rxe_dev *get_rxe_by_name(const char* name);
void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
#endif /* RXE_H */

View file

@ -0,0 +1,98 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
/*
 * Validate address-handle attributes before they are used to build an AV.
 *
 * Returns 0 when @attr is acceptable. Returns -EINVAL when the port number
 * is not 1 or, for attributes that carry a GRH, when the source GID index
 * lies outside the port's GID table.
 */
int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
{
	struct rxe_port *port;

	if (attr->port_num != 1) {
		pr_info("rxe: invalid port_num = %d\n", attr->port_num);
		return -EINVAL;
	}

	port = &rxe->port;

	if (attr->ah_flags & IB_AH_GRH) {
		/* valid table indices are 0 .. gid_tbl_len - 1 */
		if (attr->grh.sgid_index >= port->attr.gid_tbl_len) {
			pr_info("rxe: invalid sgid index = %d\n",
				attr->grh.sgid_index);
			return -EINVAL;
		}
	}

	return 0;
}
/*
 * Build @av from @attr: the AV is zeroed first, then @port_num is recorded
 * and the GRH is copied over from the attributes. Always returns 0.
 */
int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
		     struct rxe_av *av, struct ib_ah_attr *attr)
{
	memset(av, 0, sizeof(*av));

	av->port_num = port_num;
	memcpy(&av->grh, &attr->grh, sizeof(attr->grh));

	return 0;
}
/*
 * Reverse of rxe_av_from_attr(): copy the port number and GRH stored in
 * @av back into @attr. Other fields of @attr are left untouched.
 * Always returns 0.
 */
int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
		   struct ib_ah_attr *attr)
{
	attr->port_num = av->port_num;
	memcpy(&attr->grh, &av->grh, sizeof(av->grh));

	return 0;
}
/*
 * Fill the IP-level fields of @av: the source and destination socket
 * addresses are derived from @sgid and from the destination GID in
 * @attr's GRH, and the network type is derived from the GID type in
 * @sgid_attr together with @sgid. Always returns 0.
 */
int rxe_av_fill_ip_info(struct rxe_dev *rxe,
			struct rxe_av *av,
			struct ib_ah_attr *attr,
			struct ib_gid_attr *sgid_attr,
			union ib_gid *sgid)
{
	/* source address */
	rdma_gid2ip(&av->sgid_addr._sockaddr, sgid);

	/* destination address */
	rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid);

	av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);

	return 0;
}
/*
 * Return the address vector that applies to @pkt, or NULL when there is
 * none: RC and UC QPs use the QP's primary AV, every other QP type uses
 * the AV stored in the packet's send WQE (if the packet has one).
 */
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
{
	if (!pkt)
		return NULL;

	if (!pkt->qp)
		return NULL;

	if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC)
		return &pkt->qp->pri_av;

	if (!pkt->wqe)
		return NULL;

	return &pkt->wqe->av;
}

View file

@ -0,0 +1,734 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/skbuff.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_task.h"
/* States of the completer state machine driven by rxe_completer() */
enum comp_state {
COMPST_GET_ACK,
COMPST_GET_WQE,
COMPST_COMP_WQE,
COMPST_COMP_ACK,
COMPST_CHECK_PSN,
COMPST_CHECK_ACK,
COMPST_READ,
COMPST_ATOMIC,
COMPST_WRITE_SEND,
COMPST_UPDATE_COMP,
COMPST_ERROR_RETRY,
COMPST_RNR_RETRY,
COMPST_ERROR,
COMPST_EXIT, /* We have an issue, and we want to rerun the completer */
COMPST_DONE, /* The completer finished successfully */
};
/* Printable name for each enum comp_state value, indexed by the state;
 * used for the debug trace in rxe_completer().
 */
static char *comp_state_name[] = {
[COMPST_GET_ACK] = "GET ACK",
[COMPST_GET_WQE] = "GET WQE",
[COMPST_COMP_WQE] = "COMP WQE",
[COMPST_COMP_ACK] = "COMP ACK",
[COMPST_CHECK_PSN] = "CHECK PSN",
[COMPST_CHECK_ACK] = "CHECK ACK",
[COMPST_READ] = "READ",
[COMPST_ATOMIC] = "ATOMIC",
[COMPST_WRITE_SEND] = "WRITE/SEND",
[COMPST_UPDATE_COMP] = "UPDATE COMP",
[COMPST_ERROR_RETRY] = "ERROR RETRY",
[COMPST_RNR_RETRY] = "RNR RETRY",
[COMPST_ERROR] = "ERROR",
[COMPST_EXIT] = "EXIT",
[COMPST_DONE] = "DONE",
};
static unsigned long rnrnak_usec[32] = {
[IB_RNR_TIMER_655_36] = 655360,
[IB_RNR_TIMER_000_01] = 10,
[IB_RNR_TIMER_000_02] = 20,
[IB_RNR_TIMER_000_03] = 30,
[IB_RNR_TIMER_000_04] = 40,
[IB_RNR_TIMER_000_06] = 60,
[IB_RNR_TIMER_000_08] = 80,
[IB_RNR_TIMER_000_12] = 120,
[IB_RNR_TIMER_000_16] = 160,
[IB_RNR_TIMER_000_24] = 240,
[IB_RNR_TIMER_000_32] = 320,
[IB_RNR_TIMER_000_48] = 480,
[IB_RNR_TIMER_000_64] = 640,
[IB_RNR_TIMER_000_96] = 960,
[IB_RNR_TIMER_001_28] = 1280,
[IB_RNR_TIMER_001_92] = 1920,
[IB_RNR_TIMER_002_56] = 2560,
[IB_RNR_TIMER_003_84] = 3840,
[IB_RNR_TIMER_005_12] = 5120,
[IB_RNR_TIMER_007_68] = 7680,
[IB_RNR_TIMER_010_24] = 10240,
[IB_RNR_TIMER_015_36] = 15360,
[IB_RNR_TIMER_020_48] = 20480,
[IB_RNR_TIMER_030_72] = 30720,
[IB_RNR_TIMER_040_96] = 40960,
[IB_RNR_TIMER_061_44] = 61410,
[IB_RNR_TIMER_081_92] = 81920,
[IB_RNR_TIMER_122_88] = 122880,
[IB_RNR_TIMER_163_84] = 163840,
[IB_RNR_TIMER_245_76] = 245760,
[IB_RNR_TIMER_327_68] = 327680,
[IB_RNR_TIMER_491_52] = 491520,
};
/*
 * Convert an RNR NAK timer code into a delay in jiffies, never less than
 * one jiffy. @timeout indexes rnrnak_usec[] and is not range-checked here.
 */
static inline unsigned long rnrnak_jiffies(u8 timeout)
{
	unsigned long delay = usecs_to_jiffies(rnrnak_usec[timeout]);

	return max_t(unsigned long, delay, 1);
}
/*
 * Map a work-request opcode to the opcode reported in its work completion.
 * Opcodes not listed here yield 0xff.
 */
static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		return IB_WC_RDMA_WRITE;
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_SEND_WITH_INV:
		return IB_WC_SEND;
	case IB_WR_RDMA_READ:
	case IB_WR_RDMA_READ_WITH_INV:
		return IB_WC_RDMA_READ;
	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_WC_COMP_SWAP;
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_WC_FETCH_ADD;
	case IB_WR_LSO:
		return IB_WC_LSO;
	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;
	case IB_WR_REG_MR:
		return IB_WC_REG_MR;
	default:
		return 0xff;
	}
}
/*
 * Timer callback; @data is the QP pointer cast to unsigned long.
 * For a valid QP it flags a completer timeout and schedules the
 * completer task. Nothing happens for an invalid QP.
 */
void retransmit_timer(unsigned long data)
{
	struct rxe_qp *qp = (struct rxe_qp *)data;

	if (!qp->valid)
		return;

	qp->comp.timeout = 1;
	rxe_run_task(&qp->comp.task, 1);
}
/*
 * Append a response packet to the QP's resp_pkts queue and kick the
 * completer task. The task's second argument is non-zero when more than
 * one packet is queued after the append.
 */
void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
			struct sk_buff *skb)
{
	skb_queue_tail(&qp->resp_pkts, skb);

	rxe_run_task(&qp->comp.task, skb_queue_len(&qp->resp_pkts) > 1);
}
/*
 * Look at the head of the send queue and pick the next completer state.
 * The head WQE (possibly NULL) is always stored in *@wqe_p. This runs
 * whether or not a response packet was found, so @pkt may be NULL.
 */
static inline enum comp_state get_wqe(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt,
				      struct rxe_send_wqe **wqe_p)
{
	struct rxe_send_wqe *head = queue_head(qp->sq.queue);

	*wqe_p = head;

	/* nothing posted at all */
	if (!head)
		return pkt ? COMPST_DONE : COMPST_EXIT;

	switch (head->state) {
	case wqe_state_posted:
		/* requester has not started it yet */
		return pkt ? COMPST_DONE : COMPST_EXIT;
	case wqe_state_done:
		/* WQE does not require an ack */
		return COMPST_COMP_WQE;
	case wqe_state_error:
		/* WQE caused an error */
		return COMPST_ERROR;
	default:
		/* we have a WQE, if we also have an ack check its PSN */
		return pkt ? COMPST_CHECK_PSN : COMPST_EXIT;
	}
}
/* Reload the completer's retry and RNR-retry counters from the QP attributes */
static inline void reset_retry_counters(struct rxe_qp *qp)
{
	qp->comp.rnr_retry = qp->attr.rnr_retry;
	qp->comp.retry_cnt = qp->attr.retry_cnt;
}
/*
 * Classify a response packet by its PSN. The PSN is compared first with
 * the last PSN of the oldest WQE and then with the PSN the completer
 * expects next (qp->comp.psn). The result is the next completer state.
 * The retry counters are reset when a pending send/write WQE is completed
 * by a response that lies beyond it.
 */
static inline enum comp_state check_psn(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
s32 diff;
/* check to see if response is past the oldest WQE. if it is, complete
 * send/write or error read/atomic
 */
diff = psn_compare(pkt->psn, wqe->last_psn);
if (diff > 0) {
if (wqe->state == wqe_state_pending) {
if (wqe->mask & WR_ATOMIC_OR_READ_MASK)
return COMPST_ERROR_RETRY;
reset_retry_counters(qp);
return COMPST_COMP_WQE;
} else {
return COMPST_DONE;
}
}
/* compare response packet to expected response */
diff = psn_compare(pkt->psn, qp->comp.psn);
if (diff < 0) {
/* response is most likely a retried packet. if it matches an
 * uncompleted WQE go complete it, else ignore it
 */
if (pkt->psn == wqe->last_psn)
return COMPST_COMP_ACK;
else
return COMPST_DONE;
} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
/* a read/atomic response arrived ahead of the expected PSN */
return COMPST_ERROR_RETRY;
} else {
return COMPST_CHECK_ACK;
}
}
/*
 * Validate a response packet and choose the next completer state.
 *
 * The packet opcode is checked first against the previously recorded
 * response opcode (qp->comp.opcode, -1 meaning no message in progress),
 * then against the opcode of the WQE it is supposed to answer.
 *
 * Side effects: the retry counters are reset on every positive
 * acknowledgment; wqe->status is set for the NAK codes that end in
 * COMPST_ERROR; a PSN-sequence-error NAK may advance qp->comp.psn and
 * wake a requester that is waiting on the PSN.
 */
static inline enum comp_state check_ack(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
unsigned int mask = pkt->mask;
u8 syn;
/* Check the sequence only */
switch (qp->comp.opcode) {
case -1:
/* Will catch all *_ONLY cases. */
if (!(mask & RXE_START_MASK))
return COMPST_ERROR;
break;
case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
/* a read response in progress may only continue or finish */
if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
return COMPST_ERROR;
}
break;
default:
WARN_ON(1);
}
/* Check operation validity. */
switch (pkt->opcode) {
case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST:
case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY:
syn = aeth_syn(pkt);
if ((syn & AETH_TYPE_MASK) != AETH_ACK)
return COMPST_ERROR;
/* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
 * doesn't have an AETH)
 */
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
/* read responses are only valid for read work requests */
if (wqe->wr.opcode != IB_WR_RDMA_READ &&
wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
return COMPST_ERROR;
}
reset_retry_counters(qp);
return COMPST_READ;
case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE:
syn = aeth_syn(pkt);
if ((syn & AETH_TYPE_MASK) != AETH_ACK)
return COMPST_ERROR;
/* atomic acks are only valid for atomic work requests */
if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP &&
wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD)
return COMPST_ERROR;
reset_retry_counters(qp);
return COMPST_ATOMIC;
case IB_OPCODE_RC_ACKNOWLEDGE:
syn = aeth_syn(pkt);
switch (syn & AETH_TYPE_MASK) {
case AETH_ACK:
reset_retry_counters(qp);
return COMPST_WRITE_SEND;
case AETH_RNR_NAK:
return COMPST_RNR_RETRY;
case AETH_NAK:
switch (syn) {
case AETH_NAK_PSN_SEQ_ERROR:
/* a nak implicitly acks all packets with psns
 * before
 */
if (psn_compare(pkt->psn, qp->comp.psn) > 0) {
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
rxe_run_task(&qp->req.task, 1);
}
}
return COMPST_ERROR_RETRY;
case AETH_NAK_INVALID_REQ:
wqe->status = IB_WC_REM_INV_REQ_ERR;
return COMPST_ERROR;
case AETH_NAK_REM_ACC_ERR:
wqe->status = IB_WC_REM_ACCESS_ERR;
return COMPST_ERROR;
case AETH_NAK_REM_OP_ERR:
wqe->status = IB_WC_REM_OP_ERR;
return COMPST_ERROR;
default:
pr_warn("unexpected nak %x\n", syn);
wqe->status = IB_WC_REM_OP_ERR;
return COMPST_ERROR;
}
default:
return COMPST_ERROR;
}
break;
default:
pr_warn("unexpected opcode\n");
}
/* reached for an unexpected packet opcode */
return COMPST_ERROR;
}
/*
 * Copy the payload of a read response into the WQE's DMA destination.
 * Returns COMPST_ERROR if the copy fails, COMPST_COMP_ACK when the packet
 * carries the end mask and no bytes remain outstanding, and
 * COMPST_UPDATE_COMP otherwise.
 */
static inline enum comp_state do_read(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt,
				      struct rxe_send_wqe *wqe)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	if (copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
		      &wqe->dma, payload_addr(pkt),
		      payload_size(pkt), to_mem_obj, NULL))
		return COMPST_ERROR;

	if (wqe->dma.resid != 0 || !(pkt->mask & RXE_END_MASK))
		return COMPST_UPDATE_COMP;

	return COMPST_COMP_ACK;
}
/*
 * Copy the original value returned in an atomic acknowledge into the
 * WQE's DMA destination. Returns COMPST_COMP_ACK on success and
 * COMPST_ERROR if the copy fails.
 */
static inline enum comp_state do_atomic(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					struct rxe_send_wqe *wqe)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	u64 atomic_orig = atmack_orig(pkt);

	if (copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
		      &wqe->dma, &atomic_orig,
		      sizeof(u64), to_mem_obj, NULL))
		return COMPST_ERROR;

	return COMPST_COMP_ACK;
}
/*
 * Fill @cqe with the completion for send WQE @wqe. The entry is zeroed
 * and then written in the user layout (struct ib_uverbs_wc) when
 * qp->is_user is set, or in the kernel layout (struct ib_wc) otherwise.
 */
static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
			  struct rxe_cqe *cqe)
{
	int with_imm = (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
			wqe->wr.opcode == IB_WR_SEND_WITH_IMM);

	memset(cqe, 0, sizeof(*cqe));

	if (qp->is_user) {
		struct ib_uverbs_wc *uwc = &cqe->uibwc;

		uwc->wr_id = wqe->wr.wr_id;
		uwc->status = wqe->status;
		uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
		if (with_imm)
			uwc->wc_flags = IB_WC_WITH_IMM;
		uwc->byte_len = wqe->dma.length;
		uwc->qp_num = qp->ibqp.qp_num;
	} else {
		struct ib_wc *wc = &cqe->ibwc;

		wc->wr_id = wqe->wr.wr_id;
		wc->status = wqe->status;
		wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
		if (with_imm)
			wc->wc_flags = IB_WC_WITH_IMM;
		wc->byte_len = wqe->dma.length;
		wc->qp = &qp->ibqp;
	}
}
/*
 * Retire the WQE at the head of the send queue. A completion is posted
 * to the send CQ when the QP signals all work requests, when the WQE
 * asked for one, or when the requester is in the error state. The
 * consumer index is then advanced and a requester waiting on a fence is
 * woken.
 */
static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	int post_cqe = (qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
		       (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
		       (qp->req.state == QP_STATE_ERROR);

	if (post_cqe) {
		struct rxe_cqe cqe;

		make_send_cqe(qp, wqe, &cqe);
		rxe_cq_post(qp->scq, &cqe, 0);
	}

	advance_consumer(qp->sq.queue);

	/* something completed, so let the requester run again if it is
	 * trying to fence
	 */
	if (!qp->req.wait_fence)
		return;

	qp->req.wait_fence = 0;
	rxe_run_task(&qp->req.task, 1);
}
/*
 * Complete a WQE that has been acknowledged by @pkt.
 *
 * Returns a read/atomic credit to the requester if the WQE held one,
 * moves a draining QP to the drained state once the completer has caught
 * up with the requester (raising IB_EVENT_SQ_DRAINED if a handler is
 * installed), and then retires the WQE. Returns COMPST_UPDATE_COMP when
 * the packet's PSN is not behind qp->comp.psn, COMPST_DONE otherwise.
 */
static inline enum comp_state complete_ack(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
unsigned long flags;
if (wqe->has_rd_atomic) {
wqe->has_rd_atomic = 0;
atomic_inc(&qp->req.rd_atomic);
/* wake a requester that was waiting for a read/atomic credit */
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
rxe_run_task(&qp->req.task, 1);
}
}
if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
/* state_lock used by requester & completer */
spin_lock_irqsave(&qp->state_lock, flags);
/* re-check the state now that the lock is held */
if ((qp->req.state == QP_STATE_DRAIN) &&
(qp->comp.psn == qp->req.psn)) {
qp->req.state = QP_STATE_DRAINED;
spin_unlock_irqrestore(&qp->state_lock, flags);
/* the event handler is called with the lock released */
if (qp->ibqp.event_handler) {
struct ib_event ev;
ev.device = qp->ibqp.device;
ev.element.qp = &qp->ibqp;
ev.event = IB_EVENT_SQ_DRAINED;
qp->ibqp.event_handler(&ev,
qp->ibqp.qp_context);
}
} else {
spin_unlock_irqrestore(&qp->state_lock, flags);
}
}
do_complete(qp, wqe);
if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
return COMPST_UPDATE_COMP;
else
return COMPST_DONE;
}
/*
 * Complete a WQE without a matching acknowledge of its own.
 *
 * Clears the recorded response opcode. If a packet is present, the
 * expected PSN is moved past it (when the packet is not behind) and a
 * requester waiting on the PSN is woken. The WQE is then retired.
 * Always returns COMPST_GET_WQE so that the next WQE is examined.
 * @pkt may be NULL.
 */
static inline enum comp_state complete_wqe(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
qp->comp.opcode = -1;
if (pkt) {
if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
rxe_run_task(&qp->req.task, 1);
}
}
do_complete(qp, wqe);
return COMPST_GET_WQE;
}
/*
 * Completer task for a QP; @arg is the struct rxe_qp.
 *
 * Takes at most one response packet from qp->resp_pkts, matches it
 * against the WQE at the head of the send queue and runs the completer
 * state machine until the packet has been consumed or nothing more can be
 * done.
 *
 * QPs that are invalid, in error or in reset have their queued response
 * packets dropped and their send queue emptied. In the error state each
 * WQE is completed with IB_WC_WR_FLUSH_ERR.
 *
 * Returns 0 when a packet was processed and the task should call again,
 * or -EAGAIN when the task should leave its calling loop.
 */
int rxe_completer(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	struct rxe_send_wqe *wqe = NULL;
	struct sk_buff *skb = NULL;
	struct rxe_pkt_info *pkt = NULL;
	enum comp_state state;

	if (!qp->valid) {
		while ((skb = skb_dequeue(&qp->resp_pkts))) {
			rxe_drop_ref(qp);
			kfree_skb(skb);
		}
		skb = NULL;
		pkt = NULL;

		while (queue_head(qp->sq.queue))
			advance_consumer(qp->sq.queue);

		goto exit;
	}

	if (qp->req.state == QP_STATE_ERROR) {
		while ((skb = skb_dequeue(&qp->resp_pkts))) {
			rxe_drop_ref(qp);
			kfree_skb(skb);
		}
		skb = NULL;
		pkt = NULL;

		while ((wqe = queue_head(qp->sq.queue))) {
			wqe->status = IB_WC_WR_FLUSH_ERR;
			do_complete(qp, wqe);
		}

		goto exit;
	}

	if (qp->req.state == QP_STATE_RESET) {
		while ((skb = skb_dequeue(&qp->resp_pkts))) {
			rxe_drop_ref(qp);
			kfree_skb(skb);
		}
		skb = NULL;
		pkt = NULL;

		while (queue_head(qp->sq.queue))
			advance_consumer(qp->sq.queue);

		goto exit;
	}

	/* latch and clear the timeout flag set by the retransmit timer */
	if (qp->comp.timeout) {
		qp->comp.timeout_retry = 1;
		qp->comp.timeout = 0;
	} else {
		qp->comp.timeout_retry = 0;
	}

	/* a retry is already pending with the requester */
	if (qp->req.need_retry)
		goto exit;

	state = COMPST_GET_ACK;

	while (1) {
		pr_debug("state = %s\n", comp_state_name[state]);
		switch (state) {
		case COMPST_GET_ACK:
			skb = skb_dequeue(&qp->resp_pkts);
			if (skb) {
				pkt = SKB_TO_PKT(skb);
				qp->comp.timeout_retry = 0;
			}
			state = COMPST_GET_WQE;
			break;

		case COMPST_GET_WQE:
			state = get_wqe(qp, pkt, &wqe);
			break;

		case COMPST_CHECK_PSN:
			state = check_psn(qp, pkt, wqe);
			break;

		case COMPST_CHECK_ACK:
			state = check_ack(qp, pkt, wqe);
			break;

		case COMPST_READ:
			state = do_read(qp, pkt, wqe);
			break;

		case COMPST_ATOMIC:
			state = do_atomic(qp, pkt, wqe);
			break;

		case COMPST_WRITE_SEND:
			if (wqe->state == wqe_state_pending &&
			    wqe->last_psn == pkt->psn)
				state = COMPST_COMP_ACK;
			else
				state = COMPST_UPDATE_COMP;
			break;

		case COMPST_COMP_ACK:
			state = complete_ack(qp, pkt, wqe);
			break;

		case COMPST_COMP_WQE:
			state = complete_wqe(qp, pkt, wqe);
			break;

		case COMPST_UPDATE_COMP:
			if (pkt->mask & RXE_END_MASK)
				qp->comp.opcode = -1;
			else
				qp->comp.opcode = pkt->opcode;

			if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
				qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;

			if (qp->req.wait_psn) {
				qp->req.wait_psn = 0;
				rxe_run_task(&qp->req.task, 1);
			}

			state = COMPST_DONE;
			break;

		case COMPST_DONE:
			if (pkt) {
				rxe_drop_ref(pkt->qp);
				kfree_skb(skb);
			}
			goto done;

		case COMPST_EXIT:
			if (qp->comp.timeout_retry && wqe) {
				state = COMPST_ERROR_RETRY;
				break;
			}

			/* reset the timeout counter if
			 * (1) QP is type RC
			 * (2) the QP is alive
			 * (3) there is a packet sent by the requester that
			 *     might be acked (we still might get spurious
			 *     timeouts but try to keep them as few as possible)
			 * (4) the timeout parameter is set
			 */
			if ((qp_type(qp) == IB_QPT_RC) &&
			    (qp->req.state == QP_STATE_READY) &&
			    (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
			    qp->qp_timeout_jiffies)
				mod_timer(&qp->retrans_timer,
					  jiffies + qp->qp_timeout_jiffies);
			goto exit;

		case COMPST_ERROR_RETRY:
			/* we come here if the retry timer fired and we did
			 * not receive a response packet. try to retry the send
			 * queue if that makes sense and the limits have not
			 * been exceeded. remember that some timeouts are
			 * spurious since we do not reset the timer but kick
			 * it down the road or let it expire
			 */

			/* there is nothing to retry in this case */
			if (!wqe || (wqe->state == wqe_state_posted))
				goto exit;

			if (qp->comp.retry_cnt > 0) {
				/* a count of 7 is never decremented */
				if (qp->comp.retry_cnt != 7)
					qp->comp.retry_cnt--;

				/* no point in retrying if we have already
				 * seen the last ack that the requester could
				 * have caused
				 */
				if (psn_compare(qp->req.psn,
						qp->comp.psn) > 0) {
					/* tell the requester to retry the
					 * send queue next time around
					 */
					qp->req.need_retry = 1;
					rxe_run_task(&qp->req.task, 1);
				}
				goto exit;
			} else {
				wqe->status = IB_WC_RETRY_EXC_ERR;
				state = COMPST_ERROR;
			}
			break;

		case COMPST_RNR_RETRY:
			if (qp->comp.rnr_retry > 0) {
				/* a count of 7 is never decremented */
				if (qp->comp.rnr_retry != 7)
					qp->comp.rnr_retry--;

				qp->req.need_retry = 1;
				pr_debug("set rnr nak timer\n");
				mod_timer(&qp->rnr_nak_timer,
					  jiffies + rnrnak_jiffies(aeth_syn(pkt)
						& ~AETH_TYPE_MASK));
				goto exit;
			} else {
				wqe->status = IB_WC_RNR_RETRY_EXC_ERR;
				state = COMPST_ERROR;
			}
			break;

		case COMPST_ERROR:
			do_complete(qp, wqe);
			rxe_qp_error(qp);
			goto exit;
		}
	}

exit:
	/* we come here if we are done with processing and want the task to
	 * exit from the loop calling us
	 */

	/* A response packet taken from resp_pkts may still be held here
	 * (retry, RNR and error paths). Nothing else references it, so
	 * release it together with the QP reference it carries, as
	 * COMPST_DONE does.
	 */
	if (pkt) {
		rxe_drop_ref(pkt->qp);
		kfree_skb(skb);
	}
	return -EAGAIN;

done:
	/* we come here if we have processed a packet we want the task to call
	 * us again to see if there is anything else to do
	 */
	return 0;
}

View file

@ -0,0 +1,165 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
/*
 * Validate the requested number of CQ entries.
 *
 * @cqe must be positive and no larger than the device's max_cqe. When an
 * existing @cq is given (non-NULL), @cqe must also be at least the number
 * of elements currently in its queue.
 *
 * Returns 0 if the request is valid, -EINVAL otherwise.
 */
int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
		    int cqe, int comp_vector, struct ib_udata *udata)
{
	int count;

	if (cqe <= 0) {
		pr_warn("cqe(%d) <= 0\n", cqe);
		goto err1;
	}

	if (cqe > rxe->attr.max_cqe) {
		pr_warn("cqe(%d) > max_cqe(%d)\n",
			cqe, rxe->attr.max_cqe);
		goto err1;
	}

	if (cq) {
		count = queue_count(cq->queue);
		if (cqe < count) {
			/* message is newline-terminated like the two above */
			pr_warn("cqe(%d) < current # elements in queue (%d)\n",
				cqe, count);
			goto err1;
		}
	}

	return 0;

err1:
	return -EINVAL;
}
static void rxe_send_complete(unsigned long data)
{
struct rxe_cq *cq = (struct rxe_cq *)data;
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_ucontext *context,
struct ib_udata *udata)
{
int err;
cq->queue = rxe_queue_init(rxe, &cqe,
sizeof(struct rxe_cqe));
if (!cq->queue) {
pr_warn("unable to create cq\n");
return -ENOMEM;
}
err = do_mmap_info(rxe, udata, false, context, cq->queue->buf,
cq->queue->buf_size, &cq->queue->ip);
if (err) {
kvfree(cq->queue->buf);
kfree(cq->queue);
return err;
}
if (udata)
cq->is_user = 1;
tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
spin_lock_init(&cq->cq_lock);
cq->ibcq.cqe = cqe;
return 0;
}
/*
 * Resize the CQ's queue to @cqe entries. On success the resulting entry
 * count is recorded in cq->ibcq.cqe and 0 is returned; otherwise the
 * error from rxe_queue_resize() is returned and the CQ size field is
 * left unchanged.
 */
int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata)
{
	int err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe,
				   sizeof(struct rxe_cqe),
				   cq->queue->ip ? cq->queue->ip->context : NULL,
				   udata, NULL, &cq->cq_lock);

	if (err)
		return err;

	cq->ibcq.cqe = cqe;
	return 0;
}
/*
 * Post one completion entry to @cq.
 *
 * If the queue is full, an IB_EVENT_CQ_ERR event is raised (when an event
 * handler is installed) and -EBUSY is returned. Otherwise the entry is
 * copied in under cq_lock and, if the CQ is armed for this kind of
 * completion, the arm state is cleared and the completion tasklet is
 * scheduled. Returns 0 on success.
 */
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
{
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);

	if (unlikely(queue_full(cq->queue))) {
		struct ib_event ev;

		spin_unlock_irqrestore(&cq->cq_lock, flags);

		if (cq->ibcq.event_handler) {
			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}

		return -EBUSY;
	}

	memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));

	/* make sure all changes to the CQ are written before we update the
	 * producer pointer
	 */
	smp_wmb();

	advance_producer(cq->queue);
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	if ((cq->notify == IB_CQ_NEXT_COMP) ||
	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
		cq->notify = 0;
		tasklet_schedule(&cq->comp_task);
	}

	return 0;
}
/* Cleanup callback; @arg is the struct rxe_cq. Releases its queue, if any. */
void rxe_cq_cleanup(void *arg)
{
	struct rxe_cq *cq = arg;

	if (!cq->queue)
		return;

	rxe_queue_cleanup(cq->queue);
}

View file

@ -0,0 +1,166 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
#define DMA_BAD_ADDER ((u64)0)
/* Report whether @dma_addr is the bad-address sentinel (non-zero if so) */
static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr)
{
	return DMA_BAD_ADDER == dma_addr;
}
/*
 * "Map" a kernel virtual address: the returned handle is the CPU address
 * itself. Warns on an invalid DMA direction.
 */
static u64 rxe_dma_map_single(struct ib_device *dev,
			      void *cpu_addr, size_t size,
			      enum dma_data_direction direction)
{
	u64 handle = (uintptr_t)cpu_addr;

	WARN_ON(!valid_dma_direction(direction));

	return handle;
}
/* Nothing to undo for a single mapping; only warns on an invalid direction */
static void rxe_dma_unmap_single(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
WARN_ON(!valid_dma_direction(direction));
}
/*
 * "Map" a region inside a single page: the handle is the page's kernel
 * address plus @offset. Returns DMA_BAD_ADDER when the region would
 * extend past the end of the page, and 0 (the same value) when the page
 * has no kernel address.
 */
static u64 rxe_dma_map_page(struct ib_device *dev,
			    struct page *page,
			    unsigned long offset,
			    size_t size, enum dma_data_direction direction)
{
	u64 addr;

	WARN_ON(!valid_dma_direction(direction));

	if (offset + size > PAGE_SIZE)
		return DMA_BAD_ADDER;

	addr = (uintptr_t)page_address(page);
	if (!addr)
		return addr;

	return addr + offset;
}
/* Nothing to undo for a page mapping; only warns on an invalid direction */
static void rxe_dma_unmap_page(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
WARN_ON(!valid_dma_direction(direction));
}
/*
 * "Map" a scatterlist: each entry's DMA address becomes the kernel
 * address of its page plus the entry offset. Returns @nents on success,
 * or 0 as soon as an entry's page has no kernel address (entries before
 * it have already been filled in).
 */
static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl,
		      int nents, enum dma_data_direction direction)
{
	struct scatterlist *sg;
	u64 addr;
	int i;

	WARN_ON(!valid_dma_direction(direction));

	for_each_sg(sgl, sg, nents, i) {
		addr = (uintptr_t)page_address(sg_page(sg));
		if (!addr)
			return 0;

		sg->dma_address = addr + sg->offset;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		sg->dma_length = sg->length;
#endif
	}

	return nents;
}
/* Nothing to undo for a scatterlist mapping; only warns on an invalid direction */
static void rxe_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
WARN_ON(!valid_dma_direction(direction));
}
/* Intentionally empty: no work is done to sync a mapping for the CPU */
static void rxe_sync_single_for_cpu(struct ib_device *dev,
u64 addr,
size_t size, enum dma_data_direction dir)
{
}
/* Intentionally empty: no work is done to sync a mapping for the device */
static void rxe_sync_single_for_device(struct ib_device *dev,
u64 addr,
size_t size, enum dma_data_direction dir)
{
}
/*
 * Allocate enough whole pages to hold @size bytes and return their kernel
 * address, or NULL on failure. When @dma_handle is non-NULL it receives
 * the same address (0 on failure).
 */
static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size,
				    u64 *dma_handle, gfp_t flag)
{
	struct page *p = alloc_pages(flag, get_order(size));
	void *addr = NULL;

	if (p)
		addr = page_address(p);

	if (!dma_handle)
		return addr;

	*dma_handle = (uintptr_t)addr;
	return addr;
}
/* Free pages obtained from rxe_dma_alloc_coherent(); @size must match the
 * allocation so that the same page order is computed.
 */
static void rxe_dma_free_coherent(struct ib_device *dev, size_t size,
void *cpu_addr, u64 dma_handle)
{
free_pages((unsigned long)cpu_addr, get_order(size));
}
/* DMA mapping operations table wiring up the rxe_* helpers defined above.
 * The helpers use kernel virtual addresses as DMA handles.
 */
struct ib_dma_mapping_ops rxe_dma_mapping_ops = {
.mapping_error = rxe_mapping_error,
.map_single = rxe_dma_map_single,
.unmap_single = rxe_dma_unmap_single,
.map_page = rxe_dma_map_page,
.unmap_page = rxe_dma_unmap_page,
.map_sg = rxe_map_sg,
.unmap_sg = rxe_unmap_sg,
.sync_single_for_cpu = rxe_sync_single_for_cpu,
.sync_single_for_device = rxe_sync_single_for_device,
.alloc_coherent = rxe_dma_alloc_coherent,
.free_coherent = rxe_dma_free_coherent
};

View file

@ -0,0 +1,952 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_HDR_H
#define RXE_HDR_H
/* Information extracted from a packet and carried in the control block
 * (cb array) of its sk_buff; see SKB_TO_PKT(). Must be at most 48 bytes
 * so that it fits in the cb array. Used for received packets.
 */
struct rxe_pkt_info {
struct rxe_dev *rxe; /* device that owns packet */
struct rxe_qp *qp; /* qp that owns packet */
struct rxe_send_wqe *wqe; /* send wqe */
u8 *hdr; /* points to bth */
u32 mask; /* useful info about pkt */
u32 psn; /* bth psn of packet */
u16 pkey_index; /* partition of pkt */
u16 paylen; /* length of bth - icrc */
u8 port_num; /* port pkt received on */
u8 opcode; /* bth opcode of packet */
u8 offset; /* bth offset from pkt->hdr */
};
/* Convert between an sk_buff and the rxe_pkt_info stored in its cb array.
 * These macros should be used only for received skbs.
 */
#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb)
#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb)
/*
* IBA header types and methods
*
* Some of these are for reference and completeness only since
* rxe does not currently support RD transport
* most of this could be moved into IB core. ib_pack.h has
* part of this but is incomplete
*
* Header specific routines to insert/extract values to/from headers
* the routines that are named __hhh_(set_)fff() take a pointer to a
* hhh header and get(set) the fff field. The routines named
* hhh_(set_)fff take a packet info struct and find the
* header and field based on the opcode in the packet.
* Conversion to/from network byte order from cpu order is also done.
*/
#define RXE_ICRC_SIZE (4)
#define RXE_MAX_HDR_LENGTH (80)
/******************************************************************************
* Base Transport Header
******************************************************************************/
/* Layout of the base transport header. Multi-byte fields are big-endian;
 * the sub-fields packed into flags, qpn and apsn are selected with the
 * BTH_*_MASK constants.
 */
struct rxe_bth {
u8 opcode;
u8 flags; /* se, mig, pad and tver bits */
__be16 pkey;
__be32 qpn; /* fecn, becn, reserved bits and the 24-bit qpn */
__be32 apsn; /* ack bit plus further bits selected by the masks below */
};
#define BTH_TVER (0)
#define BTH_DEF_PKEY (0xffff)
/* bits within rxe_bth.flags */
#define BTH_SE_MASK (0x80)
#define BTH_MIG_MASK (0x40)
#define BTH_PAD_MASK (0x30)
#define BTH_TVER_MASK (0x0f)
/* bits within rxe_bth.qpn (in cpu byte order) */
#define BTH_FECN_MASK (0x80000000)
#define BTH_BECN_MASK (0x40000000)
#define BTH_RESV6A_MASK (0x3f000000)
#define BTH_QPN_MASK (0x00ffffff)
/* bits within rxe_bth.apsn (in cpu byte order); BTH_ACK_MASK is applied to
 * apsn by __bth_ack(), the other two presumably split the same word
 */
#define BTH_ACK_MASK (0x80000000)
#define BTH_RESV7_MASK (0x7f000000)
#define BTH_PSN_MASK (0x00ffffff)
/* Read the opcode byte of the BTH at @arg */
static inline u8 __bth_opcode(void *arg)
{
	return ((struct rxe_bth *)arg)->opcode;
}
/* Store @opcode in the BTH at @arg */
static inline void __bth_set_opcode(void *arg, u8 opcode)
{
	((struct rxe_bth *)arg)->opcode = opcode;
}
/* Return 1 if the SE bit is set in the BTH at @arg, else 0 */
static inline u8 __bth_se(void *arg)
{
	struct rxe_bth *hdr = arg;

	return (hdr->flags & BTH_SE_MASK) != 0;
}
/* Set the SE bit of the BTH at @arg when @se is non-zero, clear it otherwise */
static inline void __bth_set_se(void *arg, int se)
{
	struct rxe_bth *hdr = arg;

	if (!se)
		hdr->flags &= ~BTH_SE_MASK;
	else
		hdr->flags |= BTH_SE_MASK;
}
/* Return 1 if the MIG bit is set in the BTH at @arg, else 0 */
static inline u8 __bth_mig(void *arg)
{
	struct rxe_bth *hdr = arg;

	return (hdr->flags & BTH_MIG_MASK) != 0;
}
/* Set the MIG bit of the BTH at @arg when @mig is non-zero, clear it otherwise */
static inline void __bth_set_mig(void *arg, u8 mig)
{
	struct rxe_bth *hdr = arg;

	if (!mig)
		hdr->flags &= ~BTH_MIG_MASK;
	else
		hdr->flags |= BTH_MIG_MASK;
}
/* Extract the two-bit PAD field (bits 5:4 of the flags byte). */
static inline u8 __bth_pad(void *arg)
{
	struct rxe_bth *hdr = arg;

	return (hdr->flags & BTH_PAD_MASK) >> 4;
}
/* Replace the PAD field of the flags byte with @pad, keeping the other bits. */
static inline void __bth_set_pad(void *arg, u8 pad)
{
	struct rxe_bth *hdr = arg;
	u8 others = hdr->flags & ~BTH_PAD_MASK;
	u8 field = (pad << 4) & BTH_PAD_MASK;

	hdr->flags = field | others;
}
/* Extract the TVER field (low four bits of the flags byte). */
static inline u8 __bth_tver(void *arg)
{
	return ((struct rxe_bth *)arg)->flags & BTH_TVER_MASK;
}
/* Replace the TVER field of the flags byte with @tver, keeping the other bits. */
static inline void __bth_set_tver(void *arg, u8 tver)
{
	struct rxe_bth *hdr = arg;
	u8 others = hdr->flags & ~BTH_TVER_MASK;
	u8 field = tver & BTH_TVER_MASK;

	hdr->flags = field | others;
}
/* Read the BTH pkey field, converted from big-endian to CPU order. */
static inline u16 __bth_pkey(void *arg)
{
	return be16_to_cpu(((struct rxe_bth *)arg)->pkey);
}
/* Store @pkey into the BTH pkey field in big-endian order. */
static inline void __bth_set_pkey(void *arg, u16 pkey)
{
	((struct rxe_bth *)arg)->pkey = cpu_to_be16(pkey);
}
/* Return the 24-bit QPN held in the low bits of the BTH qpn word. */
static inline u32 __bth_qpn(void *arg)
{
	struct rxe_bth *hdr = arg;
	u32 word = be32_to_cpu(hdr->qpn);

	return word & BTH_QPN_MASK;
}
/*
 * Write the 24-bit QPN into the BTH qpn word while preserving the upper
 * eight bits (FECN, BECN and the reserved field).
 */
static inline void __bth_set_qpn(void *arg, u32 qpn)
{
	struct rxe_bth *hdr = arg;
	u32 word = be32_to_cpu(hdr->qpn);

	word &= ~BTH_QPN_MASK;
	word |= qpn & BTH_QPN_MASK;
	hdr->qpn = cpu_to_be32(word);
}
/* Return 1 if the FECN bit is set in the BTH qpn word, otherwise 0. */
static inline int __bth_fecn(void *arg)
{
	struct rxe_bth *hdr = arg;

	/* the mask is converted instead of the field, avoiding a byte swap */
	return (hdr->qpn & cpu_to_be32(BTH_FECN_MASK)) != 0;
}
/* Set the FECN bit in the BTH qpn word when @fecn is non-zero, else clear it. */
static inline void __bth_set_fecn(void *arg, int fecn)
{
	struct rxe_bth *hdr = arg;

	if (!fecn) {
		hdr->qpn &= ~cpu_to_be32(BTH_FECN_MASK);
		return;
	}

	hdr->qpn |= cpu_to_be32(BTH_FECN_MASK);
}
/* Return 1 if the BECN bit is set in the BTH qpn word, otherwise 0. */
static inline int __bth_becn(void *arg)
{
	struct rxe_bth *hdr = arg;

	return (hdr->qpn & cpu_to_be32(BTH_BECN_MASK)) != 0;
}
/* Set the BECN bit in the BTH qpn word when @becn is non-zero, else clear it. */
static inline void __bth_set_becn(void *arg, int becn)
{
	struct rxe_bth *hdr = arg;

	if (!becn) {
		hdr->qpn &= ~cpu_to_be32(BTH_BECN_MASK);
		return;
	}

	hdr->qpn |= cpu_to_be32(BTH_BECN_MASK);
}
/* Return the six reserved bits that sit below FECN/BECN in the qpn word. */
static inline u8 __bth_resv6a(void *arg)
{
	struct rxe_bth *hdr = arg;

	return (be32_to_cpu(hdr->qpn) & BTH_RESV6A_MASK) >> 24;
}
/*
 * Clear the six reserved bits (resv6a) in the BTH qpn word.
 *
 * Only the reserved bits are touched: the mask is AND-ed into the existing
 * word so that FECN, BECN and the QPN are preserved. A plain assignment
 * would overwrite the whole word with the inverted mask.
 */
static inline void __bth_set_resv6a(void *arg)
{
	struct rxe_bth *bth = arg;

	bth->qpn &= ~cpu_to_be32(BTH_RESV6A_MASK);
}
/* Return 1 if the ACK bit is set in the BTH apsn word, otherwise 0. */
static inline int __bth_ack(void *arg)
{
	struct rxe_bth *hdr = arg;

	return (hdr->apsn & cpu_to_be32(BTH_ACK_MASK)) != 0;
}
/* Set the ACK bit in the BTH apsn word when @ack is non-zero, else clear it. */
static inline void __bth_set_ack(void *arg, int ack)
{
	struct rxe_bth *hdr = arg;

	if (!ack) {
		hdr->apsn &= ~cpu_to_be32(BTH_ACK_MASK);
		return;
	}

	hdr->apsn |= cpu_to_be32(BTH_ACK_MASK);
}
/* Clear the seven reserved bits of the BTH apsn word; other bits are kept. */
static inline void __bth_set_resv7(void *arg)
{
	struct rxe_bth *hdr = arg;

	hdr->apsn = hdr->apsn & ~cpu_to_be32(BTH_RESV7_MASK);
}
/* Return the 24-bit PSN held in the low bits of the BTH apsn word. */
static inline u32 __bth_psn(void *arg)
{
	struct rxe_bth *hdr = arg;
	u32 word = be32_to_cpu(hdr->apsn);

	return word & BTH_PSN_MASK;
}
/*
 * Write the 24-bit PSN into the BTH apsn word while preserving the upper
 * eight bits (ACK and the reserved field).
 */
static inline void __bth_set_psn(void *arg, u32 psn)
{
	struct rxe_bth *hdr = arg;
	u32 word = be32_to_cpu(hdr->apsn);

	word &= ~BTH_PSN_MASK;
	word |= psn & BTH_PSN_MASK;
	hdr->apsn = cpu_to_be32(word);
}
/* Read the opcode from the BTH located at pkt->hdr + pkt->offset. */
static inline u8 bth_opcode(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_opcode(bth);
}
/* Store @opcode in the BTH located at pkt->hdr + pkt->offset. */
static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_opcode(bth, opcode);
}
/* Read the SE bit from the packet's BTH. */
static inline u8 bth_se(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_se(bth);
}
/* Set or clear the SE bit in the packet's BTH according to @se. */
static inline void bth_set_se(struct rxe_pkt_info *pkt, int se)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_se(bth, se);
}
/* Read the MIG bit from the packet's BTH. */
static inline u8 bth_mig(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_mig(bth);
}
/* Set or clear the MIG bit in the packet's BTH according to @mig. */
static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_mig(bth, mig);
}
/* Read the PAD field from the packet's BTH. */
static inline u8 bth_pad(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_pad(bth);
}
/* Write @pad into the PAD field of the packet's BTH. */
static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_pad(bth, pad);
}
/* Read the TVER field from the packet's BTH. */
static inline u8 bth_tver(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_tver(bth);
}
/* Write @tver into the TVER field of the packet's BTH. */
static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_tver(bth, tver);
}
/* Read the pkey (CPU byte order) from the packet's BTH. */
static inline u16 bth_pkey(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_pkey(bth);
}
/* Store @pkey (CPU byte order) in the packet's BTH. */
static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_pkey(bth, pkey);
}
/* Read the 24-bit QPN from the packet's BTH. */
static inline u32 bth_qpn(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_qpn(bth);
}
/* Write the 24-bit @qpn into the packet's BTH. */
static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_qpn(bth, qpn);
}
/* Read the FECN bit from the packet's BTH. */
static inline int bth_fecn(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_fecn(bth);
}
/* Set or clear the FECN bit in the packet's BTH according to @fecn. */
static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_fecn(bth, fecn);
}
/* Read the BECN bit from the packet's BTH. */
static inline int bth_becn(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_becn(bth);
}
/* Set or clear the BECN bit in the packet's BTH according to @becn. */
static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_becn(bth, becn);
}
/* Read the resv6a reserved bits from the packet's BTH. */
static inline u8 bth_resv6a(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_resv6a(bth);
}
/* Apply __bth_set_resv6a() to the packet's BTH. */
static inline void bth_set_resv6a(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_resv6a(bth);
}
/* Read the ACK bit from the packet's BTH. */
static inline int bth_ack(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_ack(bth);
}
/* Set or clear the ACK bit in the packet's BTH according to @ack. */
static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_ack(bth, ack);
}
/* Clear the resv7 reserved bits in the packet's BTH. */
static inline void bth_set_resv7(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_resv7(bth);
}
/* Read the 24-bit PSN from the packet's BTH. */
static inline u32 bth_psn(struct rxe_pkt_info *pkt)
{
	void *bth = pkt->hdr + pkt->offset;

	return __bth_psn(bth);
}
/* Write the 24-bit @psn into the packet's BTH. */
static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn)
{
	void *bth = pkt->hdr + pkt->offset;

	__bth_set_psn(bth, psn);
}
/*
 * Fill in every field of the packet's BTH in one go.
 *
 * Flag bits not driven by @se, @mig or @pad (i.e. TVER) and the upper
 * eight bits of the qpn word are written as zero. The ACK bit is set in
 * the apsn word when @ack_req is non-zero.
 */
static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se,
			    int mig, int pad, u16 pkey, u32 qpn, int ack_req,
			    u32 psn)
{
	struct rxe_bth *hdr = (struct rxe_bth *)(pkt->hdr + pkt->offset);
	u8 flags = (pad << 4) & BTH_PAD_MASK;
	u32 apsn = psn & BTH_PSN_MASK;

	if (se)
		flags |= BTH_SE_MASK;
	if (mig)
		flags |= BTH_MIG_MASK;
	if (ack_req)
		apsn |= BTH_ACK_MASK;

	hdr->opcode = opcode;
	hdr->flags = flags;
	hdr->pkey = cpu_to_be16(pkey);
	hdr->qpn = cpu_to_be32(qpn & BTH_QPN_MASK);
	hdr->apsn = cpu_to_be32(apsn);
}
/******************************************************************************
* Reliable Datagram Extended Transport Header
******************************************************************************/
/* Reliable Datagram ETH: a single big-endian word carrying the EEN. */
struct rxe_rdeth {
__be32 een;
};
/* the EEN occupies the low 24 bits of the word (CPU byte order) */
#define RDETH_EEN_MASK (0x00ffffff)
static inline u8 __rdeth_een(void *arg)
{
struct rxe_rdeth *rdeth = arg;
return RDETH_EEN_MASK & be32_to_cpu(rdeth->een);
}
/*
 * Store the low 24 bits of @een into the RDETH word; the upper eight bits
 * of the word are written as zero.
 */
static inline void __rdeth_set_een(void *arg, u32 een)
{
	struct rxe_rdeth *hdr = arg;
	u32 masked = een & RDETH_EEN_MASK;

	hdr->een = cpu_to_be32(masked);
}
static inline u8 rdeth_een(struct rxe_pkt_info *pkt)
{
return __rdeth_een(pkt->hdr + pkt->offset
+ rxe_opcode[pkt->opcode].offset[RXE_RDETH]);
}
/* Write @een into the packet's RDETH (found via the RXE_RDETH offset). */
static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een)
{
	void *rdeth = pkt->hdr + pkt->offset +
		      rxe_opcode[pkt->opcode].offset[RXE_RDETH];

	__rdeth_set_een(rdeth, een);
}
/******************************************************************************
* Datagram Extended Transport Header
******************************************************************************/
/* Datagram ETH: two big-endian words, the Q_Key and the source QP word. */
struct rxe_deth {
__be32 qkey;
/* source QP number in the low 24 bits (see DETH_SQP_MASK) */
__be32 sqp;
};
/* NOTE(review): GSI_QKEY is not referenced in this chunk */
#define GSI_QKEY (0x80010000)
#define DETH_SQP_MASK (0x00ffffff)
/* Read the DETH qkey, converted to CPU byte order. */
static inline u32 __deth_qkey(void *arg)
{
	return be32_to_cpu(((struct rxe_deth *)arg)->qkey);
}
/* Store @qkey into the DETH in big-endian order. */
static inline void __deth_set_qkey(void *arg, u32 qkey)
{
	((struct rxe_deth *)arg)->qkey = cpu_to_be32(qkey);
}
/* Return the 24-bit source QP number from the DETH sqp word. */
static inline u32 __deth_sqp(void *arg)
{
	struct rxe_deth *hdr = arg;
	u32 word = be32_to_cpu(hdr->sqp);

	return word & DETH_SQP_MASK;
}
/*
 * Store the low 24 bits of @sqp into the DETH sqp word; the upper eight
 * bits of the word are written as zero.
 */
static inline void __deth_set_sqp(void *arg, u32 sqp)
{
	struct rxe_deth *hdr = arg;
	u32 masked = sqp & DETH_SQP_MASK;

	hdr->sqp = cpu_to_be32(masked);
}
/* Read the qkey from the packet's DETH (found via the RXE_DETH offset). */
static inline u32 deth_qkey(struct rxe_pkt_info *pkt)
{
	void *deth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_DETH];

	return __deth_qkey(deth);
}
/* Write @qkey into the packet's DETH (found via the RXE_DETH offset). */
static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey)
{
	void *deth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_DETH];

	__deth_set_qkey(deth, qkey);
}
/* Read the source QP number from the packet's DETH. */
static inline u32 deth_sqp(struct rxe_pkt_info *pkt)
{
	void *deth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_DETH];

	return __deth_sqp(deth);
}
/* Write the source QP number @sqp into the packet's DETH. */
static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp)
{
	void *deth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_DETH];

	__deth_set_sqp(deth, sqp);
}
/******************************************************************************
* RDMA Extended Transport Header
******************************************************************************/
/*
* RDMA ETH: 64-bit address, 32-bit rkey and 32-bit length, all stored
* big-endian.
*/
struct rxe_reth {
__be64 va;
__be32 rkey;
__be32 len;
};
/* Read the RETH va field, converted to CPU byte order. */
static inline u64 __reth_va(void *arg)
{
	return be64_to_cpu(((struct rxe_reth *)arg)->va);
}
/* Store @va into the RETH in big-endian order. */
static inline void __reth_set_va(void *arg, u64 va)
{
	((struct rxe_reth *)arg)->va = cpu_to_be64(va);
}
/* Read the RETH rkey field, converted to CPU byte order. */
static inline u32 __reth_rkey(void *arg)
{
	return be32_to_cpu(((struct rxe_reth *)arg)->rkey);
}
/* Store @rkey into the RETH in big-endian order. */
static inline void __reth_set_rkey(void *arg, u32 rkey)
{
	((struct rxe_reth *)arg)->rkey = cpu_to_be32(rkey);
}
/* Read the RETH len field, converted to CPU byte order. */
static inline u32 __reth_len(void *arg)
{
	return be32_to_cpu(((struct rxe_reth *)arg)->len);
}
/* Store @len into the RETH in big-endian order. */
static inline void __reth_set_len(void *arg, u32 len)
{
	((struct rxe_reth *)arg)->len = cpu_to_be32(len);
}
/* Read va from the packet's RETH (found via the RXE_RETH offset). */
static inline u64 reth_va(struct rxe_pkt_info *pkt)
{
	void *reth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_RETH];

	return __reth_va(reth);
}
/* Write @va into the packet's RETH (found via the RXE_RETH offset). */
static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va)
{
	void *reth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_RETH];

	__reth_set_va(reth, va);
}
/* Read rkey from the packet's RETH. */
static inline u32 reth_rkey(struct rxe_pkt_info *pkt)
{
	void *reth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_RETH];

	return __reth_rkey(reth);
}
/* Write @rkey into the packet's RETH. */
static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
{
	void *reth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_RETH];

	__reth_set_rkey(reth, rkey);
}
/* Read len from the packet's RETH. */
static inline u32 reth_len(struct rxe_pkt_info *pkt)
{
	void *reth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_RETH];

	return __reth_len(reth);
}
/* Write @len into the packet's RETH. */
static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)
{
	void *reth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_RETH];

	__reth_set_len(reth, len);
}
/******************************************************************************
* Atomic Extended Transport Header
******************************************************************************/
/*
* Atomic ETH: address, rkey and the two 64-bit operands, all big-endian.
* The struct is packed; without it the compiler could insert padding after
* the 32-bit rkey to align the following 64-bit members.
*/
struct rxe_atmeth {
__be64 va;
__be32 rkey;
__be64 swap_add;
__be64 comp;
} __attribute__((__packed__));
/* Read the ATMETH va field, converted to CPU byte order. */
static inline u64 __atmeth_va(void *arg)
{
	return be64_to_cpu(((struct rxe_atmeth *)arg)->va);
}
/* Store @va into the ATMETH in big-endian order. */
static inline void __atmeth_set_va(void *arg, u64 va)
{
	((struct rxe_atmeth *)arg)->va = cpu_to_be64(va);
}
/* Read the ATMETH rkey field, converted to CPU byte order. */
static inline u32 __atmeth_rkey(void *arg)
{
	return be32_to_cpu(((struct rxe_atmeth *)arg)->rkey);
}
/* Store @rkey into the ATMETH in big-endian order. */
static inline void __atmeth_set_rkey(void *arg, u32 rkey)
{
	((struct rxe_atmeth *)arg)->rkey = cpu_to_be32(rkey);
}
/* Read the ATMETH swap_add operand, converted to CPU byte order. */
static inline u64 __atmeth_swap_add(void *arg)
{
	return be64_to_cpu(((struct rxe_atmeth *)arg)->swap_add);
}
/* Store @swap_add into the ATMETH in big-endian order. */
static inline void __atmeth_set_swap_add(void *arg, u64 swap_add)
{
	((struct rxe_atmeth *)arg)->swap_add = cpu_to_be64(swap_add);
}
/* Read the ATMETH comp operand, converted to CPU byte order. */
static inline u64 __atmeth_comp(void *arg)
{
	return be64_to_cpu(((struct rxe_atmeth *)arg)->comp);
}
/* Store @comp into the ATMETH in big-endian order. */
static inline void __atmeth_set_comp(void *arg, u64 comp)
{
	((struct rxe_atmeth *)arg)->comp = cpu_to_be64(comp);
}
/* Read va from the packet's ATMETH (found via the RXE_ATMETH offset). */
static inline u64 atmeth_va(struct rxe_pkt_info *pkt)
{
	void *atmeth = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMETH];

	return __atmeth_va(atmeth);
}
/* Write @va into the packet's ATMETH (found via the RXE_ATMETH offset). */
static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va)
{
	void *atmeth = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMETH];

	__atmeth_set_va(atmeth, va);
}
/* Read rkey from the packet's ATMETH. */
static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt)
{
	void *atmeth = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMETH];

	return __atmeth_rkey(atmeth);
}
/* Write @rkey into the packet's ATMETH. */
static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
{
	void *atmeth = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMETH];

	__atmeth_set_rkey(atmeth, rkey);
}
/* Read the swap_add operand from the packet's ATMETH. */
static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt)
{
	void *atmeth = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMETH];

	return __atmeth_swap_add(atmeth);
}
/* Write the @swap_add operand into the packet's ATMETH. */
static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add)
{
	void *atmeth = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMETH];

	__atmeth_set_swap_add(atmeth, swap_add);
}
/* Read the comp operand from the packet's ATMETH. */
static inline u64 atmeth_comp(struct rxe_pkt_info *pkt)
{
	void *atmeth = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMETH];

	return __atmeth_comp(atmeth);
}
/* Write the @comp operand into the packet's ATMETH. */
static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp)
{
	void *atmeth = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMETH];

	__atmeth_set_comp(atmeth, comp);
}
/******************************************************************************
* Ack Extended Transport Header
******************************************************************************/
/*
* Ack ETH: one big-endian word holding the syndrome in the top byte and
* the 24-bit MSN in the low three bytes.
*/
struct rxe_aeth {
__be32 smsn;
};
/* masks applied to rxe_aeth.smsn, expressed in CPU byte order */
#define AETH_SYN_MASK (0xff000000)
#define AETH_MSN_MASK (0x00ffffff)
/*
* Values for the 8-bit AETH syndrome.
* NOTE(review): the grouping comments below are inferred from the names
* and numeric values only; confirm against the users of this enum.
*/
enum aeth_syndrome {
/* mask selecting the top three bits of the syndrome byte */
AETH_TYPE_MASK = 0xe0,
/* values that fall under AETH_TYPE_MASK */
AETH_ACK = 0x00,
AETH_RNR_NAK = 0x20,
AETH_RSVD = 0x40,
AETH_NAK = 0x60,
/* 0x1f: all five bits below AETH_TYPE_MASK set */
AETH_ACK_UNLIMITED = 0x1f,
/* full syndrome values: AETH_NAK (0x60) plus a code in the low bits */
AETH_NAK_PSN_SEQ_ERROR = 0x60,
AETH_NAK_INVALID_REQ = 0x61,
AETH_NAK_REM_ACC_ERR = 0x62,
AETH_NAK_REM_OP_ERR = 0x63,
AETH_NAK_INV_RD_REQ = 0x64,
};
/* Return the syndrome byte (top eight bits) of the AETH smsn word. */
static inline u8 __aeth_syn(void *arg)
{
	struct rxe_aeth *hdr = arg;

	return (be32_to_cpu(hdr->smsn) & AETH_SYN_MASK) >> 24;
}
/*
 * Write @syn into the syndrome byte (top eight bits) of the AETH smsn word,
 * preserving the 24-bit MSN in the low bits.
 */
static inline void __aeth_set_syn(void *arg, u8 syn)
{
	struct rxe_aeth *aeth = arg;
	u32 smsn = be32_to_cpu(aeth->smsn);

	/*
	 * Widen to u32 before shifting: a u8 is promoted to signed int, and
	 * shifting a value >= 0x80 left by 24 would move a one into the sign
	 * bit, which is undefined behaviour.
	 */
	aeth->smsn = cpu_to_be32((AETH_SYN_MASK & ((u32)syn << 24)) |
				 (~AETH_SYN_MASK & smsn));
}
/* Return the 24-bit MSN held in the low bits of the AETH smsn word. */
static inline u32 __aeth_msn(void *arg)
{
	struct rxe_aeth *hdr = arg;
	u32 word = be32_to_cpu(hdr->smsn);

	return word & AETH_MSN_MASK;
}
/*
 * Write the 24-bit MSN into the AETH smsn word while preserving the
 * syndrome byte in the upper eight bits.
 */
static inline void __aeth_set_msn(void *arg, u32 msn)
{
	struct rxe_aeth *hdr = arg;
	u32 word = be32_to_cpu(hdr->smsn);

	word &= ~AETH_MSN_MASK;
	word |= msn & AETH_MSN_MASK;
	hdr->smsn = cpu_to_be32(word);
}
/* Read the syndrome from the packet's AETH (found via the RXE_AETH offset). */
static inline u8 aeth_syn(struct rxe_pkt_info *pkt)
{
	void *aeth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_AETH];

	return __aeth_syn(aeth);
}
/* Write the syndrome @syn into the packet's AETH. */
static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn)
{
	void *aeth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_AETH];

	__aeth_set_syn(aeth, syn);
}
/* Read the MSN from the packet's AETH. */
static inline u32 aeth_msn(struct rxe_pkt_info *pkt)
{
	void *aeth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_AETH];

	return __aeth_msn(aeth);
}
/* Write the MSN @msn into the packet's AETH. */
static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn)
{
	void *aeth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_AETH];

	__aeth_set_msn(aeth, msn);
}
/******************************************************************************
* Atomic Ack Extended Transport Header
******************************************************************************/
/* Atomic Ack ETH: a single big-endian 64-bit value. */
struct rxe_atmack {
__be64 orig;
};
/* Read the ATMACK orig value, converted to CPU byte order. */
static inline u64 __atmack_orig(void *arg)
{
	return be64_to_cpu(((struct rxe_atmack *)arg)->orig);
}
/* Store @orig into the ATMACK in big-endian order. */
static inline void __atmack_set_orig(void *arg, u64 orig)
{
	((struct rxe_atmack *)arg)->orig = cpu_to_be64(orig);
}
/* Read orig from the packet's ATMACK (found via the RXE_ATMACK offset). */
static inline u64 atmack_orig(struct rxe_pkt_info *pkt)
{
	void *atmack = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMACK];

	return __atmack_orig(atmack);
}
/* Write @orig into the packet's ATMACK (found via the RXE_ATMACK offset). */
static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig)
{
	void *atmack = pkt->hdr + pkt->offset +
		       rxe_opcode[pkt->opcode].offset[RXE_ATMACK];

	__atmack_set_orig(atmack, orig);
}
/******************************************************************************
* Immediate Extended Transport Header
******************************************************************************/
/*
* Immediate ETH: one 32-bit value. Unlike the other headers, its accessors
* pass the value through as __be32 without any byte-order conversion.
*/
struct rxe_immdt {
__be32 imm;
};
/* Return the immediate value exactly as stored (no byte-order conversion). */
static inline __be32 __immdt_imm(void *arg)
{
	return ((struct rxe_immdt *)arg)->imm;
}
/* Store @imm exactly as given (no byte-order conversion). */
static inline void __immdt_set_imm(void *arg, __be32 imm)
{
	((struct rxe_immdt *)arg)->imm = imm;
}
/* Read the immediate from the packet's IMMDT (found via RXE_IMMDT offset). */
static inline __be32 immdt_imm(struct rxe_pkt_info *pkt)
{
	void *immdt = pkt->hdr + pkt->offset +
		      rxe_opcode[pkt->opcode].offset[RXE_IMMDT];

	return __immdt_imm(immdt);
}
/* Write the immediate @imm into the packet's IMMDT. */
static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm)
{
	void *immdt = pkt->hdr + pkt->offset +
		      rxe_opcode[pkt->opcode].offset[RXE_IMMDT];

	__immdt_set_imm(immdt, imm);
}
/******************************************************************************
* Invalidate Extended Transport Header
******************************************************************************/
/* Invalidate ETH: a single big-endian 32-bit rkey. */
struct rxe_ieth {
__be32 rkey;
};
/* Read the IETH rkey, converted to CPU byte order. */
static inline u32 __ieth_rkey(void *arg)
{
	return be32_to_cpu(((struct rxe_ieth *)arg)->rkey);
}
/* Store @rkey into the IETH in big-endian order. */
static inline void __ieth_set_rkey(void *arg, u32 rkey)
{
	((struct rxe_ieth *)arg)->rkey = cpu_to_be32(rkey);
}
/* Read rkey from the packet's IETH (found via the RXE_IETH offset). */
static inline u32 ieth_rkey(struct rxe_pkt_info *pkt)
{
	void *ieth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_IETH];

	return __ieth_rkey(ieth);
}
/* Write @rkey into the packet's IETH (found via the RXE_IETH offset). */
static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
{
	void *ieth = pkt->hdr + pkt->offset +
		     rxe_opcode[pkt->opcode].offset[RXE_IETH];

	__ieth_set_rkey(ieth, rkey);
}
/*
* Size in bytes of each header type, taken directly from sizeof() of the
* corresponding struct so the constants cannot drift from the layouts.
*/
enum rxe_hdr_length {
RXE_BTH_BYTES = sizeof(struct rxe_bth),
RXE_DETH_BYTES = sizeof(struct rxe_deth),
RXE_IMMDT_BYTES = sizeof(struct rxe_immdt),
RXE_RETH_BYTES = sizeof(struct rxe_reth),
RXE_AETH_BYTES = sizeof(struct rxe_aeth),
RXE_ATMACK_BYTES = sizeof(struct rxe_atmack),
RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth),
RXE_IETH_BYTES = sizeof(struct rxe_ieth),
RXE_RDETH_BYTES = sizeof(struct rxe_rdeth),
};
/*
 * Return pkt->offset plus the header length recorded for the packet's
 * opcode in the rxe_opcode table.
 */
static inline size_t header_size(struct rxe_pkt_info *pkt)
{
	size_t total = pkt->offset + rxe_opcode[pkt->opcode].length;

	return total;
}
/*
 * Return the address of the payload: the BTH position plus the
 * RXE_PAYLOAD offset recorded for the packet's opcode.
 */
static inline void *payload_addr(struct rxe_pkt_info *pkt)
{
	void *payload = pkt->hdr + pkt->offset +
			rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD];

	return payload;
}
/*
 * Return the number of payload bytes: pkt->paylen minus the headers that
 * precede the payload, the pad bytes reported by the BTH and the ICRC.
 * No range check is done here; callers are expected to have validated
 * paylen beforehand (NOTE(review): confirm in the receive path).
 */
static inline size_t payload_size(struct rxe_pkt_info *pkt)
{
	return pkt->paylen
		- rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]
		- bth_pad(pkt)
		- RXE_ICRC_SIZE;
}
#endif /* RXE_HDR_H */

View file

@ -0,0 +1,96 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
/*
 * Compute a partial ICRC for all the IB transport headers.
 *
 * A pseudo header is built on the stack from the packet's IP header, UDP
 * header and BTH, with the fields that must not contribute to the ICRC
 * overwritten with ones. The CRC is run over that pseudo header and then
 * over the remaining transport headers that follow the BTH in pkt->hdr.
 *
 * @pkt: packet info; pkt->hdr must point at the BTH and pkt->opcode selects
 *       the total header length from rxe_opcode[]
 * @skb: buffer the IP/UDP headers are read from; skb->protocol selects
 *       IPv4, anything else is treated as IPv6
 *
 * Returns the running CRC; the caller continues it over the payload.
 *
 * NOTE(review): the IPv4 branch copies sizeof(struct iphdr) bytes, i.e. it
 * assumes a header without IP options - confirm against the receive path.
 */
u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
	struct iphdr *ip4h;
	struct ipv6hdr *ip6h;
	struct udphdr *udph;
	struct rxe_bth *bth;
	/* u32, not int: the seed and every crc32_le() result are unsigned */
	u32 crc;
	int length;
	int hdr_size = sizeof(struct udphdr) +
		(skb->protocol == htons(ETH_P_IP) ?
		sizeof(struct iphdr) : sizeof(struct ipv6hdr));
	/* The pseudo header buffer is sized using the IPv6 header, since it
	 * is larger than the IPv4 header.
	 */
	u8 pshdr[sizeof(struct udphdr) +
		sizeof(struct ipv6hdr) +
		RXE_BTH_BYTES];

	/* This seed is the result of computing a CRC with a seed of
	 * 0xffffffff and 8 bytes of 0xff representing a masked LRH.
	 */
	crc = 0xdebb20e3;

	if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */
		memcpy(pshdr, ip_hdr(skb), hdr_size);
		ip4h = (struct iphdr *)pshdr;
		udph = (struct udphdr *)(ip4h + 1);

		/* mask the fields excluded from the ICRC */
		ip4h->ttl = 0xff;
		ip4h->check = CSUM_MANGLED_0;
		ip4h->tos = 0xff;
	} else { /* IPv6 */
		memcpy(pshdr, ipv6_hdr(skb), hdr_size);
		ip6h = (struct ipv6hdr *)pshdr;
		udph = (struct udphdr *)(ip6h + 1);

		/* mask the fields excluded from the ICRC */
		memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl));
		ip6h->priority = 0xf;
		ip6h->hop_limit = 0xff;
	}
	udph->check = CSUM_MANGLED_0;

	/* the BTH copy goes directly behind the IP and UDP headers */
	memcpy(&pshdr[hdr_size], pkt->hdr, RXE_BTH_BYTES);
	bth = (struct rxe_bth *)&pshdr[hdr_size];

	/* exclude bth.resv8a by forcing the upper eight bits to ones */
	bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);

	length = hdr_size + RXE_BTH_BYTES;
	crc = crc32_le(crc, pshdr, length);

	/* Finish by running the CRC over the remainder of the headers. */
	crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES,
		       rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
	return crc;
}

View file

@ -0,0 +1,286 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_LOC_H
#define RXE_LOC_H
/* rxe_av.c */
int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr);
int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
struct rxe_av *av, struct ib_ah_attr *attr);
int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
struct ib_ah_attr *attr);
int rxe_av_fill_ip_info(struct rxe_dev *rxe,
struct rxe_av *av,
struct ib_ah_attr *attr,
struct ib_gid_attr *sgid_attr,
union ib_gid *sgid);
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
/* rxe_cq.c */
int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
int cqe, int comp_vector, struct ib_udata *udata);
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_ucontext *context,
struct ib_udata *udata);
int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
void rxe_cq_cleanup(void *arg);
/* rxe_mcast.c */
int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
struct rxe_mc_grp **grp_p);
int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
struct rxe_mc_grp *grp);
int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
union ib_gid *mgid);
void rxe_drop_all_mcast_groups(struct rxe_qp *qp);
void rxe_mc_cleanup(void *arg);
/* rxe_mmap.c */
/*
* Bookkeeping for one mmap-able object.
* NOTE(review): field roles below are inferred from names and from the
* prototypes that follow (rxe_mmap_release, rxe_create_mmap_info); the
* implementation in rxe_mmap.c is not visible here.
*/
struct rxe_mmap_info {
/* list linkage; presumably on a per-device list of pending mmaps */
struct list_head pending_mmaps;
struct ib_ucontext *context;
/* reference count; rxe_mmap_release() takes a struct kref * */
struct kref ref;
void *obj;
struct mminfo info;
};
void rxe_mmap_release(struct kref *ref);
struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev,
u32 size,
struct ib_ucontext *context,
void *obj);
int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
/* rxe_mr.c */
/*
* Direction argument taken by rxe_mem_copy() and copy_data().
* NOTE(review): presumably to_mem_obj copies into the memory object and
* from_mem_obj copies out of it - confirm in rxe_mr.c.
*/
enum copy_direction {
to_mem_obj,
from_mem_obj,
};
int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
int access, struct rxe_mem *mem);
int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
u64 length, u64 iova, int access, struct ib_udata *udata,
struct rxe_mem *mr);
int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
int max_pages, struct rxe_mem *mem);
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr,
int length, enum copy_direction dir, u32 *crcp);
int copy_data(struct rxe_dev *rxe, struct rxe_pd *pd, int access,
struct rxe_dma_info *dma, void *addr, int length,
enum copy_direction dir, u32 *crcp);
void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length);
/*
* Lookup kind passed to lookup_mem().
* NOTE(review): presumably selects whether the key is treated as a local
* or a remote key - confirm in rxe_mr.c.
*/
enum lookup_type {
lookup_local,
lookup_remote,
};
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
enum lookup_type type);
int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length);
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
u64 *page, int num_pages, u64 iova);
void rxe_mem_cleanup(void *arg);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
/* rxe_qp.c */
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct ib_qp_init_attr *init, struct ib_udata *udata,
struct ib_pd *ibpd);
int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_attr *attr, int mask);
int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask, struct ib_udata *udata);
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
void rxe_qp_error(struct rxe_qp *qp);
void rxe_qp_destroy(struct rxe_qp *qp);
void rxe_qp_cleanup(void *arg);
/* Return the QP number stored in the embedded ib_qp. */
static inline int qp_num(struct rxe_qp *qp)
{
	int num = qp->ibqp.qp_num;

	return num;
}
static inline enum ib_qp_type qp_type(struct rxe_qp *qp)
{
return qp->ibqp.qp_type;
}
static inline enum ib_qp_state qp_state(struct rxe_qp *qp)
{
return qp->attr.qp_state;
}
/*
 * Return the MTU to use for @qp: the configured path MTU for RC and UC
 * QPs, RXE_PORT_MAX_MTU for every other QP type.
 */
static inline int qp_mtu(struct rxe_qp *qp)
{
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		return qp->attr.path_mtu;
	default:
		return RXE_PORT_MAX_MTU;
	}
}
/*
 * Return the size in bytes of a receive WQE with room for @max_sge
 * scatter/gather entries following the fixed part.
 */
static inline int rcv_wqe_size(int max_sge)
{
	int bytes = sizeof(struct rxe_recv_wqe) +
		    max_sge * sizeof(struct ib_sge);

	return bytes;
}
void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);
/*
 * Advance the responder resource head index by one, wrapping back to zero
 * when it reaches qp->attr.max_rd_atomic.
 */
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
{
	if (unlikely(++qp->resp.res_head == qp->attr.max_rd_atomic))
		qp->resp.res_head = 0;
}
void retransmit_timer(unsigned long data);
void rnr_nak_timer(unsigned long data);
void dump_qp(struct rxe_qp *qp);
/* rxe_srq.c */
#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_init_attr *init,
struct ib_ucontext *context, struct ib_udata *udata);
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct ib_udata *udata);
extern struct ib_dma_mapping_ops rxe_dma_mapping_ops;
void rxe_release(struct kref *kref);
int rxe_completer(void *arg);
int rxe_requester(void *arg);
int rxe_responder(void *arg);
u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb);
void rxe_resp_queue_pkt(struct rxe_dev *rxe,
struct rxe_qp *qp, struct sk_buff *skb);
void rxe_comp_queue_pkt(struct rxe_dev *rxe,
struct rxe_qp *qp, struct sk_buff *skb);
/*
 * Look up the mask for work request @opcode in rxe_wr_opcode_info[],
 * indexed by the QP's type.
 */
static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp)
{
	unsigned mask = rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];

	return mask;
}
/*
 * Hand a packet to the interface layer for transmission.
 *
 * A request packet requires qp->req.state to be QP_STATE_READY, any other
 * packet requires qp->resp.state to be QP_STATE_READY; otherwise the skb is
 * freed and 0 is returned. Loopback packets get the pkt info copied into
 * the skb and go through ifc_ops->loopback(), all others through
 * ifc_ops->send().
 *
 * Returns 0 on success or when the packet was dropped, otherwise the error
 * returned by the interface op (xmit_errors is bumped in that case and the
 * skb is not freed here).
 */
static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
	int err;
	int is_request = pkt->mask & RXE_REQ_MASK;
	int ready = is_request ? (qp->req.state == QP_STATE_READY)
			       : (qp->resp.state == QP_STATE_READY);

	if (!ready) {
		pr_info("Packet dropped. QP is not in ready state\n");
		kfree_skb(skb);
		return 0;
	}

	if (pkt->mask & RXE_LOOPBACK_MASK) {
		memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
		err = rxe->ifc_ops->loopback(skb);
	} else {
		err = rxe->ifc_ops->send(rxe, pkt, skb);
	}

	if (err) {
		rxe->xmit_errors++;
		return err;
	}

	atomic_inc(&qp->skb_out);

	/* non-RC QPs complete the WQE once its last packet has been sent */
	if (qp_type(qp) != IB_QPT_RC && (pkt->mask & RXE_END_MASK)) {
		pkt->wqe->state = wqe_state_done;
		rxe_run_task(&qp->comp.task, 1);
	}

	return err;
}
#endif /* RXE_LOC_H */

View file

@ -0,0 +1,190 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
/*
 * Find the multicast group keyed by @mgid, creating it if it does not
 * exist yet, and return it through @grp_p.
 *
 * A newly created group is initialised, keyed with @mgid and registered
 * with the interface layer via ifc_ops->mcast_add().
 *
 * Returns 0 on success, -EINVAL when the device allows no multicast QP
 * attachments, -ENOMEM when the group cannot be allocated, or the error
 * from mcast_add() (the new group's reference is dropped in that case).
 */
int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
		      struct rxe_mc_grp **grp_p)
{
	struct rxe_mc_grp *grp;
	int err;

	if (rxe->attr.max_mcast_qp_attach == 0)
		return -EINVAL;

	grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
	if (!grp) {
		grp = rxe_alloc(&rxe->mc_grp_pool);
		if (!grp)
			return -ENOMEM;

		INIT_LIST_HEAD(&grp->qp_list);
		spin_lock_init(&grp->mcg_lock);
		grp->rxe = rxe;
		rxe_add_key(grp, mgid);

		err = rxe->ifc_ops->mcast_add(rxe, mgid);
		if (err) {
			rxe_drop_ref(grp);
			return err;
		}
	}

	*grp_p = grp;
	return 0;
}
int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
struct rxe_mc_grp *grp)
{
int err;
struct rxe_mc_elem *elem;
/* check to see of the qp is already a member of the group */
spin_lock_bh(&qp->grp_lock);
spin_lock_bh(&grp->mcg_lock);
list_for_each_entry(elem, &grp->qp_list, qp_list) {
if (elem->qp == qp) {
err = 0;
goto out;
}
}
if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) {
err = -ENOMEM;
goto out;
}
elem = rxe_alloc(&rxe->mc_elem_pool);
if (!elem) {
err = -ENOMEM;
goto out;
}
/* each qp holds a ref on the grp */
rxe_add_ref(grp);
grp->num_qp++;
elem->qp = qp;
elem->grp = grp;
list_add(&elem->qp_list, &grp->qp_list);
list_add(&elem->grp_list, &qp->grp_list);
err = 0;
out:
spin_unlock_bh(&grp->mcg_lock);
spin_unlock_bh(&qp->grp_lock);
return err;
}
int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
union ib_gid *mgid)
{
struct rxe_mc_grp *grp;
struct rxe_mc_elem *elem, *tmp;
grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
if (!grp)
goto err1;
spin_lock_bh(&qp->grp_lock);
spin_lock_bh(&grp->mcg_lock);
list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
if (elem->qp == qp) {
list_del(&elem->qp_list);
list_del(&elem->grp_list);
grp->num_qp--;
spin_unlock_bh(&grp->mcg_lock);
spin_unlock_bh(&qp->grp_lock);
rxe_drop_ref(elem);
rxe_drop_ref(grp); /* ref held by QP */
rxe_drop_ref(grp); /* ref from get_key */
return 0;
}
}
spin_unlock_bh(&grp->mcg_lock);
spin_unlock_bh(&qp->grp_lock);
rxe_drop_ref(grp); /* ref from get_key */
err1:
return -EINVAL;
}
/*
 * Detach @qp from every multicast group it belongs to.
 *
 * Elements are taken off qp->grp_list one at a time. The two locks are
 * never held together here: qp->grp_lock is released before grp->mcg_lock
 * is taken, and the references on the group and the element are dropped
 * with no lock held.
 */
void rxe_drop_all_mcast_groups(struct rxe_qp *qp)
{
	struct rxe_mc_elem *member;
	struct rxe_mc_grp *grp;

	for (;;) {
		spin_lock_bh(&qp->grp_lock);
		if (list_empty(&qp->grp_list)) {
			spin_unlock_bh(&qp->grp_lock);
			return;
		}
		member = list_first_entry(&qp->grp_list, struct rxe_mc_elem,
					  grp_list);
		list_del(&member->grp_list);
		spin_unlock_bh(&qp->grp_lock);

		grp = member->grp;

		spin_lock_bh(&grp->mcg_lock);
		list_del(&member->qp_list);
		grp->num_qp--;
		spin_unlock_bh(&grp->mcg_lock);

		rxe_drop_ref(grp);
		rxe_drop_ref(member);
	}
}
/*
 * Cleanup callback for a multicast group object (@arg is a
 * struct rxe_mc_grp). Drops the group's key and asks the interface layer
 * to delete the group's mgid.
 */
void rxe_mc_cleanup(void *arg)
{
	struct rxe_mc_grp *group = arg;
	struct rxe_dev *dev = group->rxe;

	rxe_drop_key(group);
	dev->ifc_ops->mcast_delete(dev, &group->mgid);
}

View file

@ -0,0 +1,173 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgtable.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
/*
 * kref release callback for a struct rxe_mmap_info.
 *
 * Takes the info off the device's pending list if it is still queued
 * there, then frees the vmalloc'ed object and the info structure.
 */
void rxe_mmap_release(struct kref *ref)
{
	struct rxe_mmap_info *info;
	struct rxe_dev *dev;

	info = container_of(ref, struct rxe_mmap_info, ref);
	dev = to_rdev(info->context->device);

	spin_lock_bh(&dev->pending_lock);
	if (!list_empty(&info->pending_mmaps))
		list_del(&info->pending_mmaps);
	spin_unlock_bh(&dev->pending_lock);

	vfree(info->obj);	/* buf */
	kfree(info);
}
/*
 * open and close keep track of how many times the memory region is mapped,
 * so that it is not released while a mapping still refers to it.
 */
/* VMA open hook: take a reference on the rxe_mmap_info stored in the VMA. */
static void rxe_vma_open(struct vm_area_struct *vma)
{
struct rxe_mmap_info *ip = vma->vm_private_data;
kref_get(&ip->ref);
}
/*
 * VMA close hook: drop the reference taken by rxe_vma_open(). The last
 * put runs rxe_mmap_release().
 */
static void rxe_vma_close(struct vm_area_struct *vma)
{
struct rxe_mmap_info *ip = vma->vm_private_data;
kref_put(&ip->ref, rxe_mmap_release);
}
/* VMA callbacks that reference-count the mapped rxe_mmap_info. */
static struct vm_operations_struct rxe_vm_ops = {
	.close	= rxe_vma_close,
	.open	= rxe_vma_open,
};
/**
* rxe_mmap - create a new mmap region
* @context: the IB user context of the process making the mmap() call
* @vma: the VMA to be initialized
* Return zero if the mmap is OK. Otherwise, return an errno.
*/
int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct rxe_dev *rxe = to_rdev(context->device);
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
unsigned long size = vma->vm_end - vma->vm_start;
struct rxe_mmap_info *ip, *pp;
int ret;
/*
* Search the device's list of objects waiting for a mmap call.
* Normally, this list is very short since a call to create a
* CQ, QP, or SRQ is soon followed by a call to mmap().
*/
spin_lock_bh(&rxe->pending_lock);
list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) {
if (context != ip->context || (__u64)offset != ip->info.offset)
continue;
/* Don't allow a mmap larger than the object. */
if (size > ip->info.size) {
pr_err("mmap region is larger than the object!\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
}
goto found_it;
}
pr_warn("unable to find pending mmap info\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
found_it:
list_del_init(&ip->pending_mmaps);
spin_unlock_bh(&rxe->pending_lock);
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret) {
pr_err("rxe: err %d from remap_vmalloc_range\n", ret);
goto done;
}
vma->vm_ops = &rxe_vm_ops;
vma->vm_private_data = ip;
rxe_vma_open(vma);
done:
return ret;
}
/*
 * Allocate information for rxe_mmap
 *
 * Allocates a struct rxe_mmap_info describing @obj, rounds @size up to a
 * whole number of pages and reserves that many bytes of the device's mmap
 * offset space (the first offset handed out is PAGE_SIZE, never 0).
 * Returns the new info, or NULL if the allocation fails.
 */
struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe,
					   u32 size,
					   struct ib_ucontext *context,
					   void *obj)
{
	struct rxe_mmap_info *info = kmalloc(sizeof(*info), GFP_KERNEL);

	if (!info)
		return NULL;

	size = PAGE_ALIGN(size);

	INIT_LIST_HEAD(&info->pending_mmaps);
	kref_init(&info->ref);
	info->context = context;
	info->obj = obj;
	info->info.size = size;

	/* carve this object's range out of the device-wide offset space */
	spin_lock_bh(&rxe->mmap_offset_lock);
	if (rxe->mmap_offset == 0)
		rxe->mmap_offset = PAGE_SIZE;
	info->info.offset = rxe->mmap_offset;
	rxe->mmap_offset += size;
	spin_unlock_bh(&rxe->mmap_offset_lock);

	return info;
}

View file

@ -0,0 +1,643 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
/*
 * lfsr (linear feedback shift register) with period 255
 *
 * Each call advances an 8-bit register kept in a function-local static and
 * returns the new value. The bit shifted in is the XOR of bits 8, 7, 6 and
 * 4 of the shifted register. The static is updated without any locking.
 */
static u8 rxe_get_key(void)
{
	static unsigned state = 1;
	unsigned shifted = state << 1;
	unsigned feedback;

	feedback = ((shifted >> 8) ^ (shifted >> 4) ^
		    (shifted >> 7) ^ (shifted >> 6)) & 1;

	state = (shifted | feedback) & 0xff;

	return state;
}
/*
 * Check that the range [iova, iova + length) lies inside the memory
 * object @mem.
 *
 * DMA regions are not range-limited and always pass. For MR/FMR regions
 * the range must sit within [mem->iova, mem->iova + mem->length). Any
 * other region type is rejected.
 *
 * The comparison is written with subtractions only, so a huge @length or
 * an @iova close to the top of the address space cannot wrap around and
 * slip past the check.
 *
 * Returns 0 if the range is acceptable, -EFAULT otherwise.
 */
int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
	switch (mem->type) {
	case RXE_MEM_TYPE_DMA:
		return 0;

	case RXE_MEM_TYPE_MR:
	case RXE_MEM_TYPE_FMR:
		if (iova < mem->iova ||
		    length > mem->length ||
		    iova - mem->iova > mem->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}
/* Union of the three IB_ACCESS_REMOTE_* access flags. */
#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ | \
				 IB_ACCESS_REMOTE_WRITE | \
				 IB_ACCESS_REMOTE_ATOMIC)
/*
 * Common initialisation for a memory object.
 *
 * Builds the lkey from the pool index (upper bits) and a fresh 8-bit value
 * from rxe_get_key() (low byte). The rkey equals the lkey when any
 * IB_ACCESS_REMOTE flag is requested and is 0 otherwise. For objects from
 * the MR pool the keys are also stored in the embedded ib_mr. The object
 * is left in the INVALID state with type NONE.
 */
static void rxe_mem_init(int access, struct rxe_mem *mem)
{
	const u32 local_key = (mem->pelem.index << 8) | rxe_get_key();
	u32 remote_key = 0;

	if (access & IB_ACCESS_REMOTE)
		remote_key = local_key;

	if (mem->pelem.pool->type == RXE_TYPE_MR) {
		mem->ibmr.lkey = local_key;
		mem->ibmr.rkey = remote_key;
	}

	mem->lkey = local_key;
	mem->rkey = remote_key;
	mem->type = RXE_MEM_TYPE_NONE;
	mem->state = RXE_MEM_STATE_INVALID;
	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}
/*
 * Cleanup callback for a memory object (@arg is a struct rxe_mem).
 * Releases the umem if one is attached and frees the two-level map table
 * if one was allocated.
 */
void rxe_mem_cleanup(void *arg)
{
	struct rxe_mem *mem = arg;
	int idx;

	if (mem->umem)
		ib_umem_release(mem->umem);

	if (!mem->map)
		return;

	for (idx = 0; idx < mem->num_map; idx++)
		kfree(mem->map[idx]);
	kfree(mem->map);
}
static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
{
int i;
int num_map;
struct rxe_map **map = mem->map;
num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
if (!mem->map)
goto err1;
for (i = 0; i < num_map; i++) {
mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
if (!mem->map[i])
goto err2;
}
WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP));
mem->map_shift = ilog2(RXE_BUF_PER_MAP);
mem->map_mask = RXE_BUF_PER_MAP - 1;
mem->num_buf = num_buf;
mem->num_map = num_map;
mem->max_buf = num_map * RXE_BUF_PER_MAP;
return 0;
err2:
for (i--; i >= 0; i--)
kfree(mem->map[i]);
kfree(mem->map);
err1:
return -ENOMEM;
}
/*
 * Initialise @mem as a DMA region owned by @pd with the given @access
 * flags. No map table is allocated; the region is immediately VALID.
 * Always returns 0.
 */
int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
		     int access, struct rxe_mem *mem)
{
	rxe_mem_init(access, mem);

	/* override the INVALID/NONE defaults set by rxe_mem_init() */
	mem->type = RXE_MEM_TYPE_DMA;
	mem->state = RXE_MEM_STATE_VALID;
	mem->access = access;
	mem->pd = pd;

	return 0;
}
/*
 * Initialise @mem as a user memory region.
 *
 * Pins the user range [start, start + length) with ib_umem_get(),
 * allocates a map table with one buffer descriptor per umem entry and
 * records the kernel virtual address of every page.
 *
 * Returns 0 on success, -EINVAL if the umem cannot be obtained, the error
 * from rxe_mem_alloc(), or -ENOMEM if a page has no kernel address.
 *
 * Error-path ownership: when rxe_mem_alloc() fails the umem is released
 * here and mem->umem is cleared, so a later rxe_mem_cleanup() does not
 * release it again. On the "null vaddr" path mem->umem and mem->map stay
 * attached to @mem and are left for rxe_mem_cleanup() to free.
 */
int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mem)
{
	int			entry;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct scatterlist	*sg;
	int			num_buf;
	void			*vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
	if (IS_ERR(umem)) {
		pr_warn("err %d from rxe_umem_get\n",
			(int)PTR_ERR(umem));
		err = -EINVAL;
		goto err1;
	}

	mem->umem = umem;
	num_buf = umem->nmap;

	rxe_mem_init(access, mem);

	err = rxe_mem_alloc(rxe, mem, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mem_alloc\n", err);
		ib_umem_release(umem);
		/* released above; keep the cleanup path from doing it again */
		mem->umem = NULL;
		goto err1;
	}

	WARN_ON(!is_power_of_2(umem->page_size));

	mem->page_shift		= ilog2(umem->page_size);
	mem->page_mask		= umem->page_size - 1;

	num_buf			= 0;
	map			= mem->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
			vaddr = page_address(sg_page(sg));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				err = -ENOMEM;
				goto err1;
			}

			/*
			 * Move to the next map only when another buffer is
			 * actually going to be stored. Advancing right after
			 * filling the last slot would read mem->map[] one
			 * entry past its end whenever the number of buffers
			 * is a multiple of RXE_BUF_PER_MAP.
			 */
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = umem->page_size;
			num_buf++;
			buf++;
		}
	}

	mem->pd			= pd;
	mem->umem		= umem;
	mem->access		= access;
	mem->length		= length;
	mem->iova		= iova;
	mem->va			= start;
	mem->offset		= ib_umem_offset(umem);
	mem->state		= RXE_MEM_STATE_VALID;
	mem->type		= RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}
/*
 * Initialise @mem as a fast-registration region able to hold up to
 * @max_pages pages. The region starts in the FREE state. Returns 0 or the
 * error from rxe_mem_alloc().
 */
int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem)
{
	int ret;

	rxe_mem_init(0, mem);

	/* In fastreg, we also set the rkey */
	mem->ibmr.rkey = mem->ibmr.lkey;

	ret = rxe_mem_alloc(rxe, mem, max_pages);
	if (ret)
		return ret;

	mem->type = RXE_MEM_TYPE_MR;
	mem->state = RXE_MEM_STATE_FREE;
	/* rxe_mem_alloc() rounded max_buf up to a whole map; narrow it */
	mem->max_buf = max_pages;
	mem->pd = pd;

	return 0;
}
/*
 * Translate @iova inside @mem into a position in the map table.
 *
 * On return *m_out is the map index, *n_out the buffer index within that
 * map and *offset_out the byte offset inside that buffer.
 *
 * When mem->page_shift is non-zero the position is computed directly with
 * shifts and masks. Otherwise the buffers are walked one by one,
 * subtracting each buffer's size until the offset falls inside one. The
 * walk performs no bounds check of its own.
 */
static void lookup_iova(
	struct rxe_mem	*mem,
	u64		iova,
	int		*m_out,
	int		*n_out,
	size_t		*offset_out)
{
	size_t offset = iova - mem->iova + mem->offset;
	int map_index = 0;
	int buf_index = 0;
	u64 length;

	if (likely(mem->page_shift)) {
		size_t page_no = offset >> mem->page_shift;

		*offset_out = offset & mem->page_mask;
		*n_out = page_no & mem->map_mask;
		*m_out = page_no >> mem->map_shift;
		return;
	}

	for (;;) {
		length = mem->map[map_index]->buf[buf_index].size;
		if (offset < length)
			break;

		offset -= length;
		if (++buf_index == RXE_BUF_PER_MAP) {
			map_index++;
			buf_index = 0;
		}
	}

	*m_out = map_index;
	*n_out = buf_index;
	*offset_out = offset;
}
/*
 * Return the kernel address that corresponds to @iova in @mem, provided
 * the @length bytes starting there sit inside a single buffer.
 *
 * Returns NULL (after a warning) if the region is not VALID, the range
 * check fails, or the range would cross a buffer boundary. A region
 * without a map table returns @iova itself, cast to a pointer.
 */
void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
	size_t offset;
	int m, n;

	if (mem->state != RXE_MEM_STATE_VALID) {
		pr_warn("mem not in valid state\n");
		return NULL;
	}

	if (!mem->map)
		return (void *)(uintptr_t)iova;

	if (mem_check_range(mem, iova, length)) {
		pr_warn("range violation\n");
		return NULL;
	}

	lookup_iova(mem, iova, &m, &n, &offset);

	if (offset + length > mem->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		return NULL;
	}

	return (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;
}
/* Copy length bytes between the buffer (addr, addr+length-1) and the
 * mem object, starting at iova inside the object. dir selects the
 * direction: to_mem_obj copies addr -> mem, any other value copies
 * mem -> addr. If crcp is non-NULL the crc32 of the copied bytes is
 * folded into *crcp. Returns 0 on success or -EFAULT when the range
 * check fails. Caller must hold a reference to mem.
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
enum copy_direction dir, u32 *crcp)
{
int err;
int bytes;
u8 *va;
struct rxe_map **map;
struct rxe_phys_buf *buf;
int m;
int i;
size_t offset;
u32 crc = crcp ? (*crcp) : 0;
/* DMA regions: iova is used directly as an address, one flat copy */
if (mem->type == RXE_MEM_TYPE_DMA) {
u8 *src, *dest;
src = (dir == to_mem_obj) ?
addr : ((void *)(uintptr_t)iova);
dest = (dir == to_mem_obj) ?
((void *)(uintptr_t)iova) : addr;
if (crcp)
*crcp = crc32_le(*crcp, src, length);
memcpy(dest, src, length);
return 0;
}
WARN_ON(!mem->map);
err = mem_check_range(mem, iova, length);
if (err) {
err = -EFAULT;
goto err1;
}
/* find the map (m), buffer (i) and byte offset where iova starts */
lookup_iova(mem, iova, &m, &i, &offset);
map = mem->map + m;
buf = map[0]->buf + i;
/* copy buffer by buffer; only the first one starts at a non-zero offset */
while (length > 0) {
u8 *src, *dest;
va = (u8 *)(uintptr_t)buf->addr + offset;
src = (dir == to_mem_obj) ? addr : va;
dest = (dir == to_mem_obj) ? va : addr;
/* clamp to what is left in this buffer and in the request */
bytes = buf->size - offset;
if (bytes > length)
bytes = length;
if (crcp)
crc = crc32_le(crc, src, bytes);
memcpy(dest, src, bytes);
length -= bytes;
addr += bytes;
offset = 0;
/* step to the next buffer, moving to the next map when this one is used up */
buf++;
i++;
if (i == RXE_BUF_PER_MAP) {
i = 0;
map++;
buf = map[0]->buf;
}
}
/* the running crc is only published once the whole copy has succeeded */
if (crcp)
*crcp = crc;
return 0;
err1:
return err;
}
/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 *
 * Copies length bytes between addr and the sg list described by dma,
 * resuming at dma->cur_sge / dma->sge_offset. Each sge's memory object is
 * looked up by lkey with lookup_mem() and the data is moved with
 * rxe_mem_copy(). On success dma->sge_offset and dma->resid are updated.
 * dma->cur_sge is advanced as the list is walked, including on the paths
 * that end in an error.
 *
 * Returns 0 on success (or when length is 0), -EINVAL if length exceeds
 * dma->resid or an lkey lookup fails, -ENOSPC if the sg list runs out,
 * or the error returned by rxe_mem_copy().
 */
int copy_data(
struct rxe_dev *rxe,
struct rxe_pd *pd,
int access,
struct rxe_dma_info *dma,
void *addr,
int length,
enum copy_direction dir,
u32 *crcp)
{
int bytes;
struct rxe_sge *sge = &dma->sge[dma->cur_sge];
int offset = dma->sge_offset;
int resid = dma->resid;
struct rxe_mem *mem = NULL;
u64 iova;
int err;
if (length == 0)
return 0;
if (length > resid) {
err = -EINVAL;
goto err2;
}
/* only look up the current sge if it still has bytes left to use */
if (sge->length && (offset < sge->length)) {
mem = lookup_mem(pd, access, sge->lkey, lookup_local);
if (!mem) {
err = -EINVAL;
goto err1;
}
}
while (length > 0) {
bytes = length;
/* current sge exhausted: drop its mem ref and move to the next sge */
if (offset >= sge->length) {
if (mem) {
rxe_drop_ref(mem);
mem = NULL;
}
sge++;
dma->cur_sge++;
offset = 0;
if (dma->cur_sge >= dma->num_sge) {
err = -ENOSPC;
goto err2;
}
if (sge->length) {
mem = lookup_mem(pd, access, sge->lkey,
lookup_local);
if (!mem) {
err = -EINVAL;
goto err1;
}
} else {
/* zero-length sge: skip it without a lookup */
continue;
}
}
/* never copy past the end of the current sge */
if (bytes > sge->length - offset)
bytes = sge->length - offset;
if (bytes > 0) {
iova = sge->addr + offset;
err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
if (err)
goto err2;
offset += bytes;
resid -= bytes;
length -= bytes;
addr += bytes;
}
}
dma->sge_offset = offset;
dma->resid = resid;
if (mem)
rxe_drop_ref(mem);
return 0;
/* err2: a mem reference may still be held; err1: none is held */
err2:
if (mem)
rxe_drop_ref(mem);
err1:
return err;
}
/*
 * Skip @length bytes of the sg list described by @dma without copying.
 *
 * On success dma->sge_offset and dma->resid are updated and 0 is returned.
 * Returns -ENOSPC if the sg list is exhausted first. dma->cur_sge is
 * advanced as the list is walked, including on the -ENOSPC path.
 */
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	unsigned int room;
	unsigned int bytes;

	while (length) {
		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		/* consume no more than what is left in the current sge */
		room = sge->length - offset;
		bytes = (length > room) ? room : length;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}
/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 *
 * The pool index is taken from the key's upper bits (key >> 8). Returns
 * the mem with the reference obtained from rxe_pool_get_index() still
 * held, or NULL if any check fails (the reference is dropped in that
 * case).
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type)
{
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;
	struct rxe_mem *mem;
	int key_mismatch;

	if (index < RXE_MIN_MR_INDEX || index > RXE_MAX_MR_INDEX)
		return NULL;

	mem = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mem)
		return NULL;

	key_mismatch = (type == lookup_local && mem->lkey != key) ||
		       (type == lookup_remote && mem->rkey != key);

	if (key_mismatch ||
	    mem->pd != pd ||
	    (access && !(access & mem->access)) ||
	    mem->state != RXE_MEM_STATE_VALID) {
		rxe_drop_ref(mem);
		return NULL;
	}

	return mem;
}
/*
 * Load @num_pages page addresses from @page into the map table of @mem and
 * mark the region VALID, covering @num_pages pages starting at @iova.
 *
 * Every buffer is given the size 1 << mem->page_shift.
 *
 * Returns 0 on success or -EINVAL if @num_pages exceeds mem->max_buf.
 */
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
		      u64 *page, int num_pages, u64 iova)
{
	int i;
	int num_buf;
	int err;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int page_size;

	if (num_pages > mem->max_buf) {
		err = -EINVAL;
		goto err1;
	}

	num_buf		= 0;
	page_size	= 1 << mem->page_shift;
	map		= mem->map;
	buf		= map[0]->buf;

	for (i = 0; i < num_pages; i++) {
		/*
		 * Move to the next map only when another page is actually
		 * going to be stored. Advancing right after filling the last
		 * slot would read mem->map[] one entry past its end whenever
		 * num_pages is a multiple of RXE_BUF_PER_MAP.
		 */
		if (num_buf == RXE_BUF_PER_MAP) {
			map++;
			buf = map[0]->buf;
			num_buf = 0;
		}

		buf->addr = *page++;
		buf->size = page_size;
		buf++;
		num_buf++;
	}

	mem->iova	= iova;
	mem->va		= iova;
	mem->length	= num_pages << mem->page_shift;
	mem->state	= RXE_MEM_STATE_VALID;

	return 0;

err1:
	return err;
}

View file

@ -0,0 +1,708 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <net/udp_tunnel.h>
#include <net/sch_generic.h>
#include <linux/netfilter.h>
#include <rdma/ib_addr.h>
#include "rxe.h"
#include "rxe_net.h"
#include "rxe_loc.h"
/* All rxe devices created by rxe_net_add(), linked through rxe_dev.list. */
static LIST_HEAD(rxe_dev_list);
/* Initialised at run time by rxe_net_init(), not statically. */
static spinlock_t dev_list_lock; /* spinlock for device list */
/*
 * Find the rxe device layered on top of @ndev.
 *
 * Returns the matching device or NULL. The device list lock is dropped
 * before returning and no reference is taken on the result.
 */
struct rxe_dev *net_to_rxe(struct net_device *ndev)
{
	struct rxe_dev *cur;
	struct rxe_dev *match = NULL;

	spin_lock_bh(&dev_list_lock);
	list_for_each_entry(cur, &rxe_dev_list, list) {
		if (cur->ndev != ndev)
			continue;
		match = cur;
		break;
	}
	spin_unlock_bh(&dev_list_lock);

	return match;
}
/*
 * Find the rxe device whose IB device name equals @name.
 *
 * Returns the matching device or NULL. The device list lock is dropped
 * before returning and no reference is taken on the result.
 */
struct rxe_dev *get_rxe_by_name(const char* name)
{
	struct rxe_dev *cur;
	struct rxe_dev *match = NULL;

	spin_lock_bh(&dev_list_lock);
	list_for_each_entry(cur, &rxe_dev_list, list) {
		if (strcmp(name, cur->ib_dev.name) == 0) {
			match = cur;
			break;
		}
	}
	spin_unlock_bh(&dev_list_lock);

	return match;
}
/* The IPv4 and IPv6 UDP tunnel sockets; set up in rxe_net_init(). */
struct rxe_recv_sockets recv_sockets;
/*
 * Build an 8-byte identifier from the 6-byte address of @ndev: the first
 * three address bytes (with bit 1 of the first byte flipped), then
 * 0xff 0xfe, then the last three address bytes.
 */
static __be64 rxe_mac_to_eui64(struct net_device *ndev)
{
	unsigned char *mac = ndev->dev_addr;
	__be64 guid;
	unsigned char *out = (unsigned char *)&guid;

	/* fixed filler bytes in the middle */
	out[3] = 0xff;
	out[4] = 0xfe;

	/* upper half of the address, first byte XORed with 2 */
	out[0] = mac[0] ^ 2;
	out[1] = mac[1];
	out[2] = mac[2];

	/* lower half of the address */
	out[5] = mac[3];
	out[6] = mac[4];
	out[7] = mac[5];

	return guid;
}
/* ifc_ops.node_guid: derived from the underlying net device's address. */
static __be64 node_guid(struct rxe_dev *rxe)
{
return rxe_mac_to_eui64(rxe->ndev);
}
/* ifc_ops.port_guid: computed exactly like node_guid(). */
static __be64 port_guid(struct rxe_dev *rxe)
{
return rxe_mac_to_eui64(rxe->ndev);
}
/*
 * ifc_ops.dma_device: return the parent struct device of the net device
 * under @rxe. When that net device has IFF_802_1Q_VLAN set, the device
 * returned by vlan_dev_real_dev() is used instead.
 */
static struct device *dma_device(struct rxe_dev *rxe)
{
	struct net_device *lower;

	lower = (rxe->ndev->priv_flags & IFF_802_1Q_VLAN) ?
		vlan_dev_real_dev(rxe->ndev) : rxe->ndev;

	return lower->dev.parent;
}
/*
 * ifc_ops.mcast_add: map @mgid to an Ethernet multicast address with
 * ipv6_eth_mc_map() and add it to the net device. Returns the result of
 * dev_mc_add().
 */
static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char eth_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, eth_addr);

	return dev_mc_add(rxe->ndev, eth_addr);
}
/*
 * ifc_ops.mcast_delete: map @mgid to an Ethernet multicast address with
 * ipv6_eth_mc_map() and remove it from the net device. Returns the result
 * of dev_mc_del().
 */
static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char eth_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, eth_addr);

	return dev_mc_del(rxe->ndev, eth_addr);
}
/*
 * Look up an IPv4 UDP route from @saddr to @daddr, bound to @ndev, in
 * init_net. Returns the route's dst_entry, or NULL (after a rate-limited
 * error message) if no route is found.
 */
static struct dst_entry *rxe_find_route4(struct net_device *ndev,
					 struct in_addr *saddr,
					 struct in_addr *daddr)
{
	struct flowi4 fl;
	struct rtable *rt;

	memset(&fl, 0, sizeof(fl));
	fl.flowi4_proto = IPPROTO_UDP;
	fl.flowi4_oif = ndev->ifindex;
	fl.saddr = saddr->s_addr;
	fl.daddr = daddr->s_addr;

	rt = ip_route_output_key(&init_net, &fl);
	if (IS_ERR(rt)) {
		pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
		return NULL;
	}

	return &rt->dst;
}
#if IS_ENABLED(CONFIG_IPV6)
/*
 * Look up an IPv6 UDP route from @saddr to @daddr, bound to @ndev, using
 * the IPv6 receive socket's network namespace. Returns the dst_entry or
 * NULL if no usable route exists.
 *
 * ndst starts out NULL because the error path hands it to dst_release()
 * and nothing here guarantees that the lookup callback stores to it when
 * it fails.
 */
static struct dst_entry *rxe_find_route6(struct net_device *ndev,
					 struct in6_addr *saddr,
					 struct in6_addr *daddr)
{
	struct dst_entry *ndst = NULL;
	struct flowi6 fl6 = { { 0 } };

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = ndev->ifindex;
	memcpy(&fl6.saddr, saddr, sizeof(*saddr));
	memcpy(&fl6.daddr, daddr, sizeof(*daddr));
	fl6.flowi6_proto = IPPROTO_UDP;

	if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk),
						recv_sockets.sk6->sk, &ndst, &fl6))) {
		pr_err_ratelimited("no route to %pI6\n", daddr);
		goto put;
	}

	/* the lookup succeeded but the returned entry itself carries an error */
	if (unlikely(ndst->error)) {
		pr_err("no route to %pI6\n", daddr);
		goto put;
	}

	return ndst;
put:
	dst_release(ndst);
	return NULL;
}

#else

/* Without IPv6 support there is never an IPv6 route. */
static struct dst_entry *rxe_find_route6(struct net_device *ndev,
					 struct in6_addr *saddr,
					 struct in6_addr *daddr)
{
	return NULL;
}

#endif
/*
 * encap_rcv callback installed on the UDP tunnel sockets.
 *
 * Drops the skb if its device has no rxe device or it cannot be
 * linearized. Otherwise fills in the packet info stored in the skb
 * (header start right after the UDP header, payload length from the UDP
 * length field) and passes the skb to rxe_rcv().
 */
static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
	struct rxe_dev *rxe = net_to_rxe(skb->dev);
	struct udphdr *udph;

	if (!rxe)
		goto drop;

	if (skb_linearize(skb)) {
		pr_err("skb_linearize failed\n");
		goto drop;
	}

	udph = udp_hdr(skb);

	pkt->rxe = rxe;
	pkt->port_num = 1;
	pkt->mask = RXE_GRH_MASK;
	pkt->hdr = (u8 *)(udph + 1);
	pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);

	return rxe_rcv(skb);

drop:
	kfree_skb(skb);
	return 0;
}
/*
 * Create a kernel UDP socket bound to @port in @net (IPv6-only when @ipv6
 * is true, IPv4 otherwise) and install rxe_udp_encap_recv() as its
 * encapsulation receive handler.
 *
 * Returns the socket, or an ERR_PTR() carrying the error from
 * udp_sock_create().
 *
 * Both configuration structures are zeroed before use, so any member not
 * set explicitly below reaches the UDP layer as 0/NULL instead of stack
 * garbage.
 */
static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
					   bool ipv6)
{
	int err;
	struct socket *sock;
	struct udp_port_cfg udp_cfg;
	struct udp_tunnel_sock_cfg tnl_cfg;

	memset(&udp_cfg, 0, sizeof(udp_cfg));
	memset(&tnl_cfg, 0, sizeof(tnl_cfg));

	if (ipv6) {
		udp_cfg.family = AF_INET6;
		udp_cfg.ipv6_v6only = 1;
	} else {
		udp_cfg.family = AF_INET;
	}

	udp_cfg.local_udp_port = port;

	/* Create UDP socket */
	err = udp_sock_create(net, &udp_cfg, &sock);
	if (err < 0) {
		pr_err("failed to create udp socket. err = %d\n", err);
		return ERR_PTR(err);
	}

	tnl_cfg.sk_user_data = NULL;
	tnl_cfg.encap_type = 1;
	tnl_cfg.encap_rcv = rxe_udp_encap_recv;
	tnl_cfg.encap_destroy = NULL;

	/* Setup UDP tunnel */
	setup_udp_tunnel_sock(net, sock, &tnl_cfg);

	return sock;
}
/* Release a socket obtained from rxe_setup_udp_tunnel(). */
static void rxe_release_udp_tunnel(struct socket *sk)
{
udp_tunnel_sock_release(sk);
}
/*
 * Push a UDP header onto @skb and fill it in. The length field covers the
 * whole skb after the push; the checksum field is set to 0.
 */
static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
			    __be16 dst_port)
{
	struct udphdr *hdr;

	__skb_push(skb, sizeof(*hdr));
	skb_reset_transport_header(skb);
	hdr = udp_hdr(skb);

	hdr->source = src_port;
	hdr->dest = dst_port;
	hdr->check = 0;
	hdr->len = htons(skb->len);
}
/*
 * Attach route @dst to @skb, push an IPv4 header and fill it in.
 *
 * The skb is scrubbed and its hash and IP control block are cleared first.
 * The identification field is selected once the other header fields are
 * set. The total length and header checksum are computed last.
 */
static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
			     __be32 saddr, __be32 daddr, __u8 proto,
			     __u8 tos, __u8 ttl, __be16 df, bool xnet)
{
	struct iphdr *hdr;

	skb_scrub_packet(skb, xnet);
	skb_clear_hash(skb);
	skb_dst_set(skb, dst);
	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

	skb_push(skb, sizeof(*hdr));
	skb_reset_network_header(skb);
	hdr = ip_hdr(skb);

	hdr->version = IPVERSION;
	hdr->ihl = sizeof(*hdr) >> 2;
	hdr->tos = tos;
	hdr->ttl = ttl;
	hdr->protocol = proto;
	hdr->frag_off = df;
	hdr->saddr = saddr;
	hdr->daddr = daddr;

	__ip_select_ident(dev_net(dst->dev), hdr,
			  skb_shinfo(skb)->gso_segs ?: 1);
	hdr->tot_len = htons(skb->len);
	ip_send_check(hdr);
}
/*
 * Attach route @dst to @skb, push an IPv6 header and fill it in.
 *
 * payload_len is written once, as the skb length minus the IPv6 header
 * that was just pushed. An earlier store of the full skb length was
 * unconditionally overwritten by this one and has been removed.
 */
static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
			     struct in6_addr *saddr, struct in6_addr *daddr,
			     __u8 proto, __u8 prio, __u8 ttl)
{
	struct ipv6hdr *ip6h;

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
			    | IPSKB_REROUTED);
	skb_dst_set(skb, dst);

	__skb_push(skb, sizeof(*ip6h));
	skb_reset_network_header(skb);
	ip6h		  = ipv6_hdr(skb);
	ip6_flow_hdr(ip6h, prio, htonl(0));
	ip6h->nexthdr     = proto;
	ip6h->hop_limit   = ttl;
	ip6h->daddr	  = *daddr;
	ip6h->saddr	  = *saddr;
	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}
/*
 * Prepend UDP and IPv4 headers to @skb for the address vector @av.
 *
 * Returns -EHOSTUNREACH if no route exists, 0 otherwise. When the source
 * and destination addresses are identical, RXE_LOOPBACK_MASK is set in
 * the packet info's mask.
 */
static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
{
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
	struct in_addr *src = &av->sgid_addr._sockaddr_in.sin_addr;
	struct in_addr *dest = &av->dgid_addr._sockaddr_in.sin_addr;
	struct dst_entry *route;

	route = rxe_find_route4(rxe->ndev, src, dest);
	if (!route) {
		pr_err("Host not reachable\n");
		return -EHOSTUNREACH;
	}

	if (memcmp(src, dest, sizeof(*dest)) == 0)
		pkt->mask |= RXE_LOOPBACK_MASK;

	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
			htons(ROCE_V2_UDP_DPORT));

	/* always sent with the don't-fragment bit and without xnet scrub */
	prepare_ipv4_hdr(route, skb, src->s_addr, dest->s_addr, IPPROTO_UDP,
			 av->grh.traffic_class, av->grh.hop_limit,
			 htons(IP_DF), false);

	return 0;
}
/*
 * Prepend UDP and IPv6 headers to @skb for the address vector @av.
 *
 * Returns -EHOSTUNREACH if no route exists, 0 otherwise. When the source
 * and destination addresses are identical, RXE_LOOPBACK_MASK is set in
 * the packet info's mask.
 */
static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
{
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
	struct in6_addr *src = &av->sgid_addr._sockaddr_in6.sin6_addr;
	struct in6_addr *dest = &av->dgid_addr._sockaddr_in6.sin6_addr;
	struct dst_entry *route;

	route = rxe_find_route6(rxe->ndev, src, dest);
	if (!route) {
		pr_err("Host not reachable\n");
		return -EHOSTUNREACH;
	}

	if (memcmp(src, dest, sizeof(*dest)) == 0)
		pkt->mask |= RXE_LOOPBACK_MASK;

	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
			htons(ROCE_V2_UDP_DPORT));

	prepare_ipv6_hdr(route, skb, src, dest, IPPROTO_UDP,
			 av->grh.traffic_class,
			 av->grh.hop_limit);

	return 0;
}
/*
 * ifc_ops.prepare: add the network headers matching the packet's address
 * vector (IPv4 or IPv6; other network types add nothing) and store the
 * header ICRC in *@crc. The ICRC is computed even when header preparation
 * failed. Returns the result of prepare4()/prepare6(), or 0.
 */
static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
		   struct sk_buff *skb, u32 *crc)
{
	struct rxe_av *av = rxe_get_av(pkt);
	int ret = 0;

	switch (av->network_type) {
	case RDMA_NETWORK_IPV4:
		ret = prepare4(rxe, skb, av);
		break;
	case RDMA_NETWORK_IPV6:
		ret = prepare6(rxe, skb, av);
		break;
	default:
		break;
	}

	*crc = rxe_icrc_hdr(pkt, skb);

	return ret;
}
/*
 * skb destructor for transmitted clones. Decrements the owning qp's count
 * of outstanding skbs and, if the qp is waiting for skbs and the count has
 * fallen below RXE_INFLIGHT_SKBS_PER_QP_LOW, schedules the requester task.
 */
static void rxe_skb_tx_dtor(struct sk_buff *skb)
{
	struct rxe_qp *qp = skb->sk->sk_user_data;
	int outstanding = atomic_dec_return(&qp->skb_out);

	if (unlikely(qp->need_req_skb &&
		     outstanding < RXE_INFLIGHT_SKBS_PER_QP_LOW))
		rxe_run_task(&qp->req.task, 1);
}
/*
 * ifc_ops.send: transmit a clone of skb through the IPv4 or IPv6 output
 * path, depending on the address vector's network type.
 *
 * Returns 0 on success, in which case the original skb is freed here.
 * Returns -ENOMEM if the clone cannot be allocated, -EINVAL for an unknown
 * network type (the clone is freed) and -EAGAIN if the output path reports
 * an error. On all failure returns the original skb is NOT freed by this
 * function.
 */
static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb)
{
struct sk_buff *nskb;
struct rxe_av *av;
int err;
av = rxe_get_av(pkt);
nskb = skb_clone(skb, GFP_ATOMIC);
if (!nskb)
return -ENOMEM;
/* tie the clone to the qp's socket so rxe_skb_tx_dtor() runs when it is freed */
nskb->destructor = rxe_skb_tx_dtor;
nskb->sk = pkt->qp->sk->sk;
if (av->network_type == RDMA_NETWORK_IPV4) {
err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
} else if (av->network_type == RDMA_NETWORK_IPV6) {
err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
} else {
pr_err("Unknown layer 3 protocol: %d\n", av->network_type);
kfree_skb(nskb);
return -EINVAL;
}
if (unlikely(net_xmit_eval(err))) {
pr_debug("error sending packet: %d\n", err);
return -EAGAIN;
}
/* the clone went out; the original is no longer needed */
kfree_skb(skb);
return 0;
}
/* ifc_ops.loopback: hand the skb straight to the receive path. */
static int loopback(struct sk_buff *skb)
{
return rxe_rcv(skb);
}
/*
 * Return non-zero when the interface id of the destination GID in av
 * equals this device's port GUID.
 */
static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
{
return rxe->port.port_guid == av->grh.dgid.global.interface_id;
}
/*
 * ifc_ops.init_packet: allocate an skb with @paylen bytes of zeroed
 * payload and enough headroom for the Ethernet, IP (v4 or v6, chosen by
 * the address vector) and UDP headers plus the device's link-layer
 * reserve.
 *
 * Fills in the rxe, port_num and hdr fields of @pkt and ORs RXE_GRH_MASK
 * into its mask. Returns the skb or NULL if allocation fails.
 */
static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,
				   int paylen, struct rxe_pkt_info *pkt)
{
	const int is_ipv4 = (av->network_type == RDMA_NETWORK_IPV4);
	unsigned int hdr_len;
	struct sk_buff *skb;

	hdr_len = ETH_HLEN + sizeof(struct udphdr);
	if (is_ipv4)
		hdr_len += sizeof(struct iphdr);
	else
		hdr_len += sizeof(struct ipv6hdr);

	skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev),
			GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));

	skb->dev = rxe->ndev;
	skb->protocol = is_ipv4 ? htons(ETH_P_IP) : htons(ETH_P_IPV6);

	pkt->rxe = rxe;
	pkt->port_num = 1;
	pkt->hdr = skb_put(skb, paylen);
	pkt->mask |= RXE_GRH_MASK;

	memset(pkt->hdr, 0, paylen);

	return skb;
}
/*
 * this is required by rxe_cfg to match rxe devices in
 * /sys/class/infiniband up with their underlying ethernet devices
 *
 * Returns the name of the underlying net device; port_num is not used.
 */
static char *parent_name(struct rxe_dev *rxe, unsigned int port_num)
{
return rxe->ndev->name;
}
/* ifc_ops.link_layer: always Ethernet; neither argument is used. */
static enum rdma_link_layer link_layer(struct rxe_dev *rxe,
unsigned int port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
/*
 * Interface operations implemented in this file; rxe_net_add() installs
 * this table as rxe->ifc_ops on every device it creates.
 */
static struct rxe_ifc_ops ifc_ops = {
.node_guid = node_guid,
.port_guid = port_guid,
.dma_device = dma_device,
.mcast_add = mcast_add,
.mcast_delete = mcast_delete,
.prepare = prepare,
.send = send,
.loopback = loopback,
.init_packet = init_packet,
.parent_name = parent_name,
.link_layer = link_layer,
};
/*
 * Create an rxe device on top of @ndev and add it to the device list.
 *
 * Returns the new device, or NULL if the IB device cannot be allocated or
 * rxe_add() fails.
 *
 * list_add_tail() takes the new entry first and the list head second. The
 * arguments used to be passed the other way round, which spliced the
 * global head into the (uninitialised) per-device node instead of
 * appending the device to rxe_dev_list.
 */
struct rxe_dev *rxe_net_add(struct net_device *ndev)
{
	int err;
	struct rxe_dev *rxe = NULL;

	rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
	if (!rxe)
		return NULL;

	rxe->ifc_ops = &ifc_ops;
	rxe->ndev = ndev;

	err = rxe_add(rxe, ndev->mtu);
	if (err) {
		ib_dealloc_device(&rxe->ib_dev);
		return NULL;
	}

	spin_lock_bh(&dev_list_lock);
	list_add_tail(&rxe->list, &rxe_dev_list);
	spin_unlock_bh(&dev_list_lock);

	return rxe;
}
/*
 * Remove every rxe device. Each device is unlinked from the list under
 * dev_list_lock; the lock is dropped around the rxe_remove() call.
 */
void rxe_remove_all(void)
{
	struct rxe_dev *dev;

	spin_lock_bh(&dev_list_lock);
	for (;;) {
		if (list_empty(&rxe_dev_list))
			break;

		dev = list_first_entry(&rxe_dev_list, struct rxe_dev, list);
		list_del(&dev->list);

		spin_unlock_bh(&dev_list_lock);
		rxe_remove(dev);
		spin_lock_bh(&dev_list_lock);
	}
	spin_unlock_bh(&dev_list_lock);
}
EXPORT_SYMBOL(rxe_remove_all);
static void rxe_port_event(struct rxe_dev *rxe,
enum ib_event_type event)
{
struct ib_event ev;
ev.device = &rxe->ib_dev;
ev.element.port_num = 1;
ev.event = event;
ib_dispatch_event(&ev);
}
/*
 * Mark the port active (link up), dispatch IB_EVENT_PORT_ACTIVE and log
 * the change.
 *
 * Caller must hold net_info_lock
 * NOTE(review): no lock of that name is visible in this file - confirm
 * whether this requirement is still current.
 */
void rxe_port_up(struct rxe_dev *rxe)
{
	rxe->port.attr.state = IB_PORT_ACTIVE;
	rxe->port.attr.phys_state = IB_PHYS_STATE_LINK_UP;

	rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
	pr_info("rxe: set %s active\n", rxe->ib_dev.name);
}
/*
 * Mark the port down (link down), dispatch IB_EVENT_PORT_ERR and log the
 * change.
 *
 * Caller must hold net_info_lock
 * NOTE(review): no lock of that name is visible in this file - confirm
 * whether this requirement is still current.
 */
void rxe_port_down(struct rxe_dev *rxe)
{
	rxe->port.attr.state = IB_PORT_DOWN;
	rxe->port.attr.phys_state = IB_PHYS_STATE_LINK_DOWN;

	rxe_port_event(rxe, IB_EVENT_PORT_ERR);
	pr_info("rxe: set %s down\n", rxe->ib_dev.name);
}
static int rxe_notify(struct notifier_block *not_blk,
unsigned long event,
void *arg)
{
struct net_device *ndev = netdev_notifier_info_to_dev(arg);
struct rxe_dev *rxe = net_to_rxe(ndev);
if (!rxe)
goto out;
switch (event) {
case NETDEV_UNREGISTER:
list_del(&rxe->list);
rxe_remove(rxe);
break;
case NETDEV_UP:
rxe_port_up(rxe);
break;
case NETDEV_DOWN:
rxe_port_down(rxe);
break;
case NETDEV_CHANGEMTU:
pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
case NETDEV_REBOOT:
case NETDEV_CHANGE:
case NETDEV_GOING_DOWN:
case NETDEV_CHANGEADDR:
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
default:
pr_info("rxe: ignoring netdev event = %ld for %s\n",
event, ndev->name);
break;
}
out:
return NOTIFY_OK;
}
/* Registered in rxe_net_init() and unregistered in rxe_net_exit(). */
static struct notifier_block rxe_net_notifier = {
.notifier_call = rxe_notify,
};
/*
 * Set up the network side of the driver: initialise the device list lock,
 * create the IPv6 and IPv4 UDP tunnel sockets on ROCE_V2_UDP_DPORT and
 * register the netdevice notifier.
 *
 * Returns 0 on success or a negative errno: the error from socket
 * creation, or the one from register_netdevice_notifier(). On any failure
 * every socket created so far is released and both recv_sockets pointers
 * are left NULL, so nothing is left pointing at a released socket.
 */
int rxe_net_init(void)
{
	int err;

	spin_lock_init(&dev_list_lock);

	recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
			htons(ROCE_V2_UDP_DPORT), true);
	if (IS_ERR(recv_sockets.sk6)) {
		err = PTR_ERR(recv_sockets.sk6);
		recv_sockets.sk6 = NULL;
		pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
		return err;
	}

	recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
			htons(ROCE_V2_UDP_DPORT), false);
	if (IS_ERR(recv_sockets.sk4)) {
		err = PTR_ERR(recv_sockets.sk4);
		rxe_release_udp_tunnel(recv_sockets.sk6);
		recv_sockets.sk4 = NULL;
		recv_sockets.sk6 = NULL;
		pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
		return err;
	}

	err = register_netdevice_notifier(&rxe_net_notifier);
	if (err) {
		rxe_release_udp_tunnel(recv_sockets.sk6);
		rxe_release_udp_tunnel(recv_sockets.sk4);
		/* rxe_net_exit() releases any non-NULL socket; clear both */
		recv_sockets.sk6 = NULL;
		recv_sockets.sk4 = NULL;
		pr_err("rxe: Failed to rigister netdev notifier\n");
	}

	return err;
}
void rxe_net_exit(void)
{
if (recv_sockets.sk6)
rxe_release_udp_tunnel(recv_sockets.sk6);
if (recv_sockets.sk4)
rxe_release_udp_tunnel(recv_sockets.sk4);
unregister_netdevice_notifier(&rxe_net_notifier);
}

View file

@ -0,0 +1,53 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_NET_H
#define RXE_NET_H
#include <net/sock.h>
#include <net/if_inet6.h>
#include <linux/module.h>
/*
 * The UDP tunnel sockets rxe opens on the RoCEv2 UDP port, one per
 * address family.  rxe_net_init() fills them in; a pointer is NULL when
 * the corresponding socket has not been set up.
 */
struct rxe_recv_sockets {
struct socket *sk4; /* IPv4 socket */
struct socket *sk6; /* IPv6 socket */
};
/* single instance shared by the driver; the definition lives elsewhere */
extern struct rxe_recv_sockets recv_sockets;
/*
 * NOTE(review): implementation not visible from this header - presumably
 * creates the rxe device on top of ndev; confirm return convention on
 * failure against the definition.
 */
struct rxe_dev *rxe_net_add(struct net_device *ndev);
/* create the receive sockets and register the netdevice notifier */
int rxe_net_init(void);
/* release the receive sockets and unregister the netdevice notifier */
void rxe_net_exit(void);
#endif /* RXE_NET_H */

View file

@ -0,0 +1,961 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_pack.h>
#include "rxe_opcode.h"
#include "rxe_hdr.h"
/*
 * Useful information about work request opcodes and packet opcodes, in
 * table form.
 *
 * rxe_wr_opcode_info[] is indexed by work request opcode (IB_WR_*).  Each
 * entry's mask[] is indexed by QP type (IB_QPT_*) and holds the WR_*_MASK
 * flags for that opcode on that QP type.  QP types that are not listed
 * in an entry are left at zero by the designated initializers.
 */
struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_RDMA_WRITE] = {
.name = "IB_WR_RDMA_WRITE",
.mask = {
[IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK,
},
},
[IB_WR_RDMA_WRITE_WITH_IMM] = {
.name = "IB_WR_RDMA_WRITE_WITH_IMM",
.mask = {
[IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK,
},
},
[IB_WR_SEND] = {
.name = "IB_WR_SEND",
.mask = {
[IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
},
},
[IB_WR_SEND_WITH_IMM] = {
.name = "IB_WR_SEND_WITH_IMM",
.mask = {
[IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
},
},
[IB_WR_RDMA_READ] = {
.name = "IB_WR_RDMA_READ",
.mask = {
[IB_QPT_RC] = WR_READ_MASK,
},
},
[IB_WR_ATOMIC_CMP_AND_SWP] = {
.name = "IB_WR_ATOMIC_CMP_AND_SWP",
.mask = {
[IB_QPT_RC] = WR_ATOMIC_MASK,
},
},
[IB_WR_ATOMIC_FETCH_AND_ADD] = {
.name = "IB_WR_ATOMIC_FETCH_AND_ADD",
.mask = {
[IB_QPT_RC] = WR_ATOMIC_MASK,
},
},
[IB_WR_LSO] = {
.name = "IB_WR_LSO",
.mask = {
/* not supported */
},
},
[IB_WR_SEND_WITH_INV] = {
.name = "IB_WR_SEND_WITH_INV",
.mask = {
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
},
},
[IB_WR_RDMA_READ_WITH_INV] = {
.name = "IB_WR_RDMA_READ_WITH_INV",
.mask = {
[IB_QPT_RC] = WR_READ_MASK,
},
},
/* both memory-registration style opcodes below carry WR_REG_MASK */
[IB_WR_LOCAL_INV] = {
.name = "IB_WR_LOCAL_INV",
.mask = {
[IB_QPT_RC] = WR_REG_MASK,
},
},
[IB_WR_REG_MR] = {
.name = "IB_WR_REG_MR",
.mask = {
[IB_QPT_RC] = WR_REG_MASK,
},
},
};
struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
[IB_OPCODE_RC_SEND_FIRST] = {
.name = "IB_OPCODE_RC_SEND_FIRST",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK
| RXE_SEND_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_RC_SEND_MIDDLE] = {
	/* the name mirrors the opcode identifier (no trailing ']') */
	.name = "IB_OPCODE_RC_SEND_MIDDLE",
	.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
		| RXE_MIDDLE_MASK,
	/* only the BTH precedes the payload */
	.length = RXE_BTH_BYTES,
	.offset = {
		[RXE_BTH] = 0,
		[RXE_PAYLOAD] = RXE_BTH_BYTES,
	}
},
[IB_OPCODE_RC_SEND_LAST] = {
.name = "IB_OPCODE_RC_SEND_LAST",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
| RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE",
.mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RC_SEND_ONLY] = {
.name = "IB_OPCODE_RC_SEND_ONLY",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
| RXE_RWR_MASK | RXE_SEND_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE",
.mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_FIRST] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_FIRST",
.mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_LAST] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_LAST",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
.mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_ONLY] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_ONLY",
.mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_START_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
.mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_COMP_MASK | RXE_RWR_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
[RXE_IMMDT] = RXE_BTH_BYTES
+ RXE_RETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RETH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_REQUEST] = {
.name = "IB_OPCODE_RC_RDMA_READ_REQUEST",
.mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = {
.name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST",
.mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
| RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = {
.name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE",
.mask = RXE_PAYLOAD_MASK | RXE_ACK_MASK | RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = {
.name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST",
.mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = {
.name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY",
.mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_ACKNOWLEDGE] = {
.name = "IB_OPCODE_RC_ACKNOWLEDGE",
.mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = {
.name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE",
.mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
[RXE_ATMACK] = RXE_BTH_BYTES
+ RXE_AETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_ATMACK_BYTES + RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_COMPARE_SWAP] = {
.name = "IB_OPCODE_RC_COMPARE_SWAP",
.mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_ATMETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_ATMETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_ATMETH_BYTES,
}
},
[IB_OPCODE_RC_FETCH_ADD] = {
.name = "IB_OPCODE_RC_FETCH_ADD",
.mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_ATMETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_ATMETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_ATMETH_BYTES,
}
},
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = {
.name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE",
.mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_IETH_BYTES,
}
},
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = {
	.name = "IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE",
	/*
	 * An "only" packet is the first and the last packet of its message
	 * at once, so it carries START as well as END, exactly like the
	 * other *_SEND_ONLY* entries of this table.
	 */
	.mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
		| RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
		| RXE_START_MASK | RXE_END_MASK,
	/* BTH followed by the IETH, then the payload */
	.length = RXE_BTH_BYTES + RXE_IETH_BYTES,
	.offset = {
		[RXE_BTH] = 0,
		[RXE_IETH] = RXE_BTH_BYTES,
		[RXE_PAYLOAD] = RXE_BTH_BYTES
				+ RXE_IETH_BYTES,
	}
},
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = {
.name = "IB_OPCODE_UC_SEND_FIRST",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK
| RXE_SEND_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_UC_SEND_MIDDLE] = {
.name = "IB_OPCODE_UC_SEND_MIDDLE",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
| RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_UC_SEND_LAST] = {
.name = "IB_OPCODE_UC_SEND_LAST",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
| RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE",
.mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_UC_SEND_ONLY] = {
.name = "IB_OPCODE_UC_SEND_ONLY",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
| RXE_RWR_MASK | RXE_SEND_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE",
.mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_FIRST] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_FIRST",
.mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_LAST] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_LAST",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_PAYLOAD] = RXE_BTH_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
.mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_ONLY] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_ONLY",
.mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_START_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
.mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_COMP_MASK | RXE_RWR_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
[RXE_IMMDT] = RXE_BTH_BYTES
+ RXE_RETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RETH_BYTES
+ RXE_IMMDT_BYTES,
}
},
/* RD */
[IB_OPCODE_RD_SEND_FIRST] = {
.name = "IB_OPCODE_RD_SEND_FIRST",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK
| RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_SEND_MIDDLE] = {
.name = "IB_OPCODE_RD_SEND_MIDDLE",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_SEND_MASK
| RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_SEND_LAST] = {
.name = "IB_OPCODE_RD_SEND_LAST",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
| RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_SEND_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_IMMDT] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RD_SEND_ONLY] = {
.name = "IB_OPCODE_RD_SEND_ONLY",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
| RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
| RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_IMMDT] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_FIRST] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_FIRST",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
| RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_RETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_LAST] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_LAST",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
| RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_IMMDT] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_ONLY] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_ONLY",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
| RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_WRITE_MASK | RXE_START_MASK
| RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_RETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
| RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_WRITE_MASK
| RXE_COMP_MASK | RXE_RWR_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES
+ RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_RETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
[RXE_IMMDT] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES
+ RXE_RETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES
+ RXE_RETH_BYTES
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_REQUEST] = {
.name = "IB_OPCODE_RD_RDMA_READ_REQUEST",
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
| RXE_REQ_MASK | RXE_READ_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_DETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_RETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RETH_BYTES
+ RXE_DETH_BYTES
+ RXE_RDETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = {
.name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST",
.mask = RXE_RDETH_MASK | RXE_AETH_MASK
| RXE_PAYLOAD_MASK | RXE_ACK_MASK
| RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_AETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = {
.name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE",
.mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
| RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = {
.name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST",
.mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK
| RXE_ACK_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_AETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = {
.name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY",
.mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK
| RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
[RXE_AETH] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_RDETH_BYTES
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RD_ACKNOWLEDGE] = {
	.name = "IB_OPCODE_RD_ACKNOWLEDGE",
	.mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK
		| RXE_START_MASK | RXE_END_MASK,
	.length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
	.offset = {
		[RXE_BTH] = 0,
		[RXE_RDETH] = RXE_BTH_BYTES,
		[RXE_AETH] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES,
		/*
		 * No payload is carried, but the payload offset still marks
		 * the end of the headers, as IB_OPCODE_RC_ACKNOWLEDGE does;
		 * leaving it out would make it default to 0.
		 */
		[RXE_PAYLOAD] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES
				+ RXE_AETH_BYTES,
	}
},
[IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = {
	.name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE",
	.mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK
		| RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
	.length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES
		+ RXE_RDETH_BYTES,
	.offset = {
		[RXE_BTH] = 0,
		[RXE_RDETH] = RXE_BTH_BYTES,
		[RXE_AETH] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES,
		[RXE_ATMACK] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES
				+ RXE_AETH_BYTES,
		/*
		 * The payload offset marks the end of the headers, as in
		 * IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE; leaving it out would make
		 * it default to 0.
		 */
		[RXE_PAYLOAD] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES
				+ RXE_AETH_BYTES
				+ RXE_ATMACK_BYTES,
	}
},
[IB_OPCODE_RD_COMPARE_SWAP] = {
	/* full opcode identifier, like every other entry of the table */
	.name = "IB_OPCODE_RD_COMPARE_SWAP",
	.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK
		| RXE_REQ_MASK | RXE_ATOMIC_MASK
		| RXE_START_MASK | RXE_END_MASK,
	.length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES
		+ RXE_RDETH_BYTES,
	/* headers appear in the order BTH, RDETH, DETH, ATMETH */
	.offset = {
		[RXE_BTH] = 0,
		[RXE_RDETH] = RXE_BTH_BYTES,
		[RXE_DETH] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES,
		[RXE_ATMETH] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES
				+ RXE_DETH_BYTES,
		[RXE_PAYLOAD] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES
				+ RXE_DETH_BYTES
				+ RXE_ATMETH_BYTES,
	}
},
[IB_OPCODE_RD_FETCH_ADD] = {
	.name = "IB_OPCODE_RD_FETCH_ADD",
	.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK
		| RXE_REQ_MASK | RXE_ATOMIC_MASK
		| RXE_START_MASK | RXE_END_MASK,
	.length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES
		+ RXE_RDETH_BYTES,
	/* headers appear in the order BTH, RDETH, DETH, ATMETH */
	.offset = {
		[RXE_BTH] = 0,
		[RXE_RDETH] = RXE_BTH_BYTES,
		[RXE_DETH] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES,
		[RXE_ATMETH] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES
				+ RXE_DETH_BYTES,
		/* same sum as before, without the stray doubled '+' */
		[RXE_PAYLOAD] = RXE_BTH_BYTES
				+ RXE_RDETH_BYTES
				+ RXE_DETH_BYTES
				+ RXE_ATMETH_BYTES,
	}
},
/* UD */
[IB_OPCODE_UD_SEND_ONLY] = {
.name = "IB_OPCODE_UD_SEND_ONLY",
.mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
| RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_DETH] = RXE_BTH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE",
.mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
| RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
| RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_DETH] = RXE_BTH_BYTES,
[RXE_IMMDT] = RXE_BTH_BYTES
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES
+ RXE_DETH_BYTES
+ RXE_IMMDT_BYTES,
}
},
};

View file

@ -0,0 +1,129 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_OPCODE_H
#define RXE_OPCODE_H
/*
* contains header bit mask definitions, plus the declarations of the
* rxe_opcode_info struct (per-opcode header length and offsets) and the
* rxe_wr_opcode_info struct
*/
/*
 * Work request property flags.  struct rxe_wr_opcode_info stores one
 * value of this type per QP type slot; table entries OR several of the
 * single-bit flags together.
 */
enum rxe_wr_mask {
/* single-bit flags */
WR_INLINE_MASK = BIT(0),
WR_ATOMIC_MASK = BIT(1),
WR_SEND_MASK = BIT(2),
WR_READ_MASK = BIT(3),
WR_WRITE_MASK = BIT(4),
WR_LOCAL_MASK = BIT(5),
WR_REG_MASK = BIT(6),
/* combinations of the flags above, for testing several at once */
WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK,
WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,
WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK,
};
/* number of slots in the per-QP-type mask array below */
#define WR_MAX_QPT (8)
/*
 * Information about one work request opcode.
 *   name - the opcode's name as a string
 *   mask - rxe_wr_mask flags, one slot per QP type
 */
struct rxe_wr_opcode_info {
char *name;
enum rxe_wr_mask mask[WR_MAX_QPT];
};
/* table of work request opcode information; defined in a .c file */
extern struct rxe_wr_opcode_info rxe_wr_opcode_info[];
/*
 * The header types a packet can be made of, plus the payload.
 *
 * The values serve two purposes in this file: they index
 * rxe_opcode_info.offset[] and they are the bit positions of the
 * matching RXE_*_MASK flags in enum rxe_hdr_mask.
 */
enum rxe_hdr_type {
RXE_LRH,
RXE_GRH,
RXE_BTH,
RXE_RETH,
RXE_AETH,
RXE_ATMETH,
RXE_ATMACK,
RXE_IETH,
RXE_RDETH,
RXE_DETH,
RXE_IMMDT,
RXE_PAYLOAD,
/* count of the entries above; not a header type itself */
NUM_HDR_TYPES
};
/*
 * Flags stored in rxe_opcode_info.mask.
 *
 * The low bits are one flag per enum rxe_hdr_type value.  The bits from
 * NUM_HDR_TYPES upwards are additional per-opcode property flags.
 */
enum rxe_hdr_mask {
/* one bit per header type */
RXE_LRH_MASK = BIT(RXE_LRH),
RXE_GRH_MASK = BIT(RXE_GRH),
RXE_BTH_MASK = BIT(RXE_BTH),
RXE_IMMDT_MASK = BIT(RXE_IMMDT),
RXE_RETH_MASK = BIT(RXE_RETH),
RXE_AETH_MASK = BIT(RXE_AETH),
RXE_ATMETH_MASK = BIT(RXE_ATMETH),
RXE_ATMACK_MASK = BIT(RXE_ATMACK),
RXE_IETH_MASK = BIT(RXE_IETH),
RXE_RDETH_MASK = BIT(RXE_RDETH),
RXE_DETH_MASK = BIT(RXE_DETH),
RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD),
/* property flags, placed above the header type bits */
RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0),
RXE_ACK_MASK = BIT(NUM_HDR_TYPES + 1),
RXE_SEND_MASK = BIT(NUM_HDR_TYPES + 2),
RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3),
RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4),
RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5),
RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6),
RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7),
RXE_START_MASK = BIT(NUM_HDR_TYPES + 8),
RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9),
RXE_END_MASK = BIT(NUM_HDR_TYPES + 10),
/* note: bit NUM_HDR_TYPES + 11 is not assigned */
RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),
/* combinations for testing two properties at once */
RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK),
RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK),
};
/* NOTE(review): presumably a "no opcode" sentinel; its users are not visible here */
#define OPCODE_NONE (-1)
/* number of entries in the rxe_opcode[] table */
#define RXE_NUM_OPCODE 256
/*
 * Information about one packet opcode.
 *   name   - the opcode's name as a string
 *   mask   - enum rxe_hdr_mask flags for the opcode
 *   length - a length value for the opcode's headers
 *   offset - one offset per enum rxe_hdr_type value
 */
struct rxe_opcode_info {
char *name;
enum rxe_hdr_mask mask;
int length;
int offset[NUM_HDR_TYPES];
};
/* table of packet opcode information; defined in a .c file */
extern struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE];
#endif /* RXE_OPCODE_H */

View file

@ -0,0 +1,172 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_PARAM_H
#define RXE_PARAM_H
/* Convert a byte count into the largest IB MTU enum value that does not
 * exceed it.  Counts below 256 have no matching enum value and yield 0;
 * counts of 4096 and above are capped at IB_MTU_4096.
 */
static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu)
{
	if (mtu >= 4096)
		return IB_MTU_4096;
	if (mtu >= 2048)
		return IB_MTU_2048;
	if (mtu >= 1024)
		return IB_MTU_1024;
	if (mtu >= 512)
		return IB_MTU_512;
	if (mtu >= 256)
		return IB_MTU_256;

	return 0;
}
/* Find the IB mtu for a given network MTU: RXE_MAX_HDR_LENGTH bytes are
 * subtracted from the network MTU before it is mapped to an IB MTU enum.
 */
static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
{
	return rxe_mtu_int_to_enum(mtu - RXE_MAX_HDR_LENGTH);
}
/* default/initial rxe device parameter settings
 *
 * Compile-time constants only.
 * NOTE(review): presumably copied into the device attributes when an rxe
 * device is initialised - confirm against the init code.
 */
enum rxe_device_param {
RXE_FW_VER = 0,
/* all bits set.
 * NOTE(review): does not fit in an int; relies on the compiler accepting
 * wide enumerators - confirm this is intended
 */
RXE_MAX_MR_SIZE = -1ull,
RXE_PAGE_SIZE_CAP = 0xfffff000,
RXE_VENDOR_ID = 0,
RXE_VENDOR_PART_ID = 0,
RXE_HW_VER = 0,
RXE_MAX_QP = 0x10000,
RXE_MAX_QP_WR = 0x4000,
RXE_MAX_INLINE_DATA = 400,
/* IB_DEVICE_* capability bits OR-ed together */
RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR
| IB_DEVICE_BAD_QKEY_CNTR
| IB_DEVICE_AUTO_PATH_MIG
| IB_DEVICE_CHANGE_PHY_PORT
| IB_DEVICE_UD_AV_PORT_ENFORCE
| IB_DEVICE_PORT_ACTIVE_EVENT
| IB_DEVICE_SYS_IMAGE_GUID
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SRQ_RESIZE
| IB_DEVICE_MEM_MGT_EXTENSIONS,
RXE_MAX_SGE = 32,
RXE_MAX_SGE_RD = 32,
RXE_MAX_CQ = 16384,
RXE_MAX_LOG_CQE = 13,
RXE_MAX_MR = 2 * 1024,
RXE_MAX_PD = 0x7ffc,
RXE_MAX_QP_RD_ATOM = 128,
RXE_MAX_EE_RD_ATOM = 0,
RXE_MAX_RES_RD_ATOM = 0x3f000,
RXE_MAX_QP_INIT_RD_ATOM = 128,
RXE_MAX_EE_INIT_RD_ATOM = 0,
RXE_ATOMIC_CAP = 1,
RXE_MAX_EE = 0,
RXE_MAX_RDD = 0,
RXE_MAX_MW = 0,
RXE_MAX_RAW_IPV6_QP = 0,
RXE_MAX_RAW_ETHY_QP = 0,
RXE_MAX_MCAST_GRP = 8192,
RXE_MAX_MCAST_QP_ATTACH = 56,
RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000,
RXE_MAX_AH = 100,
RXE_MAX_FMR = 0,
RXE_MAX_MAP_PER_FMR = 0,
RXE_MAX_SRQ = 960,
RXE_MAX_SRQ_WR = 0x4000,
RXE_MIN_SRQ_WR = 1,
RXE_MAX_SRQ_SGE = 27,
RXE_MIN_SRQ_SGE = 1,
RXE_MAX_FMR_PAGE_LIST_LEN = 512,
RXE_MAX_PKEYS = 64,
RXE_LOCAL_CA_ACK_DELAY = 15,
RXE_MAX_UCONTEXT = 512,
RXE_NUM_PORT = 1,
RXE_NUM_COMP_VECTORS = 1,
/* Index ranges used by the indexed object pools (see rxe_type_info[]).
 * The MR and MW ranges do not overlap each other.
 */
RXE_MIN_QP_INDEX = 16,
RXE_MAX_QP_INDEX = 0x00020000,
RXE_MIN_SRQ_INDEX = 0x00020001,
RXE_MAX_SRQ_INDEX = 0x00040000,
RXE_MIN_MR_INDEX = 0x00000001,
RXE_MAX_MR_INDEX = 0x00040000,
RXE_MIN_MW_INDEX = 0x00040001,
RXE_MAX_MW_INDEX = 0x00060000,
RXE_MAX_PKT_PER_ACK = 64,
RXE_MAX_UNACKED_PSNS = 128,
/* Max inflight SKBs per queue pair */
RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,
/* Delay before calling arbiter timer (nanoseconds, going by the name) */
RXE_NSEC_ARB_TIMER_DELAY = 200,
};
/* default/initial rxe port parameters
 *
 * Compile-time constants only.
 * NOTE(review): presumably used to fill in the port attributes at port
 * init time - confirm against the init code.
 */
enum rxe_port_param {
/* ports start out down */
RXE_PORT_STATE = IB_PORT_DOWN,
RXE_PORT_MAX_MTU = IB_MTU_4096,
RXE_PORT_ACTIVE_MTU = IB_MTU_256,
RXE_PORT_GID_TBL_LEN = 1024,
RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
RXE_PORT_MAX_MSG_SZ = 0x800000,
RXE_PORT_BAD_PKEY_CNTR = 0,
RXE_PORT_QKEY_VIOL_CNTR = 0,
RXE_PORT_LID = 0,
RXE_PORT_SM_LID = 0,
RXE_PORT_SM_SL = 0,
RXE_PORT_LMC = 0,
RXE_PORT_MAX_VL_NUM = 1,
RXE_PORT_SUBNET_TIMEOUT = 0,
RXE_PORT_INIT_TYPE_REPLY = 0,
RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X,
RXE_PORT_ACTIVE_SPEED = 1,
/* same value as RXE_MAX_PKEYS */
RXE_PORT_PKEY_TBL_LEN = 64,
RXE_PORT_PHYS_STATE = 2,
/* 64-bit value; the top bits are fe80 */
RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL,
};
/* default/initial port info parameters
 *
 * The trailing comment on each entry records what the encoded value stands
 * for, as noted by the original author.
 */
enum rxe_port_info_param {
RXE_PORT_INFO_VL_CAP = 4, /* 1-8 */
RXE_PORT_INFO_MTU_CAP = 5, /* 4096 */
RXE_PORT_INFO_OPER_VL = 1, /* 1 */
};
#endif /* RXE_PARAM_H */

View file

@ -0,0 +1,502 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
/* info about object pools
 * note that mr and mw share a single index space
 * so that one can map an lkey to the correct type of object
 *
 * One entry per enum rxe_elem_type.  Fields that an entry does not set are
 * zero/NULL.  The .cache member is not set here; rxe_cache_init() fills it
 * in and rxe_cache_exit() clears it again.
 */
struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_UC] = {
.name = "rxe-uc",
.size = sizeof(struct rxe_ucontext),
},
[RXE_TYPE_PD] = {
.name = "rxe-pd",
.size = sizeof(struct rxe_pd),
},
/* RXE_POOL_ATOMIC: rxe_alloc() allocates these with GFP_ATOMIC */
[RXE_TYPE_AH] = {
.name = "rxe-ah",
.size = sizeof(struct rxe_ah),
.flags = RXE_POOL_ATOMIC,
},
/* RXE_POOL_INDEX: rxe_pool_init() sets up an index bitmap for the range */
[RXE_TYPE_SRQ] = {
.name = "rxe-srq",
.size = sizeof(struct rxe_srq),
.flags = RXE_POOL_INDEX,
.min_index = RXE_MIN_SRQ_INDEX,
.max_index = RXE_MAX_SRQ_INDEX,
},
[RXE_TYPE_QP] = {
.name = "rxe-qp",
.size = sizeof(struct rxe_qp),
.cleanup = rxe_qp_cleanup,
.flags = RXE_POOL_INDEX,
.min_index = RXE_MIN_QP_INDEX,
.max_index = RXE_MAX_QP_INDEX,
},
[RXE_TYPE_CQ] = {
.name = "rxe-cq",
.size = sizeof(struct rxe_cq),
.cleanup = rxe_cq_cleanup,
},
[RXE_TYPE_MR] = {
.name = "rxe-mr",
.size = sizeof(struct rxe_mem),
.cleanup = rxe_mem_cleanup,
.flags = RXE_POOL_INDEX,
.max_index = RXE_MAX_MR_INDEX,
.min_index = RXE_MIN_MR_INDEX,
},
/* same object type (struct rxe_mem) as MR, but no cleanup callback */
[RXE_TYPE_MW] = {
.name = "rxe-mw",
.size = sizeof(struct rxe_mem),
.flags = RXE_POOL_INDEX,
.max_index = RXE_MAX_MW_INDEX,
.min_index = RXE_MIN_MW_INDEX,
},
/* RXE_POOL_KEY: objects are looked up by the mgid stored inside them */
[RXE_TYPE_MC_GRP] = {
.name = "rxe-mc_grp",
.size = sizeof(struct rxe_mc_grp),
.cleanup = rxe_mc_cleanup,
.flags = RXE_POOL_KEY,
.key_offset = offsetof(struct rxe_mc_grp, mgid),
.key_size = sizeof(union ib_gid),
},
[RXE_TYPE_MC_ELEM] = {
.name = "rxe-mc_elem",
.size = sizeof(struct rxe_mc_elem),
.flags = RXE_POOL_ATOMIC,
},
};
static inline char *pool_name(struct rxe_pool *pool)
{
return rxe_type_info[pool->type].name;
}
static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
{
return rxe_type_info[pool->type].cache;
}
/* Return the element type of a pool object.  @arg must point at the
 * struct rxe_pool_entry of an object whose ->pool has been set.
 */
static inline enum rxe_elem_type rxe_type(void *arg)
{
	return ((struct rxe_pool_entry *)arg)->pool->type;
}
/* Create one slab cache per object type listed in rxe_type_info[].
 *
 * Returns 0 on success.  If any cache cannot be created, every cache that
 * was created before it is destroyed again and -ENOMEM is returned.
 */
int rxe_cache_init(void)
{
	int err;
	int i;
	size_t size;
	struct rxe_type_info *type;

	for (i = 0; i < RXE_NUM_TYPES; i++) {
		type = &rxe_type_info[i];
		size = ALIGN(type->size, RXE_POOL_ALIGN);
		type->cache = kmem_cache_create(type->name, size,
						RXE_POOL_ALIGN,
						RXE_POOL_CACHE_FLAGS, NULL);
		if (!type->cache) {
			pr_err("Unable to init kmem cache for %s\n",
			       type->name);
			err = -ENOMEM;
			goto err1;
		}
	}

	return 0;

err1:
	/* Unwind entries i-1 .. 0.  'type' has to be re-pointed on every
	 * pass; otherwise the loop keeps hitting the entry that failed
	 * (whose cache is NULL) and leaks all the earlier caches.
	 */
	while (--i >= 0) {
		type = &rxe_type_info[i];
		kmem_cache_destroy(type->cache);
		type->cache = NULL;
	}

	return err;
}
/* Destroy the slab cache of every object type and clear its pointer. */
void rxe_cache_exit(void)
{
	int idx;

	for (idx = 0; idx < RXE_NUM_TYPES; idx++) {
		kmem_cache_destroy(rxe_type_info[idx].cache);
		rxe_type_info[idx].cache = NULL;
	}
}
/* Set up the index bitmap of an indexed pool covering [min, max].
 *
 * Returns -EINVAL when the range holds fewer indices than pool->max_elem,
 * -ENOMEM when the bitmap cannot be allocated, 0 otherwise.
 */
static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
{
	u32 nbits = max - min + 1;
	size_t bytes;

	if (nbits < pool->max_elem) {
		pr_warn("not enough indices for max_elem\n");
		return -EINVAL;
	}

	pool->max_index = max;
	pool->min_index = min;

	bytes = BITS_TO_LONGS(nbits) * sizeof(long);
	pool->table = kmalloc(bytes, GFP_KERNEL);
	if (!pool->table) {
		pr_warn("no memory for bit table\n");
		return -ENOMEM;
	}

	pool->table_size = bytes;
	bitmap_zero(pool->table, nbits);

	return 0;
}
int rxe_pool_init(
struct rxe_dev *rxe,
struct rxe_pool *pool,
enum rxe_elem_type type,
unsigned max_elem)
{
int err = 0;
size_t size = rxe_type_info[type].size;
memset(pool, 0, sizeof(*pool));
pool->rxe = rxe;
pool->type = type;
pool->max_elem = max_elem;
pool->elem_size = ALIGN(size, RXE_POOL_ALIGN);
pool->flags = rxe_type_info[type].flags;
pool->tree = RB_ROOT;
pool->cleanup = rxe_type_info[type].cleanup;
atomic_set(&pool->num_elem, 0);
kref_init(&pool->ref_cnt);
spin_lock_init(&pool->pool_lock);
if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
err = rxe_pool_init_index(pool,
rxe_type_info[type].max_index,
rxe_type_info[type].min_index);
if (err)
goto out;
}
if (rxe_type_info[type].flags & RXE_POOL_KEY) {
pool->key_offset = rxe_type_info[type].key_offset;
pool->key_size = rxe_type_info[type].key_size;
}
pool->state = rxe_pool_valid;
out:
return err;
}
/* kref release callback for a pool: marks the pool invalid and frees its
 * index bitmap.  The pool structure itself is not freed here.
 */
static void rxe_pool_release(struct kref *kref)
{
	struct rxe_pool *p;

	p = container_of(kref, struct rxe_pool, ref_cnt);
	p->state = rxe_pool_invalid;
	kfree(p->table);
}
/* Drop one reference on the pool; the last put runs rxe_pool_release(). */
static void rxe_pool_put(struct rxe_pool *pool)
{
	struct kref *ref = &pool->ref_cnt;

	kref_put(ref, rxe_pool_release);
}
int rxe_pool_cleanup(struct rxe_pool *pool)
{
unsigned long flags;
spin_lock_irqsave(&pool->pool_lock, flags);
pool->state = rxe_pool_invalid;
if (atomic_read(&pool->num_elem) > 0)
pr_warn("%s pool destroyed with unfree'd elem\n",
pool_name(pool));
spin_unlock_irqrestore(&pool->pool_lock, flags);
rxe_pool_put(pool);
return 0;
}
/* Pick a free index from the pool's bitmap, mark it used and return it.
 *
 * The search starts at the bit handed out last and wraps to the start of
 * the bitmap when nothing is free above it.  The caller (rxe_add_index)
 * holds pool_lock.
 * NOTE(review): there is no handling for a completely full bitmap here -
 * confirm callers can never reach that state.
 */
static u32 alloc_index(struct rxe_pool *pool)
{
	u32 range = pool->max_index - pool->min_index + 1;
	u32 bit = find_next_zero_bit(pool->table, range, pool->last);

	if (bit >= range)
		bit = find_first_zero_bit(pool->table, range);

	set_bit(bit, pool->table);
	pool->last = bit;

	return pool->min_index + bit;
}
/* Insert @new into the pool's rb tree, ordered by ->index.  If an entry
 * with the same index is already present a warning is logged and the tree
 * is left unchanged.
 */
static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
	struct rb_node **slot = &pool->tree.rb_node;
	struct rb_node *parent = NULL;

	while (*slot) {
		struct rxe_pool_entry *cur;

		parent = *slot;
		cur = rb_entry(parent, struct rxe_pool_entry, node);

		if (cur->index == new->index) {
			pr_warn("element already exists!\n");
			return;
		}

		slot = (cur->index > new->index) ?
			&parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&new->node, parent, slot);
	rb_insert_color(&new->node, &pool->tree);
}
/* Insert @new into the pool's rb tree, ordered by a memcmp() of the key
 * bytes found at pool->key_offset inside each object.  If an equal key is
 * already present a warning is logged and the tree is left unchanged.
 */
static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
	struct rb_node **slot = &pool->tree.rb_node;
	struct rb_node *parent = NULL;

	while (*slot) {
		struct rxe_pool_entry *cur;
		int order;

		parent = *slot;
		cur = rb_entry(parent, struct rxe_pool_entry, node);

		order = memcmp((u8 *)cur + pool->key_offset,
			       (u8 *)new + pool->key_offset, pool->key_size);
		if (!order) {
			pr_warn("key already exists!\n");
			return;
		}

		slot = (order > 0) ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&new->node, parent, slot);
	rb_insert_color(&new->node, &pool->tree);
}
void rxe_add_key(void *arg, void *key)
{
struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
spin_lock_irqsave(&pool->pool_lock, flags);
memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
insert_key(pool, elem);
spin_unlock_irqrestore(&pool->pool_lock, flags);
}
/* Remove a keyed object from its pool's rb tree under the pool lock. */
void rxe_drop_key(void *arg)
{
	struct rxe_pool_entry *entry = arg;
	struct rxe_pool *p = entry->pool;
	unsigned long irq_flags;

	spin_lock_irqsave(&p->pool_lock, irq_flags);
	rb_erase(&entry->node, &p->tree);
	spin_unlock_irqrestore(&p->pool_lock, irq_flags);
}
/* Give the object a fresh index from its pool and insert it into the
 * pool's rb tree, all under the pool lock.
 */
void rxe_add_index(void *arg)
{
	struct rxe_pool_entry *entry = arg;
	struct rxe_pool *p = entry->pool;
	unsigned long irq_flags;

	spin_lock_irqsave(&p->pool_lock, irq_flags);
	entry->index = alloc_index(p);
	insert_index(p, entry);
	spin_unlock_irqrestore(&p->pool_lock, irq_flags);
}
/* Release the object's index bit and remove the object from its pool's
 * rb tree, all under the pool lock.
 */
void rxe_drop_index(void *arg)
{
	struct rxe_pool_entry *entry = arg;
	struct rxe_pool *p = entry->pool;
	unsigned long irq_flags;

	spin_lock_irqsave(&p->pool_lock, irq_flags);
	clear_bit(entry->index - p->min_index, p->table);
	rb_erase(&entry->node, &p->tree);
	spin_unlock_irqrestore(&p->pool_lock, irq_flags);
}
/* Allocate one zeroed object from @pool.
 *
 * On success the object holds a reference on the pool and on the rxe
 * device, counts towards pool->num_elem, and its own reference count is
 * initialised.  Returns NULL when the pool is not valid, when the pool
 * already holds max_elem objects, or when the slab allocation fails; in
 * the last two cases the count and references taken here are given back.
 *
 * May sleep unless the pool has RXE_POOL_ATOMIC set.
 */
void *rxe_alloc(struct rxe_pool *pool)
{
	struct rxe_pool_entry *elem;
	unsigned long flags;

	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));

	/* check the state and take the pool reference under the same lock
	 * that rxe_pool_cleanup() uses to invalidate the pool
	 */
	spin_lock_irqsave(&pool->pool_lock, flags);
	if (pool->state != rxe_pool_valid) {
		spin_unlock_irqrestore(&pool->pool_lock, flags);
		return NULL;
	}
	kref_get(&pool->ref_cnt);
	spin_unlock_irqrestore(&pool->pool_lock, flags);

	kref_get(&pool->rxe->ref_cnt);

	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
		goto out_cnt;

	elem = kmem_cache_zalloc(pool_cache(pool),
				 (pool->flags & RXE_POOL_ATOMIC) ?
				 GFP_ATOMIC : GFP_KERNEL);
	/* the allocation can fail, above all with GFP_ATOMIC */
	if (!elem)
		goto out_cnt;

	elem->pool = pool;
	kref_init(&elem->ref_cnt);

	return elem;

out_cnt:
	atomic_dec(&pool->num_elem);
	rxe_dev_put(pool->rxe);
	rxe_pool_put(pool);
	return NULL;
}
/* kref release callback for a pool object: runs the pool's cleanup hook
 * (if it has one), returns the object to its slab cache, and gives back
 * the element count and the device and pool references taken in
 * rxe_alloc().
 */
void rxe_elem_release(struct kref *kref)
{
	struct rxe_pool_entry *entry;
	struct rxe_pool *p;

	entry = container_of(kref, struct rxe_pool_entry, ref_cnt);
	p = entry->pool;

	if (p->cleanup)
		p->cleanup(entry);

	kmem_cache_free(pool_cache(p), entry);
	atomic_dec(&p->num_elem);
	rxe_dev_put(p->rxe);
	rxe_pool_put(p);
}
/* Look up the object with the given index in an indexed pool.
 *
 * The search runs under the pool lock.  A reference is taken on the object
 * before it is returned.  Returns NULL when the pool is not valid or no
 * object has that index.
 */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
	struct rxe_pool_entry *found = NULL;
	struct rb_node *cur;
	unsigned long irq_flags;

	spin_lock_irqsave(&pool->pool_lock, irq_flags);

	if (pool->state == rxe_pool_valid) {
		for (cur = pool->tree.rb_node; cur; ) {
			struct rxe_pool_entry *entry =
				rb_entry(cur, struct rxe_pool_entry, node);

			if (entry->index > index) {
				cur = cur->rb_left;
			} else if (entry->index < index) {
				cur = cur->rb_right;
			} else {
				found = entry;
				break;
			}
		}

		if (found)
			kref_get(&found->ref_cnt);
	}

	spin_unlock_irqrestore(&pool->pool_lock, irq_flags);

	return found;
}
/* Look up the object whose stored key equals @key in a keyed pool.
 *
 * Keys are compared with memcmp() over pool->key_size bytes.  The search
 * runs under the pool lock.  A reference is taken on the object before it
 * is returned.  Returns NULL when the pool is not valid or no object has
 * that key.
 */
void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
{
	struct rxe_pool_entry *found = NULL;
	struct rb_node *cur;
	unsigned long irq_flags;

	spin_lock_irqsave(&pool->pool_lock, irq_flags);

	if (pool->state == rxe_pool_valid) {
		for (cur = pool->tree.rb_node; cur; ) {
			struct rxe_pool_entry *entry =
				rb_entry(cur, struct rxe_pool_entry, node);
			int order = memcmp((u8 *)entry + pool->key_offset,
					   key, pool->key_size);

			if (order > 0) {
				cur = cur->rb_left;
			} else if (order < 0) {
				cur = cur->rb_right;
			} else {
				found = entry;
				break;
			}
		}

		if (found)
			kref_get(&found->ref_cnt);
	}

	spin_unlock_irqrestore(&pool->pool_lock, irq_flags);

	return found;
}

View file

@ -0,0 +1,163 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_POOL_H
#define RXE_POOL_H
/* alignment, in bytes, used for object sizes and for the slab caches */
#define RXE_POOL_ALIGN (16)
/* flags passed to kmem_cache_create() for every object cache */
#define RXE_POOL_CACHE_FLAGS (0)
/* per-type pool behaviour bits, stored in rxe_type_info.flags and copied
 * into rxe_pool.flags by rxe_pool_init()
 */
enum rxe_pool_flags {
/* objects are allocated with GFP_ATOMIC instead of GFP_KERNEL */
RXE_POOL_ATOMIC = BIT(0),
/* objects can be given an index and looked up by it */
RXE_POOL_INDEX = BIT(1),
/* objects can be given a key and looked up by it */
RXE_POOL_KEY = BIT(2),
};
/* Object types managed through pools; each value indexes rxe_type_info[]. */
enum rxe_elem_type {
RXE_TYPE_UC,
RXE_TYPE_PD,
RXE_TYPE_AH,
RXE_TYPE_SRQ,
RXE_TYPE_QP,
RXE_TYPE_CQ,
RXE_TYPE_MR,
RXE_TYPE_MW,
RXE_TYPE_MC_GRP,
RXE_TYPE_MC_ELEM,
RXE_NUM_TYPES, /* keep me last */
};
/* Static description of one object type, used to configure its pool. */
struct rxe_type_info {
/* name given to the type's slab cache and used in log messages */
char *name;
/* object size in bytes, before rounding up to RXE_POOL_ALIGN */
size_t size;
/* optional hook run on an object just before it is freed */
void (*cleanup)(void *obj);
enum rxe_pool_flags flags;
/* index range; only meaningful with RXE_POOL_INDEX */
u32 max_index;
u32 min_index;
/* location and size of the key inside the object; only with RXE_POOL_KEY */
size_t key_offset;
size_t key_size;
/* slab cache for this type; set by rxe_cache_init() */
struct kmem_cache *cache;
};
/* one entry per enum rxe_elem_type */
extern struct rxe_type_info rxe_type_info[];
/* Pool life-cycle state.  A pool is valid after a successful
 * rxe_pool_init() and invalid again from rxe_pool_cleanup() onwards;
 * allocation and lookup return NULL for a pool that is not valid.
 */
enum rxe_pool_state {
rxe_pool_invalid,
rxe_pool_valid,
};
/* Bookkeeping header of every pool object.  The pool code casts object
 * pointers to this type, so it has to sit at the very start of the object
 * (the rxe_add_ref()/rxe_drop_ref() macros reach it as 'pelem').
 */
struct rxe_pool_entry {
/* pool the object was allocated from */
struct rxe_pool *pool;
/* object reference count; the last put runs rxe_elem_release() */
struct kref ref_cnt;
/* NOTE(review): not referenced by the pool code visible here - confirm use */
struct list_head list;
/* only used if indexed or keyed */
struct rb_node node;
u32 index;
};
/* A pool of objects of one type belonging to one rxe device. */
struct rxe_pool {
/* owning device; each allocated object holds a reference on it */
struct rxe_dev *rxe;
spinlock_t pool_lock; /* pool spinlock */
/* object size rounded up to RXE_POOL_ALIGN */
size_t elem_size;
/* pool reference count: one from init plus one per live object */
struct kref ref_cnt;
/* copied from rxe_type_info; may be NULL */
void (*cleanup)(void *obj);
enum rxe_pool_state state;
enum rxe_pool_flags flags;
enum rxe_elem_type type;
/* upper bound on the number of live objects */
unsigned int max_elem;
/* current number of live objects */
atomic_t num_elem;
/* only used if indexed or keyed */
struct rb_root tree;
/* bitmap of indices in use; bit 0 stands for min_index */
unsigned long *table;
/* size of 'table' in bytes */
size_t table_size;
u32 max_index;
u32 min_index;
/* bit handed out most recently; the next search starts here */
u32 last;
size_t key_offset;
size_t key_size;
};
/* initialize slab caches for managed objects; returns 0 or -ENOMEM */
int rxe_cache_init(void);
/* cleanup slab caches for managed objects */
void rxe_cache_exit(void);
/* initialize a pool of objects with given limit on
 * number of elements. gets parameters from rxe_type_info
 * pool elements will be allocated out of a slab cache
 * returns 0 on success or a negative errno
 */
int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
enum rxe_elem_type type, u32 max_elem);
/* mark the pool invalid and drop a reference on it; always returns 0 */
int rxe_pool_cleanup(struct rxe_pool *pool);
/* allocate a zeroed object from pool; returns NULL if the pool is not
 * valid or already holds max_elem objects
 */
void *rxe_alloc(struct rxe_pool *pool);
/* assign an index to an indexed object and insert object into
 * pool's rb tree
 */
void rxe_add_index(void *elem);
/* drop an index and remove object from rb tree */
void rxe_drop_index(void *elem);
/* assign a key to a keyed object and insert object into
 * pool's rb tree
 */
void rxe_add_key(void *elem, void *key);
/* remove elem from rb tree */
void rxe_drop_key(void *elem);
/* lookup an indexed object from index. takes a reference on object;
 * returns NULL if nothing matches or the pool is not valid
 */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
/* lookup keyed object from key. takes a reference on the object;
 * returns NULL if nothing matches or the pool is not valid
 */
void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
/* cleanup an object when all references are dropped */
void rxe_elem_release(struct kref *kref);
/* take a reference on an object; elem must have a struct rxe_pool_entry
 * member named pelem
 */
#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt)
/* drop a reference on an object; the last drop runs rxe_elem_release() */
#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release)
#endif /* RXE_POOL_H */

View file

@ -0,0 +1,851 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/skbuff.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_task.h"
/* Printable names for the QP_STATE_* values, indexed by state. */
char *rxe_qp_state_name[] = {
[QP_STATE_RESET] = "RESET",
[QP_STATE_INIT] = "INIT",
[QP_STATE_READY] = "READY",
[QP_STATE_DRAIN] = "DRAIN",
[QP_STATE_DRAINED] = "DRAINED",
[QP_STATE_ERROR] = "ERROR",
};
/* Validate requested QP capabilities against the device limits.
 *
 * Send limits and the inline data size are always checked; receive limits
 * are checked only when the QP has no SRQ (@has_srq == 0).
 * Returns 0 if everything is within limits, -EINVAL otherwise.
 */
static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
			  int has_srq)
{
	if (cap->max_send_wr > rxe->attr.max_qp_wr) {
		pr_warn("invalid send wr = %d > %d\n",
			cap->max_send_wr, rxe->attr.max_qp_wr);
		return -EINVAL;
	}

	if (cap->max_send_sge > rxe->attr.max_sge) {
		pr_warn("invalid send sge = %d > %d\n",
			cap->max_send_sge, rxe->attr.max_sge);
		return -EINVAL;
	}

	if (!has_srq && cap->max_recv_wr > rxe->attr.max_qp_wr) {
		pr_warn("invalid recv wr = %d > %d\n",
			cap->max_recv_wr, rxe->attr.max_qp_wr);
		return -EINVAL;
	}

	if (!has_srq && cap->max_recv_sge > rxe->attr.max_sge) {
		pr_warn("invalid recv sge = %d > %d\n",
			cap->max_recv_sge, rxe->attr.max_sge);
		return -EINVAL;
	}

	if (cap->max_inline_data > rxe->max_inline_data) {
		pr_warn("invalid max inline data = %d > %d\n",
			cap->max_inline_data, rxe->max_inline_data);
		return -EINVAL;
	}

	return 0;
}
/* Validate the attributes of a create-QP request.
 *
 * Both completion queues must be supplied and the capabilities must be
 * within the device limits.  SMI and GSI QPs are additionally restricted
 * to port 1 and to one of each kind per port.
 * Returns 0 if the request is acceptable, -EINVAL otherwise.
 */
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
{
	struct rxe_port *port;
	int port_num = init->port_num;

	if (!(init->recv_cq && init->send_cq)) {
		pr_warn("missing cq\n");
		return -EINVAL;
	}

	if (rxe_qp_chk_cap(rxe, &init->cap, !!init->srq))
		return -EINVAL;

	/* only the special QP types need the port checks below */
	if (init->qp_type != IB_QPT_SMI && init->qp_type != IB_QPT_GSI)
		return 0;

	if (port_num != 1) {
		pr_warn("invalid port = %d\n", port_num);
		return -EINVAL;
	}

	port = &rxe->port;

	if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {
		pr_warn("SMI QP exists for port %d\n", port_num);
		return -EINVAL;
	}

	if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
		pr_warn("GSI QP exists for port %d\n", port_num);
		return -EINVAL;
	}

	return 0;
}
/* Allocate a zeroed array of @n responder resources for the QP and reset
 * the head/tail positions.  Returns 0 or -ENOMEM.
 */
static int alloc_rd_atomic_resources(struct rxe_qp *qp, unsigned int n)
{
	qp->resp.res_head = 0;
	qp->resp.res_tail = 0;
	qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL);

	return qp->resp.resources ? 0 : -ENOMEM;
}
/* Release whatever each responder resource holds, then free the resource
 * array itself and clear the pointer.  Does nothing if no array exists.
 */
static void free_rd_atomic_resources(struct rxe_qp *qp)
{
	int idx;

	if (!qp->resp.resources)
		return;

	for (idx = 0; idx < qp->attr.max_rd_atomic; idx++)
		free_rd_atomic_resource(qp, &qp->resp.resources[idx]);

	kfree(qp->resp.resources);
	qp->resp.resources = NULL;
}
/* Release what a single responder resource holds and mark it unused.
 *
 * An atomic resource drops a reference on the QP and frees its skb; a read
 * resource drops the reference on its MR, if it has one.
 */
void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
{
	if (res->type == RXE_ATOMIC_MASK) {
		rxe_drop_ref(qp);
		kfree_skb(res->atomic.skb);
	} else if (res->type == RXE_READ_MASK && res->read.mr) {
		rxe_drop_ref(res->read.mr);
	}

	res->type = 0;
}
/* Release what every responder resource holds but keep the array itself,
 * so the resources can be used again.
 */
static void cleanup_rd_atomic_resources(struct rxe_qp *qp)
{
	int idx;

	if (!qp->resp.resources)
		return;

	for (idx = 0; idx < qp->attr.max_rd_atomic; idx++)
		free_rd_atomic_resource(qp, &qp->resp.resources[idx]);
}
/* Initialise the QP fields that belong to neither the requester nor the
 * responder side: signalling type, initial path MTU, QP number, and the
 * lists, locks and counters.
 *
 * SMI and GSI QPs get the fixed QP numbers 0 and 1 and record their pool
 * index in the port; every other QP uses its pool index as its number.
 */
static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
			     struct ib_qp_init_attr *init)
{
	struct rxe_port *port = &rxe->port;
	u32 pool_index = qp->pelem.index;

	qp->sq_sig_type = init->sq_sig_type;
	qp->attr.path_mtu = 1;
	qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu);

	if (init->qp_type == IB_QPT_SMI) {
		qp->ibqp.qp_num = 0;
		port->qp_smi_index = pool_index;
		qp->attr.port_num = init->port_num;
	} else if (init->qp_type == IB_QPT_GSI) {
		qp->ibqp.qp_num = 1;
		port->qp_gsi_index = pool_index;
		qp->attr.port_num = init->port_num;
	} else {
		qp->ibqp.qp_num = pool_index;
	}

	INIT_LIST_HEAD(&qp->grp_list);
	skb_queue_head_init(&qp->send_pkts);

	spin_lock_init(&qp->grp_lock);
	spin_lock_init(&qp->state_lock);

	atomic_set(&qp->ssn, 0);
	atomic_set(&qp->skb_out, 0);
}
static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_init_attr *init,
struct ib_ucontext *context, struct ib_udata *udata)
{
int err;
int wqe_size;
err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
if (err < 0)
return err;
qp->sk->sk->sk_user_data = qp;
qp->sq.max_wr = init->cap.max_send_wr;
qp->sq.max_sge = init->cap.max_send_sge;
qp->sq.max_inline = init->cap.max_inline_data;
wqe_size = max_t(int, sizeof(struct rxe_send_wqe) +
qp->sq.max_sge * sizeof(struct ib_sge),
sizeof(struct rxe_send_wqe) +
qp->sq.max_inline);
qp->sq.queue = rxe_queue_init(rxe,
&qp->sq.max_wr,
wqe_size);
if (!qp->sq.queue)
return -ENOMEM;
err = do_mmap_info(rxe, udata, true,
context, qp->sq.queue->buf,
qp->sq.queue->buf_size, &qp->sq.queue->ip);
if (err) {
kvfree(qp->sq.queue->buf);
kfree(qp->sq.queue);
return err;
}
qp->req.wqe_index = producer_index(qp->sq.queue);
qp->req.state = QP_STATE_RESET;
qp->req.opcode = -1;
qp->comp.opcode = -1;
spin_lock_init(&qp->sq.sq_lock);
skb_queue_head_init(&qp->req_pkts);
rxe_init_task(rxe, &qp->req.task, qp,
rxe_requester, "req");
rxe_init_task(rxe, &qp->comp.task, qp,
rxe_completer, "comp");
init_timer(&qp->rnr_nak_timer);
qp->rnr_nak_timer.function = rnr_nak_timer;
qp->rnr_nak_timer.data = (unsigned long)qp;
init_timer(&qp->retrans_timer);
qp->retrans_timer.function = retransmit_timer;
qp->retrans_timer.data = (unsigned long)qp;
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
return 0;
}
static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_init_attr *init,
struct ib_ucontext *context, struct ib_udata *udata)
{
int err;
int wqe_size;
if (!qp->srq) {
qp->rq.max_wr = init->cap.max_recv_wr;
qp->rq.max_sge = init->cap.max_recv_sge;
wqe_size = rcv_wqe_size(qp->rq.max_sge);
pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n",
qp->rq.max_wr, qp->rq.max_sge, wqe_size);
qp->rq.queue = rxe_queue_init(rxe,
&qp->rq.max_wr,
wqe_size);
if (!qp->rq.queue)
return -ENOMEM;
err = do_mmap_info(rxe, udata, false, context,
qp->rq.queue->buf,
qp->rq.queue->buf_size,
&qp->rq.queue->ip);
if (err) {
kvfree(qp->rq.queue->buf);
kfree(qp->rq.queue);
return err;
}
}
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
skb_queue_head_init(&qp->resp_pkts);
rxe_init_task(rxe, &qp->resp.task, qp,
rxe_responder, "resp");
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
qp->resp.state = QP_STATE_RESET;
return 0;
}
/* called by the create qp verb
 *
 * Takes references on the PD, both CQs and the optional SRQ, records them
 * in the QP, then initialises the misc, requester and responder parts.
 * On success the QP is left in IB_QPS_RESET and marked valid.
 *
 * On failure the references taken here are dropped again and the error is
 * returned.  If the responder setup fails, the send queue created by the
 * requester setup is released and qp->sq.queue is cleared, so later code
 * that tests the pointer does not release it a second time.
 */
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
		     struct ib_qp_init_attr *init, struct ib_udata *udata,
		     struct ib_pd *ibpd)
{
	int err;
	struct rxe_cq *rcq = to_rcq(init->recv_cq);
	struct rxe_cq *scq = to_rcq(init->send_cq);
	struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
	/* a user context exists only for QPs created from user space */
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;

	rxe_add_ref(pd);
	rxe_add_ref(rcq);
	rxe_add_ref(scq);
	if (srq)
		rxe_add_ref(srq);

	qp->pd			= pd;
	qp->rcq			= rcq;
	qp->scq			= scq;
	qp->srq			= srq;

	rxe_qp_init_misc(rxe, qp, init);

	err = rxe_qp_init_req(rxe, qp, init, context, udata);
	if (err)
		goto err1;

	err = rxe_qp_init_resp(rxe, qp, init, context, udata);
	if (err)
		goto err2;

	qp->attr.qp_state = IB_QPS_RESET;
	qp->valid = 1;

	return 0;

err2:
	rxe_queue_cleanup(qp->sq.queue);
	/* the queue is gone; do not leave a stale pointer in the QP */
	qp->sq.queue = NULL;
err1:
	if (srq)
		rxe_drop_ref(srq);
	rxe_drop_ref(scq);
	rxe_drop_ref(rcq);
	rxe_drop_ref(pd);

	return err;
}
/* called by the query qp verb */
int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
{
init->event_handler = qp->ibqp.event_handler;
init->qp_context = qp->ibqp.qp_context;
init->send_cq = qp->ibqp.send_cq;
init->recv_cq = qp->ibqp.recv_cq;
init->srq = qp->ibqp.srq;
init->cap.max_send_wr = qp->sq.max_wr;
init->cap.max_send_sge = qp->sq.max_sge;
init->cap.max_inline_data = qp->sq.max_inline;
if (!qp->srq) {
init->cap.max_recv_wr = qp->rq.max_wr;
init->cap.max_recv_sge = qp->rq.max_sge;
}
init->sq_sig_type = qp->sq_sig_type;
init->qp_type = qp->ibqp.qp_type;
init->port_num = 1;
return 0;
}
/* called by the modify qp verb, this routine checks all the parameters before
* making any changes
*/
int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_attr *attr, int mask)
{
enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
attr->cur_qp_state : qp->attr.qp_state;
enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask,
IB_LINK_LAYER_ETHERNET)) {
pr_warn("invalid mask or state for qp\n");
goto err1;
}
if (mask & IB_QP_STATE) {
if (cur_state == IB_QPS_SQD) {
if (qp->req.state == QP_STATE_DRAIN &&
new_state != IB_QPS_ERR)
goto err1;
}
}
if (mask & IB_QP_PORT) {
if (attr->port_num != 1) {
pr_warn("invalid port %d\n", attr->port_num);
goto err1;
}
}
if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
goto err1;
if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))
goto err1;
if (mask & IB_QP_ALT_PATH) {
if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
goto err1;
if (attr->alt_port_num != 1) {
pr_warn("invalid alt port %d\n", attr->alt_port_num);
goto err1;
}
if (attr->alt_timeout > 31) {
pr_warn("invalid QP alt timeout %d > 31\n",
attr->alt_timeout);
goto err1;
}
}
if (mask & IB_QP_PATH_MTU) {
struct rxe_port *port = &rxe->port;
enum ib_mtu max_mtu = port->attr.max_mtu;
enum ib_mtu mtu = attr->path_mtu;
if (mtu > max_mtu) {
pr_debug("invalid mtu (%d) > (%d)\n",
ib_mtu_enum_to_int(mtu),
ib_mtu_enum_to_int(max_mtu));
goto err1;
}
}
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
pr_warn("invalid max_rd_atomic %d > %d\n",
attr->max_rd_atomic,
rxe->attr.max_qp_rd_atom);
goto err1;
}
}
if (mask & IB_QP_TIMEOUT) {
if (attr->timeout > 31) {
pr_warn("invalid QP timeout %d > 31\n",
attr->timeout);
goto err1;
}
}
return 0;
err1:
return -EINVAL;
}
/* move the qp to the reset state
 *
 * The steps are order-dependent: the tasks are disabled first, then both
 * state machines are set to RESET and each task function is invoked once
 * through __rxe_do_task(), then the per-QP attributes and responder
 * resources are cleaned up, and finally the tasks are enabled again.
 *
 * The requester and completer are only touched when the QP has a send
 * queue.  Note that the completer task is disabled/enabled only for RC
 * QPs, while __rxe_do_task() is called on it for every QP type.
 */
static void rxe_qp_reset(struct rxe_qp *qp)
{
/* stop tasks from running */
rxe_disable_task(&qp->resp.task);
/* stop request/comp */
if (qp->sq.queue) {
if (qp_type(qp) == IB_QPT_RC)
rxe_disable_task(&qp->comp.task);
rxe_disable_task(&qp->req.task);
}
/* move qp to the reset state */
qp->req.state = QP_STATE_RESET;
qp->resp.state = QP_STATE_RESET;
/* let state machines reset themselves drain work and packet queues
 * etc.
 */
__rxe_do_task(&qp->resp.task);
if (qp->sq.queue) {
__rxe_do_task(&qp->comp.task);
__rxe_do_task(&qp->req.task);
}
/* cleanup attributes */
atomic_set(&qp->ssn, 0);
qp->req.opcode = -1;
qp->req.need_retry = 0;
qp->req.noack_pkts = 0;
qp->resp.msn = 0;
qp->resp.opcode = -1;
qp->resp.drop_msg = 0;
qp->resp.goto_error = 0;
qp->resp.sent_psn_nak = 0;
/* drop the reference on the MR the responder was using, if any */
if (qp->resp.mr) {
rxe_drop_ref(qp->resp.mr);
qp->resp.mr = NULL;
}
/* release what the responder resources hold; the array itself is kept */
cleanup_rd_atomic_resources(qp);
/* reenable tasks */
rxe_enable_task(&qp->resp.task);
if (qp->sq.queue) {
if (qp_type(qp) == IB_QPT_RC)
rxe_enable_task(&qp->comp.task);
rxe_enable_task(&qp->req.task);
}
}
/* drain the send queue
 *
 * Does nothing when the QP has no send queue or the requester is already
 * in the DRAINED state.  Otherwise the requester is put into DRAIN and the
 * completer and requester tasks are kicked; for non-RC QPs the completer
 * is invoked directly through __rxe_do_task().
 */
static void rxe_qp_drain(struct rxe_qp *qp)
{
	if (!qp->sq.queue)
		return;

	if (qp->req.state == QP_STATE_DRAINED)
		return;

	qp->req.state = QP_STATE_DRAIN;

	if (qp_type(qp) == IB_QPT_RC)
		rxe_run_task(&qp->comp.task, 1);
	else
		__rxe_do_task(&qp->comp.task);

	rxe_run_task(&qp->req.task, 1);
}
/* move the qp to the error state
 *
 * Both state machines are switched to ERROR first, then the responder,
 * completer and requester tasks are kicked so that they can act on the new
 * state.  For non-RC QPs the completer is invoked directly through
 * __rxe_do_task() instead of rxe_run_task().
 */
void rxe_qp_error(struct rxe_qp *qp)
{
qp->req.state = QP_STATE_ERROR;
qp->resp.state = QP_STATE_ERROR;
/* drain work and packet queues */
rxe_run_task(&qp->resp.task, 1);
if (qp_type(qp) == IB_QPT_RC)
rxe_run_task(&qp->comp.task, 1);
else
__rxe_do_task(&qp->comp.task);
rxe_run_task(&qp->req.task, 1);
}
/* called by the modify qp verb */
int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
struct ib_udata *udata)
{
int err;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
free_rd_atomic_resources(qp);
err = alloc_rd_atomic_resources(qp, max_rd_atomic);
if (err)
return err;
qp->attr.max_rd_atomic = max_rd_atomic;
atomic_set(&qp->req.rd_atomic, max_rd_atomic);
}
if (mask & IB_QP_CUR_STATE)
qp->attr.cur_qp_state = attr->qp_state;
if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify;
if (mask & IB_QP_ACCESS_FLAGS)
qp->attr.qp_access_flags = attr->qp_access_flags;
if (mask & IB_QP_PKEY_INDEX)
qp->attr.pkey_index = attr->pkey_index;
if (mask & IB_QP_PORT)
qp->attr.port_num = attr->port_num;
if (mask & IB_QP_QKEY)
qp->attr.qkey = attr->qkey;
if (mask & IB_QP_AV) {
ib_get_cached_gid(&rxe->ib_dev, 1,
attr->ah_attr.grh.sgid_index, &sgid,
&sgid_attr);
rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av,
&attr->ah_attr);
rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr,
&sgid_attr, &sgid);
if (sgid_attr.ndev)
dev_put(sgid_attr.ndev);
}
if (mask & IB_QP_ALT_PATH) {
ib_get_cached_gid(&rxe->ib_dev, 1,
attr->alt_ah_attr.grh.sgid_index, &sgid,
&sgid_attr);
rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av,
&attr->alt_ah_attr);
rxe_av_fill_ip_info(rxe, &qp->alt_av, &attr->alt_ah_attr,
&sgid_attr, &sgid);
if (sgid_attr.ndev)
dev_put(sgid_attr.ndev);
qp->attr.alt_port_num = attr->alt_port_num;
qp->attr.alt_pkey_index = attr->alt_pkey_index;
qp->attr.alt_timeout = attr->alt_timeout;
}
if (mask & IB_QP_PATH_MTU) {
qp->attr.path_mtu = attr->path_mtu;
qp->mtu = ib_mtu_enum_to_int(attr->path_mtu);
}
if (mask & IB_QP_TIMEOUT) {
qp->attr.timeout = attr->timeout;
if (attr->timeout == 0) {
qp->qp_timeout_jiffies = 0;
} else {
/* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */
int j = nsecs_to_jiffies(4096ULL << attr->timeout);
qp->qp_timeout_jiffies = j ? j : 1;
}
}
if (mask & IB_QP_RETRY_CNT) {
qp->attr.retry_cnt = attr->retry_cnt;
qp->comp.retry_cnt = attr->retry_cnt;
pr_debug("set retry count = %d\n", attr->retry_cnt);
}
if (mask & IB_QP_RNR_RETRY) {
qp->attr.rnr_retry = attr->rnr_retry;
qp->comp.rnr_retry = attr->rnr_retry;
pr_debug("set rnr retry count = %d\n", attr->rnr_retry);
}
if (mask & IB_QP_RQ_PSN) {
qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
qp->resp.psn = qp->attr.rq_psn;
pr_debug("set resp psn = 0x%x\n", qp->resp.psn);
}
if (mask & IB_QP_MIN_RNR_TIMER) {
qp->attr.min_rnr_timer = attr->min_rnr_timer;
pr_debug("set min rnr timer = 0x%x\n",
attr->min_rnr_timer);
}
if (mask & IB_QP_SQ_PSN) {
qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
qp->req.psn = qp->attr.sq_psn;
qp->comp.psn = qp->attr.sq_psn;
pr_debug("set req psn = 0x%x\n", qp->req.psn);
}
if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {
qp->attr.max_dest_rd_atomic =
__roundup_pow_of_two(attr->max_dest_rd_atomic);
}
if (mask & IB_QP_PATH_MIG_STATE)
qp->attr.path_mig_state = attr->path_mig_state;
if (mask & IB_QP_DEST_QPN)
qp->attr.dest_qp_num = attr->dest_qp_num;
if (mask & IB_QP_STATE) {
qp->attr.qp_state = attr->qp_state;
switch (attr->qp_state) {
case IB_QPS_RESET:
pr_debug("qp state -> RESET\n");
rxe_qp_reset(qp);
break;
case IB_QPS_INIT:
pr_debug("qp state -> INIT\n");
qp->req.state = QP_STATE_INIT;
qp->resp.state = QP_STATE_INIT;
break;
case IB_QPS_RTR:
pr_debug("qp state -> RTR\n");
qp->resp.state = QP_STATE_READY;
break;
case IB_QPS_RTS:
pr_debug("qp state -> RTS\n");
qp->req.state = QP_STATE_READY;
break;
case IB_QPS_SQD:
pr_debug("qp state -> SQD\n");
rxe_qp_drain(qp);
break;
case IB_QPS_SQE:
pr_warn("qp state -> SQE !!?\n");
/* Not possible from modify_qp. */
break;
case IB_QPS_ERR:
pr_debug("qp state -> ERR\n");
rxe_qp_error(qp);
break;
}
}
return 0;
}
/* called by the query qp verb
 *
 * Fills @attr from the cached qp attributes and then overlays the
 * values that live elsewhere in the qp (current PSNs, queue limits,
 * address vectors, drain status). Always returns 0.
 */
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	int draining;

	*attr = qp->attr;

	attr->rq_psn = qp->resp.psn;
	attr->sq_psn = qp->req.psn;

	attr->cap.max_send_wr = qp->sq.max_wr;
	attr->cap.max_send_sge = qp->sq.max_sge;
	attr->cap.max_inline_data = qp->sq.max_inline;

	/* receive limits are only reported for qps without an srq */
	if (!qp->srq) {
		attr->cap.max_recv_wr = qp->rq.max_wr;
		attr->cap.max_recv_sge = qp->rq.max_sge;
	}

	rxe_av_to_attr(rxe, &qp->pri_av, &attr->ah_attr);
	rxe_av_to_attr(rxe, &qp->alt_av, &attr->alt_ah_attr);

	draining = (qp->req.state == QP_STATE_DRAIN);
	attr->sq_draining = draining ? 1 : 0;

	/* applications that see the draining state typically poll on
	 * it, so give up the processor
	 */
	if (draining)
		cond_resched();

	pr_debug("attr->sq_draining = %d\n", attr->sq_draining);

	return 0;
}
/* called by the destroy qp verb
 *
 * Marks the qp invalid, clears qp_timeout_jiffies, cleans up the
 * responder/requester (and, for RC, completer) tasks and deletes both
 * timers, then invokes the requester/completer tasks directly.
 */
void rxe_qp_destroy(struct rxe_qp *qp)
{
/* check_type_state() rejects packets for a qp whose valid flag is 0 */
qp->valid = 0;
qp->qp_timeout_jiffies = 0;
rxe_cleanup_task(&qp->resp.task);
/* wait for any running timer handler to finish before going on */
del_timer_sync(&qp->retrans_timer);
del_timer_sync(&qp->rnr_nak_timer);
rxe_cleanup_task(&qp->req.task);
/* the completer task is only cleaned up for RC qps */
if (qp_type(qp) == IB_QPT_RC)
rxe_cleanup_task(&qp->comp.task);
/* flush out any receive wr's or pending requests */
/* NOTE(review): the comment mentions receive wr's but the task run
 * here is req.task, not resp.task - confirm this is intended
 */
__rxe_do_task(&qp->req.task);
if (qp->sq.queue) {
__rxe_do_task(&qp->comp.task);
__rxe_do_task(&qp->req.task);
}
}
/* called when the last reference to the qp is dropped
 *
 * @arg is the struct rxe_qp being released. Detaches the qp from its
 * multicast groups, frees the send/receive queues, drops the references
 * held on the srq, cqs, pd and responder mr, frees the read/atomic
 * resources and shuts down the qp's socket.
 */
void rxe_qp_cleanup(void *arg)
{
struct rxe_qp *qp = arg;
rxe_drop_all_mcast_groups(qp);
/* every member below is optional, hence the NULL checks */
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
if (qp->srq)
rxe_drop_ref(qp->srq);
if (qp->rq.queue)
rxe_queue_cleanup(qp->rq.queue);
if (qp->scq)
rxe_drop_ref(qp->scq);
if (qp->rcq)
rxe_drop_ref(qp->rcq);
if (qp->pd)
rxe_drop_ref(qp->pd);
if (qp->resp.mr) {
rxe_drop_ref(qp->resp.mr);
qp->resp.mr = NULL;
}
free_rd_atomic_resources(qp);
/* NOTE(review): qp->sk is used without a NULL check - presumably it
 * is always set by the time cleanup runs; confirm
 */
kernel_sock_shutdown(qp->sk, SHUT_RDWR);
}

View file

@ -0,0 +1,217 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
/* Create the mmap bookkeeping for a queue buffer and report it to the
 * caller-supplied output buffer.
 *
 * @rxe:      device owning the pending-mmap list
 * @udata:    user data descriptor; when NULL nothing is created and
 *            *@ip_p is set to NULL
 * @is_req:   when true the info is written sizeof(struct mminfo) bytes
 *            into the output buffer, otherwise at its start
 * @context:  passed through to rxe_create_mmap_info()
 * @buf:      queue buffer to describe
 * @buf_size: size of @buf in bytes
 * @ip_p:     receives the new rxe_mmap_info (or NULL)
 *
 * Returns 0 on success and -EINVAL on any failure (output buffer too
 * small, allocation failure, or copy_to_user() fault).
 */
int do_mmap_info(struct rxe_dev *rxe,
		 struct ib_udata *udata,
		 bool is_req,
		 struct ib_ucontext *context,
		 struct rxe_queue_buf *buf,
		 size_t buf_size,
		 struct rxe_mmap_info **ip_p)
{
	int err;
	u32 len, offset;
	struct rxe_mmap_info *ip = NULL;

	if (udata) {
		if (is_req) {
			/* reject short buffers first: the subtraction
			 * below would otherwise wrap around and slip
			 * past the length check
			 */
			if (udata->outlen < sizeof(struct mminfo))
				goto err1;

			len = udata->outlen - sizeof(struct mminfo);
			offset = sizeof(struct mminfo);
		} else {
			len = udata->outlen;
			offset = 0;
		}

		if (len < sizeof(ip->info))
			goto err1;

		ip = rxe_create_mmap_info(rxe, buf_size, context, buf);
		if (!ip)
			goto err1;

		err = copy_to_user(udata->outbuf + offset, &ip->info,
				   sizeof(ip->info));
		if (err)
			goto err2;

		/* publish the entry on the device's pending-mmap list */
		spin_lock_bh(&rxe->pending_lock);
		list_add(&ip->pending_mmaps, &rxe->pending_mmaps);
		spin_unlock_bh(&rxe->pending_lock);
	}

	*ip_p = ip;

	return 0;

err2:
	kfree(ip);
err1:
	return -EINVAL;
}
/* Allocate a queue header and its ring buffer.
 *
 * @rxe:       owning device, stored in the queue
 * @num_elem:  in: requested capacity; out: actual capacity, which is
 *             one less than the power-of-two number of slots
 * @elem_size: requested element size in bytes; the stride actually used
 *             is at least a cache line and a power of two
 *
 * Returns the new queue, or NULL when *@num_elem is negative or an
 * allocation fails.
 */
struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
				 int *num_elem,
				 unsigned int elem_size)
{
	struct rxe_queue *q;
	size_t buf_size;
	unsigned int num_slots;

	/* num_elem == 0 is allowed, but uninteresting */
	if (*num_elem < 0)
		goto err1;

	/* zeroed so that q->ip, which rxe_queue_cleanup() tests, is NULL
	 * until a caller installs mmap info
	 */
	q = kzalloc(sizeof(*q), GFP_KERNEL);
	if (!q)
		goto err1;

	q->rxe = rxe;

	/* used in resize, only need to copy used part of queue */
	q->elem_size = elem_size;

	/* pad element up to at least a cacheline and always a power of 2 */
	if (elem_size < cache_line_size())
		elem_size = cache_line_size();
	elem_size = roundup_pow_of_two(elem_size);

	q->log2_elem_size = order_base_2(elem_size);

	/* one extra slot: the ring is empty when the indices are equal */
	num_slots = *num_elem + 1;
	num_slots = roundup_pow_of_two(num_slots);
	q->index_mask = num_slots - 1;

	buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size;

	q->buf = vmalloc_user(buf_size);
	if (!q->buf)
		goto err2;

	/* copies of the sizing parameters kept in the buffer itself */
	q->buf->log2_elem_size = q->log2_elem_size;
	q->buf->index_mask = q->index_mask;

	q->buf_size = buf_size;

	*num_elem = num_slots - 1;
	return q;

err2:
	kfree(q);
err1:
	return NULL;
}
/* Move every queued element from @q into @new_q, then exchange the two
 * queue headers so that existing pointers to @q now refer to the new
 * storage. Fails with -EINVAL, leaving both queues untouched, when @q
 * holds more than @num_elem elements.
 */
static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
			 unsigned int num_elem)
{
	if (!queue_empty(q)) {
		if (num_elem < queue_count(q))
			return -EINVAL;
	}

	for (; !queue_empty(q); advance_consumer(q)) {
		void *dst = producer_addr(new_q);
		void *src = consumer_addr(q);

		memcpy(dst, src, new_q->elem_size);
		advance_producer(new_q);
	}

	swap(*q, *new_q);

	return 0;
}
/* Resize @q to hold at least *@num_elem_p elements.
 *
 * A replacement queue is built and registered for mmap, the contents
 * are moved across under @consumer_lock (and @producer_lock when one is
 * supplied), and whichever header ends up in the temporary queue is
 * released. On success *@num_elem_p is updated to the real capacity.
 *
 * Returns 0, -ENOMEM when the new queue cannot be allocated, or the
 * error from do_mmap_info() / resize_finish().
 */
int rxe_queue_resize(struct rxe_queue *q,
		     unsigned int *num_elem_p,
		     unsigned int elem_size,
		     struct ib_ucontext *context,
		     struct ib_udata *udata,
		     spinlock_t *producer_lock,
		     spinlock_t *consumer_lock)
{
	unsigned int num_elem = *num_elem_p;
	unsigned long flags = 0, flags1;
	struct rxe_queue *new_q;
	int err;

	new_q = rxe_queue_init(q->rxe, &num_elem, elem_size);
	if (!new_q)
		return -ENOMEM;

	err = do_mmap_info(new_q->rxe, udata, false, context, new_q->buf,
			   new_q->buf_size, &new_q->ip);
	if (err) {
		vfree(new_q->buf);
		kfree(new_q);
		return err;
	}

	/* consumer lock is the outer lock, producer lock the inner one */
	spin_lock_irqsave(consumer_lock, flags1);
	if (producer_lock)
		spin_lock_irqsave(producer_lock, flags);

	err = resize_finish(q, new_q, num_elem);

	if (producer_lock)
		spin_unlock_irqrestore(producer_lock, flags);
	spin_unlock_irqrestore(consumer_lock, flags1);

	/* on success this releases the old storage (headers were
	 * swapped), on failure the unused new storage
	 */
	rxe_queue_cleanup(new_q);
	if (err)
		return err;

	*num_elem_p = num_elem;
	return 0;
}
void rxe_queue_cleanup(struct rxe_queue *q)
{
if (q->ip)
kref_put(&q->ip->ref, rxe_mmap_release);
else
vfree(q->buf);
kfree(q);
}

View file

@ -0,0 +1,178 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_QUEUE_H
#define RXE_QUEUE_H
/* implements a simple circular buffer that can optionally be
* shared between user space and the kernel and can be resized
* the requested element size is rounded up to a power of 2
* and the number of elements in the buffer is also rounded
* up to a power of 2. Since the queue is empty when the
* producer and consumer indices match the maximum capacity
* of the queue is one less than the number of element slots
*/
/* this data structure is shared between user space and kernel
* space for those cases where the queue is shared. It contains
* the producer and consumer indices. It also contains a copy
* of the queue size parameters for user space to use but the
* kernel must use the parameters in the rxe_queue struct
* this MUST MATCH the corresponding librxe struct
* for performance reasons arrange to have producer and consumer
* pointers in separate cache lines
* the kernel should always mask the indices to avoid accessing
* memory outside of the data area
*/
/* Ring buffer header followed by the element storage. The padding
 * arrays place producer_index, consumer_index and data at 32-word
 * (128 byte) offsets from the start of the structure.
 */
struct rxe_queue_buf {
/* copy of the element stride, as a power of two */
__u32 log2_elem_size;
/* copy of (number of slots - 1) */
__u32 index_mask;
/* pads the two words above out to 32 words */
__u32 pad_1[30];
/* next slot to be written */
__u32 producer_index;
__u32 pad_2[31];
/* next slot to be read */
__u32 consumer_index;
__u32 pad_3[31];
/* element storage, (index_mask + 1) << log2_elem_size bytes */
__u8 data[0];
};
/* Kernel-side queue header; holds the sizing parameters the inline
 * helpers below use to address q->buf.
 */
struct rxe_queue {
/* owning device */
struct rxe_dev *rxe;
/* ring buffer header plus element storage */
struct rxe_queue_buf *buf;
/* mmap bookkeeping for buf; NULL when none is attached */
struct rxe_mmap_info *ip;
/* total size of buf in bytes, header included */
size_t buf_size;
/* element size as requested by the caller, before padding */
size_t elem_size;
/* log2 of the padded element stride */
unsigned int log2_elem_size;
/* number of slots - 1; slot count is a power of two */
unsigned int index_mask;
};
/* create mmap info for @buf and copy it to the udata output buffer;
 * *@ip_p receives the result (NULL when @udata is NULL)
 */
int do_mmap_info(struct rxe_dev *rxe,
struct ib_udata *udata,
bool is_req,
struct ib_ucontext *context,
struct rxe_queue_buf *buf,
size_t buf_size,
struct rxe_mmap_info **ip_p);
/* allocate a queue; *@num_elem is updated to the actual capacity */
struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
int *num_elem,
unsigned int elem_size);
/* replace the storage of @queue with a ring of at least *@num_elem_p
 * elements, preserving the queued entries
 */
int rxe_queue_resize(struct rxe_queue *q,
unsigned int *num_elem_p,
unsigned int elem_size,
struct ib_ucontext *context,
struct ib_udata *udata,
/* Protect producers while resizing queue */
spinlock_t *producer_lock,
/* Protect consumers while resizing queue */
spinlock_t *consumer_lock);
/* free the queue header and release its buffer */
void rxe_queue_cleanup(struct rxe_queue *queue);
/* Return the slot index that follows @index, wrapping at the end of
 * the ring. The mask comes from the kernel's own queue header rather
 * than from q->buf, because q->buf can be shared with user space and
 * its copy of the mask is therefore not trustworthy.
 */
static inline int next_index(struct rxe_queue *q, int index)
{
	return (index + 1) & q->index_mask;
}
static inline int queue_empty(struct rxe_queue *q)
{
return ((q->buf->producer_index - q->buf->consumer_index)
& q->index_mask) == 0;
}
static inline int queue_full(struct rxe_queue *q)
{
return ((q->buf->producer_index + 1 - q->buf->consumer_index)
& q->index_mask) == 0;
}
static inline void advance_producer(struct rxe_queue *q)
{
q->buf->producer_index = (q->buf->producer_index + 1)
& q->index_mask;
}
static inline void advance_consumer(struct rxe_queue *q)
{
q->buf->consumer_index = (q->buf->consumer_index + 1)
& q->index_mask;
}
/* address of the slot the producer index currently selects */
static inline void *producer_addr(struct rxe_queue *q)
{
	unsigned int slot = q->buf->producer_index & q->index_mask;

	return q->buf->data + (slot << q->log2_elem_size);
}
/* address of the slot the consumer index currently selects */
static inline void *consumer_addr(struct rxe_queue *q)
{
	unsigned int slot = q->buf->consumer_index & q->index_mask;

	return q->buf->data + (slot << q->log2_elem_size);
}
static inline unsigned int producer_index(struct rxe_queue *q)
{
return q->buf->producer_index;
}
static inline unsigned int consumer_index(struct rxe_queue *q)
{
return q->buf->consumer_index;
}
/* Address of the slot selected by @index (masked to the ring size).
 * Both the mask and the element stride come from the kernel's own
 * queue header; the copies in q->buf can be shared with user space and
 * must not decide which memory the kernel touches.
 */
static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
{
	return q->buf->data + ((index & q->index_mask)
				<< q->log2_elem_size);
}
static inline unsigned int index_from_addr(const struct rxe_queue *q,
const void *addr)
{
return (((u8 *)addr - q->buf->data) >> q->log2_elem_size)
& q->index_mask;
}
static inline unsigned int queue_count(const struct rxe_queue *q)
{
return (q->buf->producer_index - q->buf->consumer_index)
& q->index_mask;
}
/* oldest queued element, or NULL when the queue is empty */
static inline void *queue_head(struct rxe_queue *q)
{
	if (queue_empty(q))
		return NULL;

	return consumer_addr(q);
}
#endif /* RXE_QUEUE_H */

View file

@ -0,0 +1,420 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/skbuff.h>
#include "rxe.h"
#include "rxe_loc.h"
/* Check that @pkt is acceptable for @qp: the qp must be valid, the
 * packet opcode must pass the per-qp-type test, and the half of the qp
 * that will handle the packet must be in a usable state.
 *
 * Returns 0 when the packet may be processed, -EINVAL otherwise.
 */
static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
			    struct rxe_qp *qp)
{
	if (unlikely(!qp->valid))
		return -EINVAL;

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) {
			pr_warn_ratelimited("bad qp type\n");
			return -EINVAL;
		}
		break;

	case IB_QPT_UC:
		if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) {
			pr_warn_ratelimited("bad qp type\n");
			return -EINVAL;
		}
		break;

	case IB_QPT_UD:
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) {
			pr_warn_ratelimited("bad qp type\n");
			return -EINVAL;
		}
		break;

	default:
		pr_warn_ratelimited("unsupported qp type\n");
		return -EINVAL;
	}

	/* request packets need a ready responder */
	if (pkt->mask & RXE_REQ_MASK) {
		if (unlikely(qp->resp.state != QP_STATE_READY))
			return -EINVAL;
		return 0;
	}

	/* anything else needs the requester between READY and DRAINED */
	if (unlikely(qp->req.state < QP_STATE_READY ||
		     qp->req.state > QP_STATE_DRAINED))
		return -EINVAL;

	return 0;
}
static void set_bad_pkey_cntr(struct rxe_port *port)
{
spin_lock_bh(&port->port_lock);
port->attr.bad_pkey_cntr = min((u32)0xffff,
port->attr.bad_pkey_cntr + 1);
spin_unlock_bh(&port->port_lock);
}
static void set_qkey_viol_cntr(struct rxe_port *port)
{
spin_lock_bh(&port->port_lock);
port->attr.qkey_viol_cntr = min((u32)0xffff,
port->attr.qkey_viol_cntr + 1);
spin_unlock_bh(&port->port_lock);
}
/* Validate the packet's pkey and, for UD/GSI qps, its qkey.
 *
 * qpn 1 accepts any pkey found in the port table; other non-zero qpns
 * must match the table entry selected by the qp; qpn 0 skips the pkey
 * test. pkt->pkey_index records the matching index (0 for qpn 0).
 * Failures bump the matching port counter.
 *
 * Returns 0 on success, -EINVAL on a key mismatch.
 */
static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
		      u32 qpn, struct rxe_qp *qp)
{
	struct rxe_port *port = &rxe->port;
	u16 pkey = bth_pkey(pkt);
	int i;

	pkt->pkey_index = 0;

	if (qpn == 1) {
		for (i = 0; i < port->attr.pkey_tbl_len; i++) {
			if (pkey_match(pkey, port->pkey_tbl[i]))
				break;
		}

		/* ran off the end of the table: no entry matched */
		if (i == port->attr.pkey_tbl_len) {
			pr_warn_ratelimited("bad pkey = 0x%x\n", pkey);
			set_bad_pkey_cntr(port);
			return -EINVAL;
		}

		pkt->pkey_index = i;
	} else if (qpn != 0) {
		if (unlikely(!pkey_match(pkey,
					 port->pkey_tbl[qp->attr.pkey_index]))) {
			pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey);
			set_bad_pkey_cntr(port);
			return -EINVAL;
		}

		pkt->pkey_index = qp->attr.pkey_index;
	}

	if (qpn != 0 && pkt->mask &&
	    (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI)) {
		u32 qkey;

		if (qpn == 1)
			qkey = GSI_QKEY;
		else
			qkey = qp->attr.qkey;

		if (unlikely(deth_qkey(pkt) != qkey)) {
			pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n",
					    deth_qkey(pkt), qkey, qpn);
			set_qkey_viol_cntr(port);
			return -EINVAL;
		}
	}

	return 0;
}
/* For RC and UC qps, check that the packet arrived on the qp's port
 * and that its IP addresses are the mirror image of the qp's primary
 * address vector (packet destination == qp source and vice versa).
 * Other qp types are not checked.
 *
 * Returns 0 when the packet is acceptable, -EINVAL otherwise.
 */
static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
		      struct rxe_qp *qp)
{
	struct sk_buff *skb = PKT_TO_SKB(pkt);

	if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC)
		return 0;

	if (unlikely(pkt->port_num != qp->attr.port_num)) {
		pr_warn_ratelimited("port %d != qp port %d\n",
				    pkt->port_num, qp->attr.port_num);
		return -EINVAL;
	}

	if (skb->protocol == htons(ETH_P_IP)) {
		struct in_addr *local =
			&qp->pri_av.sgid_addr._sockaddr_in.sin_addr;
		struct in_addr *remote =
			&qp->pri_av.dgid_addr._sockaddr_in.sin_addr;

		if (ip_hdr(skb)->daddr != local->s_addr) {
			pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n",
					    &ip_hdr(skb)->daddr,
					    &local->s_addr);
			return -EINVAL;
		}

		if (ip_hdr(skb)->saddr != remote->s_addr) {
			pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n",
					    &ip_hdr(skb)->saddr,
					    &remote->s_addr);
			return -EINVAL;
		}

		return 0;
	}

	if (skb->protocol == htons(ETH_P_IPV6)) {
		struct in6_addr *local =
			&qp->pri_av.sgid_addr._sockaddr_in6.sin6_addr;
		struct in6_addr *remote =
			&qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr;

		if (memcmp(&ipv6_hdr(skb)->daddr, local, sizeof(*local))) {
			pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n",
					    &ipv6_hdr(skb)->daddr, local);
			return -EINVAL;
		}

		if (memcmp(&ipv6_hdr(skb)->saddr, remote, sizeof(*remote))) {
			pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n",
					    &ipv6_hdr(skb)->saddr, remote);
			return -EINVAL;
		}
	}

	return 0;
}
/* Validate the transport header of @pkt and resolve its destination qp.
 *
 * For a unicast qpn the qp is looked up in the qp pool (qpn 0 and 1 map
 * to the port's SMI/GSI indices) and must pass the type/state, address
 * and key checks; on success pkt->qp carries the reference obtained by
 * the lookup. For the multicast qpn only the GRH flag is required and
 * pkt->qp is set to NULL.
 *
 * Returns 0 on success, -EINVAL otherwise (any qp reference is dropped).
 */
static int hdr_check(struct rxe_pkt_info *pkt)
{
	struct rxe_dev *rxe = pkt->rxe;
	struct rxe_port *port = &rxe->port;
	struct rxe_qp *qp = NULL;
	u32 qpn = bth_qpn(pkt);
	int index;

	if (unlikely(bth_tver(pkt) != BTH_TVER)) {
		pr_warn_ratelimited("bad tver\n");
		return -EINVAL;
	}

	if (qpn == IB_MULTICAST_QPN) {
		if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) {
			pr_warn_ratelimited("no grh for mcast qpn\n");
			return -EINVAL;
		}

		/* no single qp for multicast */
		pkt->qp = qp;
		return 0;
	}

	if (qpn == 0)
		index = port->qp_smi_index;
	else if (qpn == 1)
		index = port->qp_gsi_index;
	else
		index = qpn;

	qp = rxe_pool_get_index(&rxe->qp_pool, index);
	if (unlikely(!qp)) {
		pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn);
		return -EINVAL;
	}

	/* checks run in this order and stop at the first failure */
	if (unlikely(check_type_state(rxe, pkt, qp)) ||
	    unlikely(check_addr(rxe, pkt, qp)) ||
	    unlikely(check_keys(rxe, pkt, qpn, qp))) {
		rxe_drop_ref(qp);
		return -EINVAL;
	}

	pkt->qp = qp;
	return 0;
}
/* hand the skb to the responder for request packets, or to the
 * completer for everything else
 */
static inline void rxe_rcv_pkt(struct rxe_dev *rxe,
			       struct rxe_pkt_info *pkt,
			       struct sk_buff *skb)
{
	if (!(pkt->mask & RXE_REQ_MASK)) {
		rxe_comp_queue_pkt(rxe, pkt->qp, skb);
		return;
	}

	rxe_resp_queue_pkt(rxe, pkt->qp, skb);
}
/* Deliver a multicast packet to every qp attached to the destination
 * group that passes the type/state and key checks.
 *
 * The skb is consumed: each accepting qp except the last list entry
 * gets the current skb while a clone is carried forward; whatever skb
 * is left over at the end is freed.
 */
static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
{
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
	struct rxe_mc_grp *mcg;
	struct sk_buff *skb_copy;
	struct rxe_mc_elem *mce;
	struct rxe_qp *qp;
	union ib_gid dgid;
	int err;

	/* the group key is the destination address as a GID */
	if (skb->protocol == htons(ETH_P_IP))
		ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
				       (struct in6_addr *)&dgid);
	else if (skb->protocol == htons(ETH_P_IPV6))
		memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid));

	/* lookup mcast group corresponding to mgid, takes a ref */
	mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
	if (!mcg)
		goto err1;	/* mcast group not registered */

	spin_lock_bh(&mcg->mcg_lock);

	list_for_each_entry(mce, &mcg->qp_list, qp_list) {
		qp = mce->qp;
		/* skb may be a clone from the previous iteration */
		pkt = SKB_TO_PKT(skb);

		/* validate qp for incoming packet */
		err = check_type_state(rxe, pkt, qp);
		if (err)
			continue;

		err = check_keys(rxe, pkt, bth_qpn(pkt), qp);
		if (err)
			continue;

		/* if *not* the last qp in the list
		 * make a copy of the skb to post to the next qp.
		 * mcg_lock is held with bottom halves disabled, so the
		 * allocation must not sleep.
		 */
		skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
				skb_clone(skb, GFP_ATOMIC) : NULL;

		pkt->qp = qp;
		rxe_add_ref(qp);
		rxe_rcv_pkt(rxe, pkt, skb);

		/* stop when there is nothing left to deliver (last qp
		 * or clone failure)
		 */
		skb = skb_copy;
		if (!skb)
			break;
	}

	spin_unlock_bh(&mcg->mcg_lock);

	rxe_drop_ref(mcg);	/* drop ref from rxe_pool_get_key. */

err1:
	if (skb)
		kfree_skb(skb);
}
static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
union ib_gid dgid;
union ib_gid *pdgid;
u16 index;
if (skb->protocol == htons(ETH_P_IP)) {
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
(struct in6_addr *)&dgid);
pdgid = &dgid;
} else {
pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
}
return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
IB_GID_TYPE_ROCE_UDP_ENCAP,
1, rxe->ndev, &index);
}
/* rxe_rcv is called from the interface driver
 *
 * Validates an incoming packet (length, destination GID, transport
 * header, ICRC) and hands it to the multicast or unicast receive path.
 * The skb is always consumed; the function always returns 0.
 */
int rxe_rcv(struct sk_buff *skb)
{
	int err;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
	struct rxe_dev *rxe = pkt->rxe;
	__be32 *icrcp;
	u32 calc_icrc, pack_icrc;

	pkt->offset = 0;
	/* the drop path tests pkt->qp, and nothing else in this function
	 * sets it before the first two exits below
	 */
	pkt->qp = NULL;

	if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
		goto drop;

	if (unlikely(rxe_match_dgid(rxe, skb) < 0)) {
		pr_warn_ratelimited("failed matching dgid\n");
		goto drop;
	}

	pkt->opcode = bth_opcode(pkt);
	pkt->psn = bth_psn(pkt);
	pkt->mask |= rxe_opcode[pkt->opcode].mask;

	if (unlikely(skb->len < header_size(pkt)))
		goto drop;

	/* on success pkt->qp holds a reference (NULL for multicast) */
	err = hdr_check(pkt);
	if (unlikely(err))
		goto drop;

	/* Verify ICRC */
	icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);
	pack_icrc = be32_to_cpu(*icrcp);

	calc_icrc = rxe_icrc_hdr(pkt, skb);
	calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt));
	calc_icrc = cpu_to_be32(~calc_icrc);
	if (unlikely(calc_icrc != pack_icrc)) {
		/* print the address directly: a formatted IPv6 address is
		 * far longer than sizeof(struct in6_addr), so staging it
		 * in a buffer of that size would overflow the stack
		 */
		if (skb->protocol == htons(ETH_P_IPV6))
			pr_warn_ratelimited("bad ICRC from %pI6\n",
					    &ipv6_hdr(skb)->saddr);
		else if (skb->protocol == htons(ETH_P_IP))
			pr_warn_ratelimited("bad ICRC from %pI4\n",
					    &ip_hdr(skb)->saddr);
		else
			pr_warn_ratelimited("bad ICRC from unknown\n");

		goto drop;
	}

	if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
		rxe_rcv_mcast_pkt(rxe, skb);
	else
		rxe_rcv_pkt(rxe, pkt, skb);

	return 0;

drop:
	if (pkt->qp)
		rxe_drop_ref(pkt->qp);

	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(rxe_rcv);

View file

@ -0,0 +1,726 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/skbuff.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       unsigned opcode);

/* Replay the bookkeeping for the first @npsn packets of a send/write
 * wqe without transmitting anything: per packet, recompute
 * qp->req.opcode, consume up to one mtu of the wqe's dma state and,
 * for writes, advance the remote address by one mtu.
 */
static inline void retry_first_write_send(struct rxe_qp *qp,
					  struct rxe_send_wqe *wqe,
					  unsigned mask, int npsn)
{
	int left;

	for (left = npsn; left > 0; left--) {
		int to_send;

		/* at most one mtu per packet */
		if (wqe->dma.resid > qp->mtu)
			to_send = qp->mtu;
		else
			to_send = wqe->dma.resid;

		qp->req.opcode = next_opcode(qp, wqe, wqe->wr.opcode);

		if (!(wqe->wr.send_flags & IB_SEND_INLINE)) {
			advance_dma_data(&wqe->dma, to_send);
		} else {
			wqe->dma.resid -= to_send;
			wqe->dma.sge_offset += to_send;
		}

		if (mask & WR_WRITE_MASK)
			wqe->iova += qp->mtu;
	}
}
/* Rewind the requester to the completer's position so that every wqe
 * not yet marked done is sent again. Walks the send queue from the
 * consumer index, resets each wqe's remote address and dma state, and
 * marks it posted; the walk stops at the first wqe that is already in
 * the posted state.
 */
static void req_retry(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
unsigned int wqe_index;
unsigned int mask;
int npsn;
int first = 1;
/* NOTE(review): queue_head() returns NULL for an empty queue and wqe is
 * dereferenced on the next line - presumably callers only retry with
 * work outstanding; confirm
 */
wqe = queue_head(qp->sq.queue);
/* PSN distance from the head wqe's first_psn to the completer's psn */
npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
qp->req.wqe_index = consumer_index(qp->sq.queue);
qp->req.psn = qp->comp.psn;
qp->req.opcode = -1;
for (wqe_index = consumer_index(qp->sq.queue);
wqe_index != producer_index(qp->sq.queue);
wqe_index = next_index(qp->sq.queue, wqe_index)) {
wqe = addr_from_index(qp->sq.queue, wqe_index);
mask = wr_opcode_mask(wqe->wr.opcode, qp);
/* an already posted wqe ends the walk */
if (wqe->state == wqe_state_posted)
break;
/* done wqes are left untouched */
if (wqe->state == wqe_state_done)
continue;
/* restore the starting remote address for atomic / rdma requests */
wqe->iova = (mask & WR_ATOMIC_MASK) ?
wqe->wr.wr.atomic.remote_addr :
(mask & WR_READ_OR_WRITE_MASK) ?
wqe->wr.wr.rdma.remote_addr :
0;
/* dma progress is reset except for a read that is the first wqe
 * handled in this walk
 */
if (!first || (mask & WR_READ_MASK) == 0) {
wqe->dma.resid = wqe->dma.length;
wqe->dma.cur_sge = 0;
wqe->dma.sge_offset = 0;
}
/* the first wqe handled is stepped past the npsn packets counted
 * above
 */
if (first) {
first = 0;
if (mask & WR_WRITE_OR_SEND_MASK)
retry_first_write_send(qp, wqe, mask, npsn);
if (mask & WR_READ_MASK)
wqe->iova += npsn * qp->mtu;
}
wqe->state = wqe_state_posted;
}
}
/* timer callback: @data carries the struct rxe_qp pointer; schedules
 * the requester task for that qp
 */
void rnr_nak_timer(unsigned long data)
{
	struct rxe_qp *owner = (struct rxe_qp *)data;

	pr_debug("rnr nak timer fired\n");

	rxe_run_task(&owner->req.task, 1);
}
/* Pick the next send wqe for the requester to work on.
 *
 * While the qp is draining this first checks, under state_lock, whether
 * the drain has finished; if so the state becomes QP_STATE_DRAINED and
 * an IB_EVENT_SQ_DRAINED event is delivered to the qp's event handler.
 *
 * Returns NULL when the requester has caught up with the producer, when
 * the qp is draining/drained and the wqe is not already being
 * processed, or when a fenced wqe has to wait (wait_fence is set).
 * Otherwise returns the wqe at req.wqe_index with wqe->mask filled in.
 */
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe = queue_head(qp->sq.queue);
unsigned long flags;
if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
/* check to see if we are drained;
 * state_lock used by requester and completer
 */
spin_lock_irqsave(&qp->state_lock, flags);
/* do { } while (0) so each exit can unlock and break */
do {
if (qp->req.state != QP_STATE_DRAIN) {
/* comp just finished */
spin_unlock_irqrestore(&qp->state_lock,
flags);
break;
}
if (wqe && ((qp->req.wqe_index !=
consumer_index(qp->sq.queue)) ||
(wqe->state != wqe_state_posted))) {
/* comp not done yet */
spin_unlock_irqrestore(&qp->state_lock,
flags);
break;
}
qp->req.state = QP_STATE_DRAINED;
spin_unlock_irqrestore(&qp->state_lock, flags);
/* the handler is called after state_lock has been released */
if (qp->ibqp.event_handler) {
struct ib_event ev;
ev.device = qp->ibqp.device;
ev.element.qp = &qp->ibqp;
ev.event = IB_EVENT_SQ_DRAINED;
qp->ibqp.event_handler(&ev,
qp->ibqp.qp_context);
}
} while (0);
}
/* requester has reached the producer: nothing to send */
if (qp->req.wqe_index == producer_index(qp->sq.queue))
return NULL;
wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index);
/* while draining/drained only a wqe already in progress continues */
if (unlikely((qp->req.state == QP_STATE_DRAIN ||
qp->req.state == QP_STATE_DRAINED) &&
(wqe->state != wqe_state_processing)))
return NULL;
/* a fenced wqe waits until it is at the consumer index */
if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
(qp->req.wqe_index != consumer_index(qp->sq.queue)))) {
qp->req.wait_fence = 1;
return NULL;
}
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
}
/* Choose the RC wire opcode for the next packet of a work request.
 *
 * @opcode is the work request opcode, @fits is non-zero when the rest
 * of the message fits in one packet. For sends and writes the choice
 * between FIRST/MIDDLE/LAST/ONLY depends on whether the previous
 * packet (qp->req.opcode) was a FIRST or MIDDLE of the same kind.
 *
 * Returns the opcode, or -EINVAL for an unhandled work request type.
 */
static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits)
{
	const int in_write =
		qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
	const int in_send =
		qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE;

	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (in_write)
			return fits ? IB_OPCODE_RC_RDMA_WRITE_LAST :
				      IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		return fits ? IB_OPCODE_RC_RDMA_WRITE_ONLY :
			      IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (in_write)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		return fits ? IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
			      IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (in_send)
			return fits ? IB_OPCODE_RC_SEND_LAST :
				      IB_OPCODE_RC_SEND_MIDDLE;
		return fits ? IB_OPCODE_RC_SEND_ONLY :
			      IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (in_send)
			return fits ? IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
				      IB_OPCODE_RC_SEND_MIDDLE;
		return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
			      IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_RDMA_READ:
		return IB_OPCODE_RC_RDMA_READ_REQUEST;

	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_OPCODE_RC_COMPARE_SWAP;

	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_OPCODE_RC_FETCH_ADD;

	case IB_WR_SEND_WITH_INV:
		if (in_send)
			return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
				      IB_OPCODE_RC_SEND_MIDDLE;
		return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
			      IB_OPCODE_RC_SEND_FIRST;

	/* these are returned unchanged */
	case IB_WR_REG_MR:
	case IB_WR_LOCAL_INV:
		return opcode;
	}

	return -EINVAL;
}
/*
 * Pick the UC wire opcode for the next packet of a send work request.
 *
 * @qp:     queue pair; qp->req.opcode holds the opcode of the previous packet
 * @opcode: work request opcode (IB_WR_*)
 * @fits:   non-zero when the remaining payload fits in a single packet
 *
 * Only RDMA write and send (each with or without immediate) are handled;
 * every other work request opcode yields -EINVAL.
 */
static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits)
{
	const int prev = qp->req.opcode;
	const int in_write = (prev == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
			      prev == IB_OPCODE_UC_RDMA_WRITE_MIDDLE);
	const int in_send = (prev == IB_OPCODE_UC_SEND_FIRST ||
			     prev == IB_OPCODE_UC_SEND_MIDDLE);

	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (fits)
			return in_write ? IB_OPCODE_UC_RDMA_WRITE_LAST :
					  IB_OPCODE_UC_RDMA_WRITE_ONLY;
		return in_write ? IB_OPCODE_UC_RDMA_WRITE_MIDDLE :
				  IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (fits)
			return in_write ?
				IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE;
		return in_write ? IB_OPCODE_UC_RDMA_WRITE_MIDDLE :
				  IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (fits)
			return in_send ? IB_OPCODE_UC_SEND_LAST :
					 IB_OPCODE_UC_SEND_ONLY;
		return in_send ? IB_OPCODE_UC_SEND_MIDDLE :
				 IB_OPCODE_UC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (fits)
			return in_send ?
				IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE;
		return in_send ? IB_OPCODE_UC_SEND_MIDDLE :
				 IB_OPCODE_UC_SEND_FIRST;
	}

	return -EINVAL;
}
/*
 * Map a work request opcode to the wire opcode of the next packet,
 * dispatching on the QP type.
 *
 * @qp:     queue pair the request is posted on
 * @wqe:    send WQE; wqe->dma.resid is the payload still to be sent
 * @opcode: work request opcode (IB_WR_*)
 *
 * Returns the wire opcode, or -EINVAL when the QP type / opcode
 * combination is not handled.
 */
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       unsigned opcode)
{
	/* true when what is left of the message fits in one packet */
	int fits = (wqe->dma.resid <= qp->mtu);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return next_opcode_rc(qp, opcode, fits);

	case IB_QPT_UC:
		return next_opcode_uc(qp, opcode, fits);

	case IB_QPT_SMI:
	case IB_QPT_UD:
	case IB_QPT_GSI:
		/* datagram QPs only ever emit single-packet sends */
		if (opcode == IB_WR_SEND)
			return IB_OPCODE_UD_SEND_ONLY;
		if (opcode == IB_WR_SEND_WITH_IMM)
			return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		return -EINVAL;

	default:
		return -EINVAL;
	}
}
/*
 * Reserve one unit of qp->req.rd_atomic for a read/atomic WQE.
 *
 * Returns 0 when the WQE already holds a reservation or one was just
 * taken, -EAGAIN when the counter is exhausted.  In the -EAGAIN case
 * qp->req.need_rd_atomic is left set.
 */
static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	if (wqe->has_rd_atomic)
		return 0;

	qp->req.need_rd_atomic = 1;

	if (atomic_dec_return(&qp->req.rd_atomic) < 0) {
		/* nothing left: undo the decrement and ask to be retried */
		atomic_inc(&qp->req.rd_atomic);
		return -EAGAIN;
	}

	qp->req.need_rd_atomic = 0;
	wqe->has_rd_atomic = 1;
	return 0;
}
/*
 * Return the maximum payload per packet for a request on this QP.
 *
 * RC and UC queue pairs use the MTU stored in the QP (qp->mtu); every
 * other QP type uses the port's mtu_cap.
 *
 * @wqe is not consulted; it is kept so the signature stays unchanged.
 */
static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_port *port;

	if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
		return qp->mtu;

	port = &rxe->port;

	return port->mtu_cap;
}
/*
 * Allocate an skb for one request packet and fill in its headers.
 *
 * @qp:      queue pair sending the packet
 * @wqe:     send WQE the packet belongs to
 * @opcode:  wire opcode of the packet (index into rxe_opcode[])
 * @payload: number of payload bytes this packet will carry
 * @pkt:     packet info structure to initialise
 *
 * Fills *pkt, obtains the skb from the interface layer's init_packet()
 * callback, then writes the BTH and whichever extension headers
 * (RETH, IMMDT, IETH, ATMETH, DETH) the opcode's mask calls for.
 * The payload itself is not copied here.
 *
 * Returns the skb, or NULL if init_packet() returned NULL.
 */
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
int opcode, int payload,
struct rxe_pkt_info *pkt)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_port *port = &rxe->port;
struct sk_buff *skb;
struct rxe_send_wr *ibwr = &wqe->wr;
struct rxe_av *av;
/* bytes needed to round the payload up to a multiple of 4 */
int pad = (-payload) & 0x3;
int paylen;
int solicited;
u16 pkey;
u32 qp_num;
int ack_req;
/* length from start of bth to end of icrc */
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
/* pkt->hdr, rxe, port_num and mask are initialized in ifc
 * layer
 */
pkt->opcode = opcode;
pkt->qp = qp;
pkt->psn = qp->req.psn;
pkt->mask = rxe_opcode[opcode].mask;
pkt->paylen = paylen;
pkt->offset = 0;
pkt->wqe = wqe;
/* init skb */
av = rxe_get_av(pkt);
skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt);
if (unlikely(!skb))
return NULL;
/* init bth */
/* solicited bit: only when the WR asked for it, on the packet that
 * ends the message, and only for sends or writes with immediate
 */
solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
(pkt->mask & RXE_END_MASK) &&
((pkt->mask & (RXE_SEND_MASK)) ||
(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
(RXE_WRITE_MASK | RXE_IMMDT_MASK));
/* GSI QPs take the pkey index from the work request, all other
 * QP types from the QP attributes
 */
pkey = (qp_type(qp) == IB_QPT_GSI) ?
port->pkey_tbl[ibwr->wr.ud.pkey_index] :
port->pkey_tbl[qp->attr.pkey_index];
/* packets carrying a DETH are addressed per work request */
qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
qp->attr.dest_qp_num;
/* request an ack on the last packet of a message, or once more than
 * RXE_MAX_PKT_PER_ACK packets went out without one; noack_pkts is
 * only incremented when the first condition is false (short-circuit)
 */
ack_req = ((pkt->mask & RXE_END_MASK) ||
(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
if (ack_req)
qp->req.noack_pkts = 0;
bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
ack_req, pkt->psn);
/* init optional headers */
if (pkt->mask & RXE_RETH_MASK) {
reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
reth_set_va(pkt, wqe->iova);
reth_set_len(pkt, wqe->dma.length);
}
if (pkt->mask & RXE_IMMDT_MASK)
immdt_set_imm(pkt, ibwr->ex.imm_data);
if (pkt->mask & RXE_IETH_MASK)
ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);
if (pkt->mask & RXE_ATMETH_MASK) {
atmeth_set_va(pkt, wqe->iova);
if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
opcode == IB_OPCODE_RD_COMPARE_SWAP) {
atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
} else {
/* any other atomic: compare_add goes in the swap/add field */
atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
}
atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
}
if (pkt->mask & RXE_DETH_MASK) {
/* QP number 1 always uses GSI_QKEY instead of the WR's qkey */
if (qp->ibqp.qp_num == 1)
deth_set_qkey(pkt, GSI_QKEY);
else
deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
deth_set_sqp(pkt, qp->ibqp.qp_num);
}
return skb;
}
/*
 * Copy the payload into a prepared request packet and append the ICRC.
 *
 * @qp:     queue pair sending the packet
 * @wqe:    send WQE supplying the data
 * @pkt:    packet info initialised by init_req_packet()
 * @skb:    skb backing the packet
 * @paylen: number of payload bytes to place in this packet
 *
 * The interface layer's prepare() callback seeds the CRC.  For write and
 * send packets the payload is taken either from the WQE's inline data or,
 * via copy_data(), from the WQE's DMA description; the CRC is extended
 * over it.  The inverted CRC is stored after the payload and pad bytes.
 *
 * Returns 0 on success or the error from prepare() / copy_data().
 */
static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       struct rxe_pkt_info *pkt, struct sk_buff *skb,
		       int paylen)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	u32 icrc = 0;
	u32 *icrc_slot;
	int rc;

	rc = rxe->ifc_ops->prepare(rxe, pkt, skb, &icrc);
	if (rc)
		return rc;

	if (pkt->mask & RXE_WRITE_OR_SEND) {
		if (!(wqe->wr.send_flags & IB_SEND_INLINE)) {
			rc = copy_data(rxe, qp->pd, 0, &wqe->dma,
				       payload_addr(pkt), paylen,
				       from_mem_obj,
				       &icrc);
			if (rc)
				return rc;
		} else {
			u8 *src = &wqe->dma.inline_data[wqe->dma.sge_offset];

			icrc = crc32_le(icrc, src, paylen);
			memcpy(payload_addr(pkt), src, paylen);

			/* advance the inline cursor past what was just sent */
			wqe->dma.resid -= paylen;
			wqe->dma.sge_offset += paylen;
		}
	}

	icrc_slot = payload_addr(pkt) + paylen + bth_pad(pkt);
	*icrc_slot = ~icrc;

	return 0;
}
static void update_wqe_state(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
struct rxe_pkt_info *pkt,
enum wqe_state *prev_state)
{
enum wqe_state prev_state_ = wqe->state;
if (pkt->mask & RXE_END_MASK) {
if (qp_type(qp) == IB_QPT_RC)
wqe->state = wqe_state_pending;
} else {
wqe->state = wqe_state_processing;
}
*prev_state = prev_state_;
}
/*
 * Update the requester state of the QP after a packet has been handed
 * to the transmit path.
 *
 * @qp:      queue pair that sent the packet
 * @wqe:     send WQE the packet belongs to
 * @pkt:     the packet that was sent
 * @payload: payload bytes carried by that packet
 *
 * Records the PSN range of the WQE on its first packet, advances
 * qp->req.psn, remembers the opcode just sent, moves on to the next WQE
 * when the message is finished, clears need_req_skb and starts the
 * retransmit timer if a timeout is configured and the timer is idle.
 */
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_pkt_info *pkt, int payload)
{
/* number of packets left to send including current one */
int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
/* handle zero length packet case */
if (num_pkt == 0)
num_pkt = 1;
if (pkt->mask & RXE_START_MASK) {
wqe->first_psn = qp->req.psn;
wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
}
/* a read request advances the PSN by num_pkt in one step rather than
 * by one per packet sent
 */
if (pkt->mask & RXE_READ_MASK)
qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
else
qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
qp->req.opcode = pkt->opcode;
if (pkt->mask & RXE_END_MASK)
qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
qp->need_req_skb = 0;
/* arm the retransmit timer only when it is not already pending */
if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
}
/*
 * rxe_requester - requester task function for a queue pair
 * @arg: the struct rxe_qp to work on
 *
 * Takes send WQEs from the QP's send queue and turns them into request
 * packets.  Local operations (IB_WR_LOCAL_INV, IB_WR_REG_MR) are executed
 * in place; everything else is built with init_req_packet() and
 * fill_packet() and handed to rxe_xmit_packet().  The function loops
 * (via next_wqe) for as long as packets go out successfully.
 *
 * Return: -EAGAIN when it stops without completing anything (QP not
 * usable, no WQE available, flow-control limits reached, transmit asked
 * to retry); 0 after a WQE was completed locally or failed, in which
 * case the completer is run directly for non-RC QPs.
 */
int rxe_requester(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	struct rxe_pkt_info pkt;
	struct sk_buff *skb;
	struct rxe_send_wqe *wqe;
	unsigned mask;
	int payload;
	int mtu;
	int opcode;
	int ret;
	enum wqe_state prev_state;

next_wqe:
	if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
		goto exit;

	/* a QP in reset gets its requester bookkeeping cleared */
	if (unlikely(qp->req.state == QP_STATE_RESET)) {
		qp->req.wqe_index = consumer_index(qp->sq.queue);
		qp->req.opcode = -1;
		qp->req.need_rd_atomic = 0;
		qp->req.wait_psn = 0;
		qp->req.need_retry = 0;
		goto exit;
	}

	if (unlikely(qp->req.need_retry)) {
		req_retry(qp);
		qp->req.need_retry = 0;
	}

	wqe = req_next_wqe(qp);
	if (unlikely(!wqe))
		goto exit;

	/* local memory-registration work requests never produce a packet */
	if (wqe->mask & WR_REG_MASK) {
		if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
			struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
			struct rxe_mem *rmr;

			/* NOTE(review): if rxe_pool_get_index() takes a
			 * reference on the object it returns, that reference
			 * is not released here - confirm against rxe_pool.
			 */
			rmr = rxe_pool_get_index(&rxe->mr_pool,
						 wqe->wr.ex.invalidate_rkey >> 8);
			if (!rmr) {
				pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey);
				wqe->state = wqe_state_error;
				wqe->status = IB_WC_MW_BIND_ERR;
				goto exit;
			}
			rmr->state = RXE_MEM_STATE_FREE;
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
		} else if (wqe->wr.opcode == IB_WR_REG_MR) {
			struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr);

			rmr->state = RXE_MEM_STATE_VALID;
			rmr->access = wqe->wr.wr.reg.access;
			rmr->lkey = wqe->wr.wr.reg.key;
			rmr->rkey = wqe->wr.wr.reg.key;
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
		} else {
			goto exit;
		}
		qp->req.wqe_index = next_index(qp->sq.queue,
					       qp->req.wqe_index);
		goto next_wqe;
	}

	/* RC only: stop when too many PSNs are outstanding */
	if (unlikely(qp_type(qp) == IB_QPT_RC &&
		     qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) {
		qp->req.wait_psn = 1;
		goto exit;
	}

	/* Limit the number of inflight SKBs per QP */
	if (unlikely(atomic_read(&qp->skb_out) >
		     RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {
		qp->need_req_skb = 1;
		goto exit;
	}

	opcode = next_opcode(qp, wqe, wqe->wr.opcode);
	if (unlikely(opcode < 0)) {
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto exit;
	}

	mask = rxe_opcode[opcode].mask;
	if (unlikely(mask & RXE_READ_OR_ATOMIC)) {
		if (check_init_depth(qp, wqe))
			goto exit;
	}

	mtu = get_mtu(qp, wqe);
	payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
	if (payload > mtu) {
		if (qp_type(qp) == IB_QPT_UD) {
			/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
			 * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
			 * shall not emit any packets for this message. Further, the CI shall not
			 * generate an error due to this condition.
			 */

			/* fake a successful UD send */
			wqe->first_psn = qp->req.psn;
			wqe->last_psn = qp->req.psn;
			qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
			qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
			qp->req.wqe_index = next_index(qp->sq.queue,
						       qp->req.wqe_index);
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
			goto complete;
		}
		payload = mtu;
	}

	skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
	if (unlikely(!skb)) {
		pr_err("Failed allocating skb\n");
		goto err;
	}

	if (fill_packet(qp, wqe, &pkt, skb, payload)) {
		pr_debug("Error during fill packet\n");
		goto err;
	}

	update_wqe_state(qp, wqe, &pkt, &prev_state);
	ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
	if (ret) {
		qp->need_req_skb = 1;
		wqe->state = prev_state;

		if (ret == -EAGAIN) {
			/* this path bypasses err:, so release the skb here */
			kfree_skb(skb);
			rxe_run_task(&qp->req.task, 1);
			goto exit;
		}

		/* err: frees the skb; freeing it here as well would be a
		 * double free
		 */
		goto err;
	}

	update_state(qp, wqe, &pkt, payload);

	goto next_wqe;

err:
	/* skb is NULL here when init_req_packet() failed */
	kfree_skb(skb);
	wqe->status = IB_WC_LOC_PROT_ERR;
	wqe->state = wqe_state_error;

complete:
	if (qp_type(qp) != IB_QPT_RC) {
		while (rxe_completer(qp) == 0)
			;
	}

	return 0;

exit:
	return -EAGAIN;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,193 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
/*
 * Validate SRQ attributes against the device limits and, for an existing
 * SRQ, against its current configuration.
 *
 * @rxe:  device whose attr.max_srq_wr / attr.max_srq_sge bound the values
 * @srq:  existing SRQ, or NULL when none exists yet
 * @attr: attributes to check; max_wr and max_sge may be raised to the
 *        RXE_MIN_SRQ_WR / RXE_MIN_SRQ_SGE minimums
 * @mask: which attributes are being set
 *
 * Returns 0 when the attributes are acceptable, -EINVAL otherwise.
 */
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
		     struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
{
	if (srq && srq->error) {
		pr_warn("srq in error state\n");
		return -EINVAL;
	}

	if (mask & IB_SRQ_MAX_WR) {
		if (attr->max_wr > rxe->attr.max_srq_wr) {
			pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
				attr->max_wr, rxe->attr.max_srq_wr);
			return -EINVAL;
		}

		if (attr->max_wr <= 0) {
			pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
			return -EINVAL;
		}

		/* the queue may not shrink below the current limit */
		if (srq && srq->limit && (attr->max_wr < srq->limit)) {
			pr_warn("max_wr (%d) < srq->limit (%d)\n",
				attr->max_wr, srq->limit);
			return -EINVAL;
		}

		if (attr->max_wr < RXE_MIN_SRQ_WR)
			attr->max_wr = RXE_MIN_SRQ_WR;
	}

	if (mask & IB_SRQ_LIMIT) {
		if (attr->srq_limit > rxe->attr.max_srq_wr) {
			pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
				attr->srq_limit, rxe->attr.max_srq_wr);
			return -EINVAL;
		}

		if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {
			pr_warn("srq_limit (%d) > cur limit(%d)\n",
				attr->srq_limit,
				srq->rq.queue->buf->index_mask);
			return -EINVAL;
		}
	}

	/* max_sge is only examined when mask is exactly IB_SRQ_INIT_MASK */
	if (mask == IB_SRQ_INIT_MASK) {
		if (attr->max_sge > rxe->attr.max_srq_sge) {
			pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
				attr->max_sge, rxe->attr.max_srq_sge);
			return -EINVAL;
		}

		if (attr->max_sge < RXE_MIN_SRQ_SGE)
			attr->max_sge = RXE_MIN_SRQ_SGE;
	}

	return 0;
}
/*
 * Initialise an SRQ from its creation attributes and allocate its
 * receive queue.
 *
 * @rxe:     owning device
 * @srq:     SRQ to initialise
 * @init:    creation attributes (handler, context, limit, max_wr, max_sge)
 * @context: user context passed on to do_mmap_info()
 * @udata:   user data buffer; when large enough the SRQ number is copied
 *           out after the mminfo structure
 *
 * Returns 0 on success, -ENOMEM if the queue cannot be allocated, the
 * error from do_mmap_info(), or -EFAULT if the copy to user space fails.
 * The queue stays attached to @srq on the later failure paths.
 */
int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
		      struct ib_srq_init_attr *init,
		      struct ib_ucontext *context, struct ib_udata *udata)
{
	struct rxe_queue *queue;
	int wqe_size;
	int rc;

	srq->ibsrq.event_handler = init->event_handler;
	srq->ibsrq.srq_context = init->srq_context;
	srq->limit = init->attr.srq_limit;
	srq->srq_num = srq->pelem.index;
	srq->rq.max_wr = init->attr.max_wr;
	srq->rq.max_sge = init->attr.max_sge;

	wqe_size = rcv_wqe_size(srq->rq.max_sge);

	spin_lock_init(&srq->rq.producer_lock);
	spin_lock_init(&srq->rq.consumer_lock);

	queue = rxe_queue_init(rxe, &srq->rq.max_wr, wqe_size);
	if (!queue) {
		pr_warn("unable to allocate queue for srq\n");
		return -ENOMEM;
	}
	srq->rq.queue = queue;

	rc = do_mmap_info(rxe, udata, false, context, queue->buf,
			  queue->buf_size, &queue->ip);
	if (rc)
		return rc;

	/* hand the SRQ number back right behind the mminfo structure */
	if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32) &&
	    copy_to_user(udata->outbuf + sizeof(struct mminfo),
			 &srq->srq_num, sizeof(u32)))
		return -EFAULT;

	return 0;
}
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct ib_udata *udata)
{
int err;
struct rxe_queue *q = srq->rq.queue;
struct mminfo mi = { .offset = 1, .size = 0};
if (mask & IB_SRQ_MAX_WR) {
/* Check that we can write the mminfo struct to user space */
if (udata && udata->inlen >= sizeof(__u64)) {
__u64 mi_addr;
/* Get address of user space mminfo struct */
err = ib_copy_from_udata(&mi_addr, udata,
sizeof(mi_addr));
if (err)
goto err1;
udata->outbuf = (void __user *)(unsigned long)mi_addr;
udata->outlen = sizeof(mi);
if (!access_ok(VERIFY_WRITE,
(void __user *)udata->outbuf,
udata->outlen)) {
err = -EFAULT;
goto err1;
}
}
err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr,
rcv_wqe_size(srq->rq.max_sge),
srq->rq.queue->ip ?
srq->rq.queue->ip->context :
NULL,
udata, &srq->rq.producer_lock,
&srq->rq.consumer_lock);
if (err)
goto err2;
}
if (mask & IB_SRQ_LIMIT)
srq->limit = attr->srq_limit;
return 0;
err2:
rxe_queue_cleanup(q);
srq->rq.queue = NULL;
err1:
return err;
}

View file

@ -0,0 +1,157 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_net.h"
/*
 * Copy a parameter string into intf, stopping at the first newline or at
 * the end of the string, and NUL-terminate the copy.
 *
 * Returns the length of the copy, or 0 when val is NULL, when the name is
 * empty, or when it did not fit in intf_len - 1 characters.
 */
static int sanitize_arg(const char *val, char *intf, int intf_len)
{
	int n = 0;

	if (!val)
		return 0;

	while (n < intf_len - 1 && val[n] != '\0' && val[n] != '\n') {
		intf[n] = val[n];
		n++;
	}
	intf[n] = '\0';

	/* nothing was copied */
	if (n == 0)
		return 0;

	/* copying stopped because the buffer was full, not at the end */
	if (val[n] != '\0' && val[n] != '\n')
		return 0;

	return n;
}
/*
 * Bring the rxe port state in line with the network device: up when the
 * device is running with carrier, down otherwise.  Does nothing when no
 * rxe device is attached to ndev.
 */
static void rxe_set_port_state(struct net_device *ndev)
{
	struct rxe_dev *rxe = net_to_rxe(ndev);
	bool link_up = netif_running(ndev) && netif_carrier_ok(ndev);

	if (!rxe)
		return;

	if (!link_up) {
		rxe_port_down(rxe);	/* down for unknown state */
		return;
	}

	rxe_port_up(rxe);
}
/*
 * Handler for writes to the "add" module parameter: create an rxe device
 * on top of the named network interface.
 *
 * Returns 0 on success, -EINVAL when the name is invalid, the interface
 * does not exist, it already has an rxe device, or creation fails.
 */
static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
{
	char ifname[32];
	struct net_device *netdev;
	struct rxe_dev *rxe;
	int rc = -EINVAL;

	if (!sanitize_arg(val, ifname, sizeof(ifname))) {
		pr_err("rxe: add: invalid interface name\n");
		return rc;
	}

	netdev = dev_get_by_name(&init_net, ifname);
	if (!netdev) {
		pr_err("interface %s not found\n", ifname);
		return rc;
	}

	if (net_to_rxe(netdev)) {
		pr_err("rxe: already configured on %s\n", ifname);
		goto put_dev;
	}

	rxe = rxe_net_add(netdev);
	if (!rxe) {
		pr_err("rxe: failed to add %s\n", ifname);
		goto put_dev;
	}

	rxe_set_port_state(netdev);
	pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, ifname);
	rc = 0;

put_dev:
	/* drop the reference taken by dev_get_by_name() */
	dev_put(netdev);
	return rc;
}
/*
 * Handler for writes to the "remove" module parameter: remove the named
 * rxe device, or every rxe device when the argument is exactly "all".
 *
 * Returns 0 on success, -EINVAL when the name is invalid or no rxe device
 * with that name exists.
 */
static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
{
	int len;
	char intf[32];
	struct rxe_dev *rxe;

	len = sanitize_arg(val, intf, sizeof(intf));
	if (!len) {
		pr_err("rxe: remove: invalid interface name\n");
		return -EINVAL;
	}

	/* intf is NUL-terminated by sanitize_arg(); compare the whole
	 * string so that a prefix such as "a" or "al" is not mistaken
	 * for "all"
	 */
	if (strcmp("all", intf) == 0) {
		pr_info("rxe_sys: remove all\n");
		rxe_remove_all();
		return 0;
	}

	rxe = get_rxe_by_name(intf);
	if (!rxe) {
		pr_err("rxe: not configured on %s\n", intf);
		return -EINVAL;
	}

	list_del(&rxe->list);
	rxe_remove(rxe);

	return 0;
}
/* "add" parameter: only a set handler is provided */
static const struct kernel_param_ops rxe_add_ops = {
.set = rxe_param_set_add,
};
/* "remove" parameter: only a set handler is provided */
static const struct kernel_param_ops rxe_remove_ops = {
.set = rxe_param_set_remove,
};
/* both parameters are registered with mode 0200 and no backing variable */
module_param_cb(add, &rxe_add_ops, NULL, 0200);
MODULE_PARM_DESC(add, "Create RXE device over network interface");
module_param_cb(remove, &rxe_remove_ops, NULL, 0200);
MODULE_PARM_DESC(remove, "Remove RXE device over network interface");

View file

@ -0,0 +1,154 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/hardirq.h>
#include "rxe_task.h"
/*
 * Call the task function repeatedly until it returns non-zero, without
 * touching the task state or its lock.  The final return value is stored
 * in task->ret and returned.
 */
int __rxe_do_task(struct rxe_task *task)
{
	int rc;

	do {
		rc = task->func(task->arg);
	} while (rc == 0);

	task->ret = rc;

	return rc;
}
/*
 * Run a task's function until it reports that no work is left.
 *
 * @data: the struct rxe_task, cast to unsigned long (tasklet signature)
 *
 * The state lock closes a potential race where a second caller finds the
 * task already running but looks just after the last call to func.  Only
 * the caller that moves the task from START to BUSY runs func; a caller
 * that finds it BUSY marks it ARMED and returns, which makes the running
 * caller invoke func once more.
 */
void rxe_do_task(unsigned long data)
{
int cont;
int ret;
unsigned long flags;
struct rxe_task *task = (struct rxe_task *)data;
spin_lock_irqsave(&task->state_lock, flags);
switch (task->state) {
case TASK_STATE_START:
/* idle: this caller becomes the runner */
task->state = TASK_STATE_BUSY;
spin_unlock_irqrestore(&task->state_lock, flags);
break;
case TASK_STATE_BUSY:
/* someone else is running: ask for one more pass */
task->state = TASK_STATE_ARMED;
/* fall through to */
case TASK_STATE_ARMED:
spin_unlock_irqrestore(&task->state_lock, flags);
return;
default:
spin_unlock_irqrestore(&task->state_lock, flags);
pr_warn("bad state = %d in rxe_do_task\n", task->state);
return;
}
do {
cont = 0;
/* func runs without the state lock held */
ret = task->func(task->arg);
spin_lock_irqsave(&task->state_lock, flags);
switch (task->state) {
case TASK_STATE_BUSY:
/* non-zero return: no more work, go back to idle */
if (ret)
task->state = TASK_STATE_START;
else
cont = 1;
break;
/* someone tried to run the task since the last time we called
 * func, so we will call one more time regardless of the
 * return value
 */
case TASK_STATE_ARMED:
task->state = TASK_STATE_BUSY;
cont = 1;
break;
default:
pr_warn("bad state = %d in rxe_do_task\n",
task->state);
}
spin_unlock_irqrestore(&task->state_lock, flags);
} while (cont);
task->ret = ret;
}
/*
 * Initialise a task: record the object, argument and function, copy the
 * name (truncated to fit task->name), set up the tasklet to run
 * rxe_do_task() on this task, and put the task in the START state.
 *
 * Always returns 0.
 */
int rxe_init_task(void *obj, struct rxe_task *task,
		  void *arg, int (*func)(void *), char *name)
{
	task->obj	= obj;
	task->arg	= arg;
	task->func	= func;
	task->state	= TASK_STATE_START;

	snprintf(task->name, sizeof(task->name), "%s", name);

	spin_lock_init(&task->state_lock);
	tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);

	return 0;
}
/* Tear down a task by killing its tasklet. */
void rxe_cleanup_task(struct rxe_task *task)
{
tasklet_kill(&task->tasklet);
}
/*
 * Run a task.  With sched == 0 the task is executed directly in the
 * caller's context; otherwise its tasklet is scheduled.
 */
void rxe_run_task(struct rxe_task *task, int sched)
{
	if (!sched) {
		rxe_do_task((unsigned long)task);
		return;
	}

	tasklet_schedule(&task->tasklet);
}
/* Disable the task's tasklet. */
void rxe_disable_task(struct rxe_task *task)
{
tasklet_disable(&task->tasklet);
}
/* Re-enable the task's tasklet. */
void rxe_enable_task(struct rxe_task *task)
{
tasklet_enable(&task->tasklet);
}

View file

@ -0,0 +1,95 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_TASK_H
#define RXE_TASK_H
/* Values held in rxe_task.state. */
enum {
/* initial state set by rxe_init_task() */
TASK_STATE_START = 0,
TASK_STATE_BUSY = 1,
TASK_STATE_ARMED = 2,
};
/*
 * data structure to describe a 'task' which is a short
 * function that returns 0 as long as it needs to be
 * called again.
 */
struct rxe_task {
/* pointer supplied to rxe_init_task() */
void *obj;
struct tasklet_struct tasklet;
/* one of TASK_STATE_* */
int state;
spinlock_t state_lock; /* spinlock for task state */
/* argument passed to func on every call */
void *arg;
int (*func)(void *arg);
/* return value of the most recent call to func */
int ret;
char name[16];
};
/*
 * init rxe_task structure
 * arg => parameter to pass to func
 * func => function to call until it returns != 0
 */
int rxe_init_task(void *obj, struct rxe_task *task,
void *arg, int (*func)(void *), char *name);
/* cleanup task */
void rxe_cleanup_task(struct rxe_task *task);
/*
 * raw call to func in loop without any checking
 * can call when tasklets are disabled
 */
int __rxe_do_task(struct rxe_task *task);
/*
 * common function called by any of the main tasklets
 * If there is any chance that there is additional
 * work to do someone must reschedule the task before
 * leaving
 */
void rxe_do_task(unsigned long data);
/* Run a task directly (sched == 0) or schedule it to run as a
 * tasklet (sched != 0).
 */
void rxe_run_task(struct rxe_task *task, int sched);
/* keep a task from scheduling */
void rxe_disable_task(struct rxe_task *task);
/* allow task to run */
void rxe_enable_task(struct rxe_task *task);
#endif /* RXE_TASK_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,480 @@
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RXE_VERBS_H
#define RXE_VERBS_H
#include <linux/interrupt.h>
#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"
/*
 * Compare two partition keys.
 *
 * Returns 1 when the low 15 bits of both keys are equal and non-zero and
 * at least one key has bit 15 set; 0 otherwise.
 */
static inline int pkey_match(u16 key1, u16 key2)
{
	const u16 base1 = key1 & 0x7fff;
	const u16 base2 = key2 & 0x7fff;

	if (base1 == 0)
		return 0;

	if (base1 != base2)
		return 0;

	/* bit 15 must be set in at least one of the two keys */
	return ((key1 | key2) & 0x8000) ? 1 : 0;
}
/* Compare two packet sequence numbers with wrap-around.
 *
 * The difference is shifted left by 8 bits before being read as a signed
 * 32-bit value, so only its low 24 bits decide the result:
 *
 *	>0 if psn_a > psn_b
 *	 0 if psn_a == psn_b
 *	<0 if psn_a < psn_b
 */
static inline int psn_compare(u32 psn_a, u32 psn_b)
{
	s32 delta = (psn_a - psn_b) << 8;

	return delta;
}
/* rxe wrapper around an ib_ucontext, carrying a pool entry. */
struct rxe_ucontext {
struct rxe_pool_entry pelem;
struct ib_ucontext ibuc;
};
/* rxe wrapper around an ib_pd, carrying a pool entry. */
struct rxe_pd {
struct rxe_pool_entry pelem;
struct ib_pd ibpd;
};
/* rxe wrapper around an ib_ah, with its protection domain and address
 * vector.
 */
struct rxe_ah {
struct rxe_pool_entry pelem;
struct ib_ah ibah;
struct rxe_pd *pd;
struct rxe_av av;
};
/* Completion queue entry: the two work-completion layouts share storage. */
struct rxe_cqe {
union {
struct ib_wc ibwc;
struct ib_uverbs_wc uibwc;
};
};
/* rxe completion queue. */
struct rxe_cq {
struct rxe_pool_entry pelem;
struct ib_cq ibcq;
struct rxe_queue *queue;
/* NOTE(review): presumably guards queue - confirm in rxe_cq.c */
spinlock_t cq_lock;
u8 notify;
int is_user;
struct tasklet_struct comp_task;
};
/* State of a send WQE. */
enum wqe_state {
wqe_state_posted,
/* set by the requester after a packet that does not end the message */
wqe_state_processing,
/* set by the requester after the last packet of an RC message */
wqe_state_pending,
/* set by the requester for work it finishes locally */
wqe_state_done,
/* set by the requester when a WQE fails */
wqe_state_error,
};
/* Send queue of a QP. */
struct rxe_sq {
int max_wr;
int max_sge;
int max_inline;
spinlock_t sq_lock; /* guard queue */
struct rxe_queue *queue;
};
/* Receive queue, used by both QPs and SRQs. */
struct rxe_rq {
int max_wr;
int max_sge;
spinlock_t producer_lock; /* guard queue producer */
spinlock_t consumer_lock; /* guard queue consumer */
struct rxe_queue *queue;
};
/* Shared receive queue. */
struct rxe_srq {
struct rxe_pool_entry pelem;
struct ib_srq ibsrq;
struct rxe_pd *pd;
struct rxe_rq rq;
/* set from pelem.index when the SRQ is initialised */
u32 srq_num;
/* SRQ limit attribute (ib_srq_attr.srq_limit) */
int limit;
/* non-zero makes attribute checks on this SRQ fail */
int error;
};
/* State of the requester or responder side of a QP. */
enum rxe_qp_state {
QP_STATE_RESET,
QP_STATE_INIT,
QP_STATE_READY,
QP_STATE_DRAIN, /* req only */
QP_STATE_DRAINED, /* req only */
QP_STATE_ERROR
};
/* NOTE(review): presumably indexed by enum rxe_qp_state - defined elsewhere */
extern char *rxe_qp_state_name[];
/* Requester-side state of a QP. */
struct rxe_req_info {
enum rxe_qp_state state;
/* send queue index of the WQE the requester works on next */
int wqe_index;
/* PSN to use for the next request packet */
u32 psn;
/* opcode of the last packet sent, -1 after reset */
int opcode;
/* counter consumed by read/atomic requests */
atomic_t rd_atomic;
/* set while a fenced WQE waits for earlier WQEs */
int wait_fence;
/* set while a read/atomic WQE waits for rd_atomic */
int need_rd_atomic;
/* set when the requester stopped because too many PSNs are unacked */
int wait_psn;
/* when set, the requester calls req_retry() before continuing */
int need_retry;
/* packets sent since an ack was last requested */
int noack_pkts;
struct rxe_task task;
};
/* Completer-side state of a QP. */
struct rxe_comp_info {
/* the requester compares req.psn against this to bound unacked PSNs */
u32 psn;
int opcode;
int timeout;
int timeout_retry;
u32 retry_cnt;
u32 rnr_retry;
struct rxe_task task;
};
/* State of a responder resource (see struct resp_res). */
enum rdatm_res_state {
rdatm_res_state_next,
rdatm_res_state_new,
rdatm_res_state_replay,
};
/* One responder resource; the union holds either atomic or read data. */
struct resp_res {
/* NOTE(review): presumably selects which union member is valid */
int type;
u32 first_psn;
u32 last_psn;
u32 cur_psn;
enum rdatm_res_state state;
union {
struct {
struct sk_buff *skb;
} atomic;
struct {
struct rxe_mem *mr;
u64 va_org;
u32 rkey;
u32 length;
u64 va;
u32 resid;
} read;
};
};
/* Responder-side state of a QP. */
struct rxe_resp_info {
enum rxe_qp_state state;
u32 msn;
u32 psn;
int opcode;
int drop_msg;
int goto_error;
int sent_psn_nak;
enum ib_wc_status status;
u8 aeth_syndrome;
/* Receive only */
struct rxe_recv_wqe *wqe;
/* RDMA read / atomic only */
u64 va;
struct rxe_mem *mr;
u32 resid;
u32 rkey;
u64 atomic_orig;
/* SRQ only */
struct {
struct rxe_recv_wqe wqe;
struct ib_sge sge[RXE_MAX_SGE];
} srq_wqe;
/* Responder resources. It's a circular list where the oldest
 * resource is dropped first.
 */
struct resp_res *resources;
unsigned int res_head;
unsigned int res_tail;
struct resp_res *res;
struct rxe_task task;
};
/* rxe queue pair. */
struct rxe_qp {
struct rxe_pool_entry pelem;
struct ib_qp ibqp;
struct ib_qp_attr attr;
/* the requester does nothing while this is zero */
unsigned int valid;
/* payload bytes per packet used for RC and UC QPs */
unsigned int mtu;
int is_user;
struct rxe_pd *pd;
struct rxe_srq *srq;
struct rxe_cq *scq;
struct rxe_cq *rcq;
enum ib_sig_type sq_sig_type;
struct rxe_sq sq;
struct rxe_rq rq;
struct socket *sk;
struct rxe_av pri_av;
struct rxe_av alt_av;
/* list of mcast groups qp has joined (for cleanup) */
struct list_head grp_list;
spinlock_t grp_lock; /* guard grp_list */
struct sk_buff_head req_pkts;
struct sk_buff_head resp_pkts;
struct sk_buff_head send_pkts;
struct rxe_req_info req;
struct rxe_comp_info comp;
struct rxe_resp_info resp;
atomic_t ssn;
/* the requester stops when this exceeds RXE_INFLIGHT_SKBS_PER_QP_HIGH */
atomic_t skb_out;
/* set when the requester stopped for lack of skb capacity or a
 * transmit failure; cleared after a successful send
 */
int need_req_skb;
/* Timer for retransmitting packet when ACKs have been lost. RC
 * only. The requester sets it when it is not already
 * started. The responder resets it whenever an ack is
 * received.
 */
struct timer_list retrans_timer;
u64 qp_timeout_jiffies;
/* Timer for handling RNR NAKS. */
struct timer_list rnr_nak_timer;
spinlock_t state_lock; /* guard requester and completer */
};
/* State of a memory object (struct rxe_mem). */
enum rxe_mem_state {
RXE_MEM_STATE_ZOMBIE,
RXE_MEM_STATE_INVALID,
/* set by the requester when it executes IB_WR_LOCAL_INV */
RXE_MEM_STATE_FREE,
/* set by the requester when it executes IB_WR_REG_MR */
RXE_MEM_STATE_VALID,
};
/* Kind of memory object (struct rxe_mem.type). */
enum rxe_mem_type {
RXE_MEM_TYPE_NONE,
RXE_MEM_TYPE_DMA,
RXE_MEM_TYPE_MR,
RXE_MEM_TYPE_FMR,
RXE_MEM_TYPE_MW,
};
/* number of rxe_phys_buf entries that fit in one page */
#define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf))
/* one buffer: address and size */
struct rxe_phys_buf {
u64 addr;
u64 size;
};
/* one page worth of buffer descriptors */
struct rxe_map {
struct rxe_phys_buf buf[RXE_BUF_PER_MAP];
};
/* rxe memory object; ibmr and ibmw share storage. */
struct rxe_mem {
struct rxe_pool_entry pelem;
union {
struct ib_mr ibmr;
struct ib_mw ibmw;
};
struct rxe_pd *pd;
struct ib_umem *umem;
/* the requester sets both keys from the IB_WR_REG_MR request */
u32 lkey;
u32 rkey;
enum rxe_mem_state state;
enum rxe_mem_type type;
u64 va;
u64 iova;
size_t length;
u32 offset;
/* access flags; set from the IB_WR_REG_MR request */
int access;
int page_shift;
int page_mask;
int map_shift;
int map_mask;
u32 num_buf;
u32 nbuf;
u32 max_buf;
u32 num_map;
struct rxe_map **map;
};
/* Multicast group. */
struct rxe_mc_grp {
struct rxe_pool_entry pelem;
spinlock_t mcg_lock; /* guard group */
struct rxe_dev *rxe;
struct list_head qp_list;
union ib_gid mgid;
int num_qp;
u32 qkey;
u16 pkey;
};
/* Links one QP to one multicast group; carries a list node for each
 * side.
 */
struct rxe_mc_elem {
struct rxe_pool_entry pelem;
struct list_head qp_list;
struct list_head grp_list;
struct rxe_qp *qp;
struct rxe_mc_grp *grp;
};
/*
 * Per-port state; struct rxe_dev embeds a single instance as its "port"
 * member. port_lock guards the port.
 */
struct rxe_port {
struct ib_port_attr attr;
/* NOTE(review): presumably a table of P_Keys; length not visible here - confirm */
u16 *pkey_tbl;
__be64 port_guid;
__be64 subnet_prefix;
spinlock_t port_lock; /* guard port */
unsigned int mtu_cap;
/* special QPs */
u32 qp_smi_index;
u32 qp_gsi_index;
};
/*
 * Callbacks from rdma_rxe to the network interface layer.
 *
 * struct rxe_dev holds a pointer to one of these tables in its ifc_ops
 * member. Every callback except loopback() takes the rxe device as its
 * first argument.
 */
struct rxe_ifc_ops {
void (*release)(struct rxe_dev *rxe);
__be64 (*node_guid)(struct rxe_dev *rxe);
__be64 (*port_guid)(struct rxe_dev *rxe);
struct device *(*dma_device)(struct rxe_dev *rxe);
/* multicast membership, keyed by the group GID */
int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);
int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);
/* NOTE(review): crc is presumably an in/out running checksum - confirm */
int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, u32 *crc);
int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb);
int (*loopback)(struct sk_buff *skb);
/* returns an skb for a packet addressed by av with a payload of paylen */
struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);
enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,
unsigned int port_num);
};
/*
 * Per-device state of an rxe device.
 *
 * Embeds the ib_device as ib_dev; to_rdev() uses container_of() on that
 * member to recover the rxe_dev. Holds one rxe_pool per object type and a
 * single embedded rxe_port.
 */
struct rxe_dev {
struct ib_device ib_dev;
struct ib_device_attr attr;
int max_ucontext;
int max_inline_data;
struct kref ref_cnt;
struct mutex usdev_lock;
/* callbacks into the network interface layer */
struct rxe_ifc_ops *ifc_ops;
struct net_device *ndev;
int xmit_errors;
/* object pools, one per object type */
struct rxe_pool uc_pool;
struct rxe_pool pd_pool;
struct rxe_pool ah_pool;
struct rxe_pool srq_pool;
struct rxe_pool qp_pool;
struct rxe_pool cq_pool;
struct rxe_pool mr_pool;
struct rxe_pool mw_pool;
struct rxe_pool mc_grp_pool;
struct rxe_pool mc_elem_pool;
spinlock_t pending_lock; /* guard pending_mmaps */
struct list_head pending_mmaps;
spinlock_t mmap_offset_lock; /* guard mmap_offset */
int mmap_offset;
struct rxe_port port;
/* NOTE(review): presumably links this device into a driver-wide list - confirm */
struct list_head list;
};
/* Map an embedded ib_device back to its rxe_dev; NULL in, NULL out. */
static inline struct rxe_dev *to_rdev(struct ib_device *dev)
{
	if (!dev)
		return NULL;

	return container_of(dev, struct rxe_dev, ib_dev);
}
/* Map an embedded ib_ucontext back to its rxe_ucontext; NULL in, NULL out. */
static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc)
{
	if (!uc)
		return NULL;

	return container_of(uc, struct rxe_ucontext, ibuc);
}
/* Map an embedded ib_pd back to its rxe_pd; NULL in, NULL out. */
static inline struct rxe_pd *to_rpd(struct ib_pd *pd)
{
	if (!pd)
		return NULL;

	return container_of(pd, struct rxe_pd, ibpd);
}
/* Map an embedded ib_ah back to its rxe_ah; NULL in, NULL out. */
static inline struct rxe_ah *to_rah(struct ib_ah *ah)
{
	if (!ah)
		return NULL;

	return container_of(ah, struct rxe_ah, ibah);
}
/* Map an embedded ib_srq back to its rxe_srq; NULL in, NULL out. */
static inline struct rxe_srq *to_rsrq(struct ib_srq *srq)
{
	if (!srq)
		return NULL;

	return container_of(srq, struct rxe_srq, ibsrq);
}
/* Map an embedded ib_qp back to its rxe_qp; NULL in, NULL out. */
static inline struct rxe_qp *to_rqp(struct ib_qp *qp)
{
	if (!qp)
		return NULL;

	return container_of(qp, struct rxe_qp, ibqp);
}
/* Map an embedded ib_cq back to its rxe_cq; NULL in, NULL out. */
static inline struct rxe_cq *to_rcq(struct ib_cq *cq)
{
	if (!cq)
		return NULL;

	return container_of(cq, struct rxe_cq, ibcq);
}
/* Map an embedded ib_mr (rxe_mem.ibmr) back to its rxe_mem; NULL in, NULL out. */
static inline struct rxe_mem *to_rmr(struct ib_mr *mr)
{
	if (!mr)
		return NULL;

	return container_of(mr, struct rxe_mem, ibmr);
}
/* Map an embedded ib_mw (rxe_mem.ibmw) back to its rxe_mem; NULL in, NULL out. */
static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
{
	if (!mw)
		return NULL;

	return container_of(mw, struct rxe_mem, ibmw);
}
int rxe_register_device(struct rxe_dev *rxe);
int rxe_unregister_device(struct rxe_dev *rxe);
void rxe_mc_cleanup(void *arg);
#endif /* RXE_VERBS_H */

View file

@ -62,10 +62,8 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
{
struct ipoib_dev_priv *priv = netdev_priv(netdev);
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
(int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
(int)priv->ca->attrs.fw_ver & 0xffff);
ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
sizeof(drvinfo->fw_version));
strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
sizeof(drvinfo->bus_info));

View file

@ -1967,8 +1967,7 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features = NETIF_F_SG |
NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;

View file

@ -135,7 +135,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
.cap = {
.max_send_wr = ipoib_sendq_size,
.max_recv_wr = ipoib_recvq_size,
.max_send_sge = 1,
.max_send_sge = min_t(u32, priv->ca->attrs.max_sge,
MAX_SKB_FRAGS + 1),
.max_recv_sge = IPOIB_UD_RX_SG
},
.sq_sig_type = IB_SIGNAL_ALL_WR,
@ -205,10 +206,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge =
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
@ -234,6 +231,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
if (init_attr.cap.max_send_sge > 1)
dev->features |= NETIF_F_SG;
priv->max_send_sge = init_attr.cap.max_send_sge;
return 0;

View file

@ -104,6 +104,8 @@ enum {
enum CPL_error {
CPL_ERR_NONE = 0,
CPL_ERR_TCAM_PARITY = 1,
CPL_ERR_TCAM_MISS = 2,
CPL_ERR_TCAM_FULL = 3,
CPL_ERR_BAD_LENGTH = 15,
CPL_ERR_BAD_ROUTE = 18,

View file

@ -721,6 +721,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
#define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c
#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c
#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
#define QUERY_DEV_CAP_VXLAN 0x9e
#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
@ -935,6 +936,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP;
if (field32 & (1 << 7))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT;
MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT);
if (field32 & (1 << 17))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT;
MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
if (field & 1<<6)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
@ -2457,6 +2461,42 @@ int mlx4_NOP(struct mlx4_dev *dev)
MLX4_CMD_NATIVE);
}
/*
 * mlx4_query_diag_counters - read fields from a DIAG_RPRT command result
 * @dev:         device to query
 * @op_modifier: op modifier passed to the MLX4_CMD_DIAG_RPRT command
 * @offset:      @array_len byte offsets into the command's output mailbox
 * @value:       @array_len output slots; value[i] receives the field read
 *               at offset[i]
 * @array_len:   number of entries in @offset and @value
 * @port:        passed to mlx4_cmd_box() together with @op_modifier
 *
 * Returns 0 on success, the error from mailbox allocation or from the
 * command itself, or -EINVAL if a requested field does not lie entirely
 * inside the mailbox. On -EINVAL the entries preceding the offending one
 * have already been written to @value.
 */
int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
			     const u32 offset[],
			     u32 value[], size_t array_len, u8 port)
{
	struct mlx4_cmd_mailbox *mailbox;
	u32 *outbox;
	size_t i;
	int ret;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	outbox = mailbox->buf;

	ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier,
			   MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A,
			   MLX4_CMD_NATIVE);
	if (ret)
		goto out;

	for (i = 0; i < array_len; i++) {
		/*
		 * The read covers sizeof(value[i]) bytes starting at
		 * offset[i], so the last valid offset is
		 * MLX4_MAILBOX_SIZE - sizeof(value[i]). Comparing against
		 * MLX4_MAILBOX_SIZE alone would allow a read past the end
		 * of the mailbox buffer.
		 */
		if (offset[i] > MLX4_MAILBOX_SIZE - sizeof(value[i])) {
			ret = -EINVAL;
			goto out;
		}

		MLX4_GET(value[i], outbox, offset[i]);
	}

out:
	mlx4_free_cmd_mailbox(dev, mailbox);
	return ret;
}
EXPORT_SYMBOL(mlx4_query_diag_counters);
int mlx4_get_phys_port_id(struct mlx4_dev *dev)
{
u8 port;

View file

@ -63,12 +63,12 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
complete(&srq->free);
}
static int get_pas_size(void *srqc)
static int get_pas_size(struct mlx5_srq_attr *in)
{
u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12;
u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size);
u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride);
u32 page_offset = MLX5_GET(srqc, srqc, page_offset);
u32 log_page_size = in->log_page_size + 12;
u32 log_srq_size = in->log_size;
u32 log_rq_stride = in->wqe_shift;
u32 page_offset = in->page_offset;
u32 po_quanta = 1 << (log_page_size - 6);
u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
u32 page_size = 1 << log_page_size;
@ -78,57 +78,58 @@ static int get_pas_size(void *srqc)
return rq_num_pas * sizeof(u64);
}
static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc)
/*
 * Program a work-queue context (wq) from the SRQ attributes in @in.
 * The stride is written as in->wqe_shift + 4; get_wq() reverses this by
 * subtracting 4 from log_wq_stride.
 */
static void set_wq(void *wq, struct mlx5_srq_attr *in)
{
/*
 * NOTE(review): the declaration below redeclares the "wq" parameter and
 * refers to an "rmpc" that is not in scope here; it looks like a line
 * carried over from the pre-patch rmpc_srqc_reformat() - confirm and drop.
 */
void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
/* wq_signature is 1 iff MLX5_SRQ_FLAG_WQ_SIG is set in in->flags */
MLX5_SET(wq, wq, wq_signature, !!(in->flags
& MLX5_SRQ_FLAG_WQ_SIG));
MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
MLX5_SET(wq, wq, log_wq_sz, in->log_size);
MLX5_SET(wq, wq, page_offset, in->page_offset);
MLX5_SET(wq, wq, lwm, in->lwm);
MLX5_SET(wq, wq, pd, in->pd);
MLX5_SET64(wq, wq, dbr_addr, in->db_record);
}
if (srqc_to_rmpc) {
switch (MLX5_GET(srqc, srqc, state)) {
case MLX5_SRQC_STATE_GOOD:
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
break;
case MLX5_SRQC_STATE_ERROR:
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR);
break;
default:
pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__,
__LINE__, MLX5_GET(srqc, srqc, state));
MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state));
}
/*
 * Program an SRQ context (srqc) from the software attributes in @in.
 * wq_signature is set to 1 iff MLX5_SRQ_FLAG_WQ_SIG is present in
 * in->flags; every other field is copied across unchanged.
 */
static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
{
	const int wq_sig = !!(in->flags & MLX5_SRQ_FLAG_WQ_SIG);

	MLX5_SET(srqc, srqc, wq_signature, wq_sig);

	/* queue geometry */
	MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
	MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
	MLX5_SET(srqc, srqc, log_srq_size,  in->log_size);
	MLX5_SET(srqc, srqc, page_offset,   in->page_offset);

	/* limit watermark, protection domain and doorbell record */
	MLX5_SET(srqc, srqc, lwm, in->lwm);
	MLX5_SET(srqc, srqc, pd,  in->pd);
	MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);

	/* fields only present in the SRQ context, not in a wq context */
	MLX5_SET(srqc, srqc, xrcd, in->xrcd);
	MLX5_SET(srqc, srqc, cqn,  in->cqn);
}
MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature));
MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size));
MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4);
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size));
MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset));
MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm));
MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd));
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr));
} else {
switch (MLX5_GET(rmpc, rmpc, state)) {
case MLX5_RMPC_STATE_RDY:
MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD);
break;
case MLX5_RMPC_STATE_ERR:
MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR);
break;
default:
pr_warn("%s: %d: Unknown rmp state = 0x%x\n",
__func__, __LINE__,
MLX5_GET(rmpc, rmpc, state));
MLX5_SET(srqc, srqc, state,
MLX5_GET(rmpc, rmpc, state));
}
/*
 * Decode a work-queue context (wq) into the SRQ attributes in @in.
 *
 * Inverse of set_wq(): log_wq_stride is stored as wqe_shift + 4, so 4 is
 * subtracted on the way back. MLX5_SRQ_FLAG_WQ_SIG is OR-ed into in->flags
 * when the wq_signature bit is set; other bits of in->flags are left
 * untouched, and the flag is not cleared when the bit is 0.
 */
static void get_wq(void *wq, struct mlx5_srq_attr *in)
{
	/*
	 * Must be |=, not &=: with &= the flag could never be set (only
	 * preserved) and every other flag bit would be wiped.
	 */
	if (MLX5_GET(wq, wq, wq_signature))
		in->flags |= MLX5_SRQ_FLAG_WQ_SIG;

	in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
	in->wqe_shift	  = MLX5_GET(wq, wq, log_wq_stride) - 4;
	in->log_size	  = MLX5_GET(wq, wq, log_wq_sz);
	in->page_offset	  = MLX5_GET(wq, wq, page_offset);
	in->lwm		  = MLX5_GET(wq, wq, lwm);
	in->pd		  = MLX5_GET(wq, wq, pd);
	in->db_record	  = MLX5_GET64(wq, wq, dbr_addr);
}
MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature));
MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz));
MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4);
MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz));
MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset));
MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm));
MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd));
MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr));
}
/*
 * Decode an SRQ context (srqc) into the SRQ attributes in @in.
 *
 * Reads back the fields written by set_srqc() except xrcd and cqn, which
 * are not touched here. MLX5_SRQ_FLAG_WQ_SIG is OR-ed into in->flags when
 * the wq_signature bit is set; other bits of in->flags are left untouched,
 * and the flag is not cleared when the bit is 0.
 */
static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
{
	/*
	 * Must be |=, not &=: with &= the flag could never be set (only
	 * preserved) and every other flag bit would be wiped.
	 */
	if (MLX5_GET(srqc, srqc, wq_signature))
		in->flags |= MLX5_SRQ_FLAG_WQ_SIG;

	in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
	in->wqe_shift	  = MLX5_GET(srqc, srqc, log_rq_stride);
	in->log_size	  = MLX5_GET(srqc, srqc, log_srq_size);
	in->page_offset	  = MLX5_GET(srqc, srqc, page_offset);
	in->lwm		  = MLX5_GET(srqc, srqc, lwm);
	in->pd		  = MLX5_GET(srqc, srqc, pd);
	in->db_record	  = MLX5_GET64(srqc, srqc, dbr_addr);
}
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
@ -149,19 +150,36 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
EXPORT_SYMBOL(mlx5_core_get_srq);
static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_create_srq_mbox_in *in, int inlen)
struct mlx5_srq_attr *in)
{
struct mlx5_create_srq_mbox_out out;
u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
void *create_in;
void *srqc;
void *pas;
int pas_size;
int inlen;
int err;
memset(&out, 0, sizeof(out));
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
create_in = mlx5_vzalloc(inlen);
if (!create_in)
return -ENOMEM;
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ);
srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out),
sizeof(out));
set_srqc(srqc, in);
memcpy(pas, in->pas, pas_size);
srq->srqn = be32_to_cpu(out.srqn) & 0xffffff;
MLX5_SET(create_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_SRQ);
err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
if (!err)
srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
return err;
}
@ -169,67 +187,75 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int destroy_srq_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq)
{
struct mlx5_destroy_srq_mbox_in in;
struct mlx5_destroy_srq_mbox_out out;
u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
memset(&in, 0, sizeof(in));
memset(&out, 0, sizeof(out));
in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ);
in.srqn = cpu_to_be32(srq->srqn);
MLX5_SET(destroy_srq_in, srq_in, opcode,
MLX5_CMD_OP_DESTROY_SRQ);
MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in),
(u32 *)(&out), sizeof(out));
return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
srq_out, sizeof(srq_out));
}
static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq)
{
struct mlx5_arm_srq_mbox_in in;
struct mlx5_arm_srq_mbox_out out;
/* arm_srq structs missing using identical xrc ones */
u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
memset(&in, 0, sizeof(in));
memset(&out, 0, sizeof(out));
MLX5_SET(arm_xrc_srq_in, srq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn);
MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm);
in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ);
in.hdr.opmod = cpu_to_be16(!!is_srq);
in.srqn = cpu_to_be32(srq->srqn);
in.lwm = cpu_to_be16(lwm);
return mlx5_cmd_exec_check_status(dev, (u32 *)(&in),
sizeof(in), (u32 *)(&out),
sizeof(out));
return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
srq_out, sizeof(srq_out));
}
static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_query_srq_mbox_out *out)
struct mlx5_srq_attr *out)
{
struct mlx5_query_srq_mbox_in in;
u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
u32 *srq_out;
void *srqc;
int err;
memset(&in, 0, sizeof(in));
srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out));
if (!srq_out)
return -ENOMEM;
in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ);
in.srqn = cpu_to_be32(srq->srqn);
MLX5_SET(query_srq_in, srq_in, opcode,
MLX5_CMD_OP_QUERY_SRQ);
MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
srq_out,
MLX5_ST_SZ_BYTES(query_srq_out));
if (err)
goto out;
return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in),
(u32 *)out, sizeof(*out));
srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
get_srqc(srqc, out);
if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
out->flags |= MLX5_SRQ_FLAG_ERR;
out:
kvfree(srq_out);
return err;
}
static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq,
struct mlx5_create_srq_mbox_in *in,
int srq_inlen)
struct mlx5_srq_attr *in)
{
u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
void *create_in;
void *srqc;
void *xrc_srqc;
void *pas;
int pas_size;
int inlen;
int err;
srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry);
pas_size = get_pas_size(srqc);
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
create_in = mlx5_vzalloc(inlen);
if (!create_in)
@ -239,7 +265,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
xrc_srq_context_entry);
pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
set_srqc(xrc_srqc, in);
MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
memcpy(pas, in->pas, pas_size);
MLX5_SET(create_xrc_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_XRC_SRQ);
@ -293,11 +320,10 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq,
struct mlx5_query_srq_mbox_out *out)
struct mlx5_srq_attr *out)
{
u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
u32 *xrcsrq_out;
void *srqc;
void *xrc_srqc;
int err;
@ -317,8 +343,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
xrc_srq_context_entry);
srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry);
memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc));
get_srqc(xrc_srqc, out);
if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
out->flags |= MLX5_SRQ_FLAG_ERR;
out:
kvfree(xrcsrq_out);
@ -326,26 +353,27 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
}
static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_create_srq_mbox_in *in, int srq_inlen)
struct mlx5_srq_attr *in)
{
void *create_in;
void *rmpc;
void *srqc;
void *wq;
int pas_size;
int inlen;
int err;
srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry);
pas_size = get_pas_size(srqc);
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
create_in = mlx5_vzalloc(inlen);
if (!create_in)
return -ENOMEM;
rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
set_wq(wq, in);
memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
rmpc_srqc_reformat(srqc, rmpc, true);
err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
@ -390,11 +418,10 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
}
static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_query_srq_mbox_out *out)
struct mlx5_srq_attr *out)
{
u32 *rmp_out;
void *rmpc;
void *srqc;
int err;
rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out));
@ -405,9 +432,10 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
if (err)
goto out;
srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry);
rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
rmpc_srqc_reformat(srqc, rmpc, false);
get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
out->flags |= MLX5_SRQ_FLAG_ERR;
out:
kvfree(rmp_out);
@ -416,15 +444,14 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int create_srq_split(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq,
struct mlx5_create_srq_mbox_in *in,
int inlen, int is_xrc)
struct mlx5_srq_attr *in)
{
if (!dev->issi)
return create_srq_cmd(dev, srq, in, inlen);
return create_srq_cmd(dev, srq, in);
else if (srq->common.res == MLX5_RES_XSRQ)
return create_xrc_srq_cmd(dev, srq, in, inlen);
return create_xrc_srq_cmd(dev, srq, in);
else
return create_rmp_cmd(dev, srq, in, inlen);
return create_rmp_cmd(dev, srq, in);
}
static int destroy_srq_split(struct mlx5_core_dev *dev,
@ -439,15 +466,17 @@ static int destroy_srq_split(struct mlx5_core_dev *dev,
}
int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_create_srq_mbox_in *in, int inlen,
int is_xrc)
struct mlx5_srq_attr *in)
{
int err;
struct mlx5_srq_table *table = &dev->priv.srq_table;
srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ;
if (in->type == IB_SRQT_XRC)
srq->common.res = MLX5_RES_XSRQ;
else
srq->common.res = MLX5_RES_SRQ;
err = create_srq_split(dev, srq, in, inlen, is_xrc);
err = create_srq_split(dev, srq, in);
if (err)
return err;
@ -502,7 +531,7 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
EXPORT_SYMBOL(mlx5_core_destroy_srq);
int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_query_srq_mbox_out *out)
struct mlx5_srq_attr *out)
{
if (!dev->issi)
return query_srq_cmd(dev, srq, out);

View file

@ -85,6 +85,7 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
return err;
}
EXPORT_SYMBOL(mlx5_core_create_rq);
int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
{
@ -110,6 +111,7 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL(mlx5_core_destroy_rq);
int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
{
@ -430,6 +432,7 @@ int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
return err;
}
EXPORT_SYMBOL(mlx5_core_create_rqt);
int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
int inlen)
@ -455,3 +458,4 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL(mlx5_core_destroy_rqt);

View file

@ -220,6 +220,7 @@ enum {
MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33,
MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34,
MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35,
};
enum {
@ -1342,6 +1343,9 @@ enum {
VXLAN_STEER_BY_INNER_VLAN = 1 << 4,
};
/*
 * Command op-modifier value.
 * NOTE(review): presumably intended as the op_modifier argument of
 * mlx4_query_diag_counters() - confirm against callers.
 */
enum {
MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS = 0x2,
};
int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
enum mlx4_net_trans_promisc_mode mode);
@ -1382,6 +1386,9 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
int mlx4_SYNC_TPT(struct mlx4_dev *dev);
int mlx4_test_interrupts(struct mlx4_dev *dev);
int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
const u32 offset[], u32 value[],
size_t array_len, u8 port);
u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port);
bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector);
struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port);

View file

@ -58,6 +58,8 @@ struct mlx5_core_cq {
void (*comp)(struct mlx5_core_cq *);
void *priv;
} tasklet_ctx;
int reset_notify_added;
struct list_head reset_notify;
};

View file

@ -46,6 +46,7 @@
#include <linux/mlx5/device.h>
#include <linux/mlx5/doorbell.h>
#include <linux/mlx5/srq.h>
enum {
MLX5_RQ_BITMASK_VSD = 1 << 1,
@ -798,11 +799,10 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
struct mlx5_cmd_mailbox *head);
int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_create_srq_mbox_in *in, int inlen,
int is_xrc);
struct mlx5_srq_attr *in);
int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_query_srq_mbox_out *out);
struct mlx5_srq_attr *out);
int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq);
void mlx5_init_mkey_table(struct mlx5_core_dev *dev);

View file

@ -556,9 +556,9 @@ struct mlx5_destroy_qp_mbox_out {
struct mlx5_modify_qp_mbox_in {
struct mlx5_inbox_hdr hdr;
__be32 qpn;
u8 rsvd1[4];
__be32 optparam;
u8 rsvd0[4];
__be32 optparam;
u8 rsvd1[4];
struct mlx5_qp_context ctx;
u8 rsvd2[16];
};

View file

@ -35,6 +35,31 @@
#include <linux/mlx5/driver.h>
/* Bit flags carried in mlx5_srq_attr.flags. */
enum {
	MLX5_SRQ_FLAG_ERR	= 0x1,	/* bit 0 */
	MLX5_SRQ_FLAG_WQ_SIG	= 0x2,	/* bit 1 */
};
/*
 * Software description of an SRQ, used as the argument of
 * mlx5_core_create_srq() and as the result of mlx5_core_query_srq().
 */
struct mlx5_srq_attr {
/* compared against IB_SRQT_XRC by mlx5_core_create_srq() */
u32 type;
/* MLX5_SRQ_FLAG_* bits */
u32 flags;
/* log2 queue size as programmed into the context (log_srq_size / log_wq_sz) */
u32 log_size;
/* programmed as log_rq_stride; a wq context stores wqe_shift + 4 */
u32 wqe_shift;
u32 log_page_size;
u32 wqe_cnt;
u32 srqn;
u32 xrcd;
u32 page_offset;
u32 cqn;
u32 pd;
/* limit watermark */
u32 lwm;
u32 user_index;
/* programmed into the context as dbr_addr */
u64 db_record;
/* page address array; the create paths copy get_pas_size() bytes from it */
u64 *pas;
};
struct mlx5_core_dev;
void mlx5_init_srq_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);

Some files were not shown because too many files have changed in this diff Show more