Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband: (65 commits) IB: Fix typo in kerneldoc for ib_set_client_data() IPoIB: Add some likely/unlikely annotations in hot path IPoIB: Remove unused include of vmalloc.h IPoIB: Rejoin all multicast groups after a port event IPoIB: Create MCGs with all attributes required by RFC IB/sa: fix ib_sa_selector names IB/iser: INFINIBAND_ISER depends on INET IB/mthca: Simplify calls to mthca_cq_clean() RDMA/cma: Document rdma_accept() error handling IB/mthca: Recover from catastrophic errors RDMA/cma: Document rdma_destroy_id() function IB/cm: Do not track remote QPN in timewait state IB/sa: Require SA registration IPoIB: Refactor completion handling IB/iser: Do not use FMR for a single dma entry sg IB/iser: fix some debug prints IB/iser: make FMR "page size" be 4K and not PAGE_SIZE IB/iser: Limit the max size of a scsi command IB/iser: fix a check of SG alignment for RDMA RDMA/cma: Protect against adding device during destruction ...
2024-10-02 15:18:19 +00:00 · 2006-09-22 15:47:06 -07:00 · 2006-09-22 15:47:06 -07:00 · db392219c5
commit db392219c5
parent 3e2ab46df6 9cd330d36b
136 changed files with 25637 additions and 2715 deletions
--- a/16
+++ b/16
@ -298,6 +298,14 @@ L:	info-linux@geode.amd.com
 W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:	Supported

+AMSO1100 RNIC DRIVER
+P:	Tom Tucker
+M:	tom@opengridcomputing.com
+P:	Steve Wise
+M:	swise@opengridcomputing.com
+L:	openib-general@openib.org
+S:	Maintained
+
 AOA (Apple Onboard Audio) ALSA DRIVER
 P:	Johannes Berg
 M:	johannes@sipsolutions.net
@ -991,6 +999,14 @@ EFS FILESYSTEM
 W:	http://aeschi.ch.eu.org/efs/
 S:	Orphan

+EHCA (IBM GX bus InfiniBand adapter) DRIVER:
+P:	Hoang-Nam Nguyen
+M:	hnguyen@de.ibm.com
+P:	Christoph Raisch
+M:	raisch@de.ibm.com
+L:	openib-general@openib.org
+S:	Supported
+
 EMU10K1 SOUND DRIVER
 P:	James Courtier-Dutton
 M:	James@superbug.demon.co.uk
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD
 	---help---
 	  Userspace InfiniBand Management Datagram (MAD) support.  This
 	  is the kernel side of the userspace MAD support, which allows
-	  userspace processes to send and receive MADs. You will also 
+	  userspace processes to send and receive MADs. You will also
 	  need libibumad from <http://www.openib.org>.

 config INFINIBAND_USER_ACCESS
@ -36,6 +36,8 @@ config INFINIBAND_ADDR_TRANS

 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/ehca/Kconfig"
+source "drivers/infiniband/hw/amso1100/Kconfig"

 source "drivers/infiniband/ulp/ipoib/Kconfig"

--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@ -1,6 +1,8 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
-obj-$(CONFIG_IPATH_CORE)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@ -1,7 +1,7 @@
 infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o

 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
-					ib_cm.o $(infiniband-y)
+					ib_cm.o iw_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o

@ -14,6 +14,8 @@ ib_sa-y :=			sa_query.o

 ib_cm-y :=			cm.o

+iw_cm-y :=			iwcm.o
+
 rdma_cm-y :=			cma.o

 ib_addr-y :=			addr.o
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@ -61,12 +61,15 @@ static LIST_HEAD(req_list);
 static DECLARE_WORK(work, process_req, NULL);
 static struct workqueue_struct *addr_wq;

-static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
-		     unsigned char *dst_dev_addr)
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+		     const unsigned char *dst_dev_addr)
 {
 	switch (dev->type) {
 	case ARPHRD_INFINIBAND:
-		dev_addr->dev_type = IB_NODE_CA;
+		dev_addr->dev_type = RDMA_NODE_IB_CA;
+		break;
+	case ARPHRD_ETHER:
+		dev_addr->dev_type = RDMA_NODE_RNIC;
 		break;
 	default:
 		return -EADDRNOTAVAIL;
@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 	return 0;
 }
+EXPORT_SYMBOL(rdma_copy_addr);

 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 	if (!dev)
 		return -EADDRNOTAVAIL;

-	ret = copy_addr(dev_addr, dev, NULL);
+	ret = rdma_copy_addr(dev_addr, dev, NULL);
 	dev_put(dev);
 	return ret;
 }
@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,

 	/* If the device does ARP internally, return 'done' */
 	if (rt->idev->dev->flags & IFF_NOARP) {
-		copy_addr(addr, rt->idev->dev, NULL);
+		rdma_copy_addr(addr, rt->idev->dev, NULL);
 		goto put;
 	}

@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
 		src_in->sin_addr.s_addr = rt->rt_src;
 	}

-	ret = copy_addr(addr, neigh->dev, neigh->ha);
+	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
 	neigh_release(neigh);
 put:
@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
 	if (ZERONET(src_ip)) {
 		src_in->sin_family = dst_in->sin_family;
 		src_in->sin_addr.s_addr = dst_ip;
-		ret = copy_addr(addr, dev, dev->dev_addr);
+		ret = rdma_copy_addr(addr, dev, dev->dev_addr);
 	} else if (LOOPBACK(src_ip)) {
 		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
 		if (!ret)
@ -327,10 +331,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 }
 EXPORT_SYMBOL(rdma_addr_cancel);

-static int netevent_callback(struct notifier_block *self, unsigned long event, 
+static int netevent_callback(struct notifier_block *self, unsigned long event,
 	void *ctx)
 {
-	if (event == NETEVENT_NEIGH_UPDATE) {  
+	if (event == NETEVENT_NEIGH_UPDATE) {
 		struct neighbour *neigh = ctx;

 		if (neigh->dev->type == ARPHRD_INFINIBAND &&
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@ -62,12 +62,13 @@ struct ib_update_work {

 static inline int start_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
 }

 static inline int end_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+		0 : device->phys_port_cnt;
 }

 int ib_get_cached_gid(struct ib_device *device,
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@ -41,6 +41,7 @@
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/random.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
@ -73,6 +74,7 @@ static struct ib_cm {
 	struct rb_root remote_id_table;
 	struct rb_root remote_sidr_table;
 	struct idr local_id_table;
+	__be32 random_id_operand;
 	struct workqueue_struct *wq;
 } cm;

@ -177,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	if (IS_ERR(ah))
 		return PTR_ERR(ah);

-	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
+	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
 			       cm_id_priv->av.pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
-	int ret;
+	int ret, id;
 	static int next_id;

 	do {
 		spin_lock_irqsave(&cm.lock, flags);
-		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
-					(__force int *) &cm_id_priv->id.local_id);
+		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+					next_id++, &id);
 		spin_unlock_irqrestore(&cm.lock, flags);
 	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+
+	cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
 	return ret;
 }

@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id)
 	unsigned long flags;

 	spin_lock_irqsave(&cm.lock, flags);
-	idr_remove(&cm.local_id_table, (__force int) local_id);
+	idr_remove(&cm.local_id_table,
+		   (__force int) (local_id ^ cm.random_id_operand));
 	spin_unlock_irqrestore(&cm.lock, flags);
 }

@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
 {
 	struct cm_id_private *cm_id_priv;

-	cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id);
+	cm_id_priv = idr_find(&cm.local_id_table,
+			      (__force int) (local_id ^ cm.random_id_operand));
 	if (cm_id_priv) {
 		if (cm_id_priv->id.remote_id == remote_id)
 			atomic_inc(&cm_id_priv->refcount);
@ -679,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
 {
 	int wait_time;

+	cm_cleanup_timewait(cm_id_priv->timewait_info);
+
 	/*
 	 * The cm_id could be destroyed by the user before we exit timewait.
 	 * To protect against this, we search for the cm_id after exiting
@ -1354,7 +1362,7 @@ static int cm_req_handler(struct cm_work *work)
 							    id.local_id);
 	if (IS_ERR(cm_id_priv->timewait_info)) {
 		ret = PTR_ERR(cm_id_priv->timewait_info);
-		goto error1;
+		goto destroy;
 	}
 	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
 	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
@ -1363,7 +1371,8 @@ static int cm_req_handler(struct cm_work *work)
 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
 	if (!listen_cm_id_priv) {
 		ret = -EINVAL;
-		goto error2;
+		kfree(cm_id_priv->timewait_info);
+		goto destroy;
 	}

 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@ -1373,12 +1382,22 @@ static int cm_req_handler(struct cm_work *work)

 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
-	if (ret)
-		goto error3;
+	if (ret) {
+		ib_get_cached_gid(work->port->cm_dev->device,
+				  work->port->port_num, 0, &work->path[0].sgid);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+			       &work->path[0].sgid, sizeof work->path[0].sgid,
+			       NULL, 0);
+		goto rejected;
+	}
 	if (req_msg->alt_local_lid) {
 		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
-		if (ret)
-			goto error3;
+		if (ret) {
+			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+				       &work->path[0].sgid,
+				       sizeof work->path[0].sgid, NULL, 0);
+			goto rejected;
+		}
 	}
 	cm_id_priv->tid = req_msg->hdr.tid;
 	cm_id_priv->timeout_ms = cm_convert_to_ms(
@ -1400,12 +1419,11 @@ static int cm_req_handler(struct cm_work *work)
 	cm_deref_id(listen_cm_id_priv);
 	return 0;

-error3:	atomic_dec(&cm_id_priv->refcount);
+rejected:
+	atomic_dec(&cm_id_priv->refcount);
 	cm_deref_id(listen_cm_id_priv);
-	cm_cleanup_timewait(cm_id_priv->timewait_info);
-error2:	kfree(cm_id_priv->timewait_info);
-	cm_id_priv->timewait_info = NULL;
-error1:	ib_destroy_cm_id(&cm_id_priv->id);
+destroy:
+	ib_destroy_cm_id(cm_id);
 	return ret;
 }

@ -2072,8 +2090,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 			spin_unlock_irqrestore(&cm.lock, flags);
 			return NULL;
 		}
-		cm_id_priv = idr_find(&cm.local_id_table,
-				      (__force int) timewait_info->work.local_id);
+		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
+				      (timewait_info->work.local_id ^
+				       cm.random_id_operand));
 		if (cm_id_priv) {
 			if (cm_id_priv->id.remote_id == remote_id)
 				atomic_inc(&cm_id_priv->refcount);
@ -3125,7 +3144,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
 		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
 					   IB_ACCESS_REMOTE_WRITE;
 		if (cm_id_priv->responder_resources)
-			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+						    IB_ACCESS_REMOTE_ATOMIC;
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
 		qp_attr->port_num = cm_id_priv->av.port->port_num;
 		ret = 0;
@ -3262,6 +3282,9 @@ static void cm_add_one(struct ib_device *device)
 	int ret;
 	u8 i;

+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
 			 device->phys_port_cnt, GFP_KERNEL);
 	if (!cm_dev)
@ -3349,6 +3372,7 @@ static int __init ib_cm_init(void)
 	cm.remote_qp_table = RB_ROOT;
 	cm.remote_sidr_table = RB_ROOT;
 	idr_init(&cm.local_id_table);
+	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
 	idr_pre_get(&cm.local_id_table, GFP_KERNEL);

 	cm.wq = create_workqueue("ib_cm");
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/idr.h>
+#include <linux/inetdevice.h>

 #include <net/tcp.h>

@ -43,6 +44,7 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
+#include <rdma/iw_cm.h>

 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@ -60,6 +62,7 @@ static struct ib_client cma_client = {
 	.remove = cma_remove_one
 };

+static struct ib_sa_client sa_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
@ -124,6 +127,7 @@ struct rdma_id_private {
 	int			query_id;
 	union {
 		struct ib_cm_id	*ib;
+		struct iw_cm_id	*iw;
 	} cm_id;

 	u32			seq_num;
@ -259,15 +263,24 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 	id_priv->cma_dev = NULL;
 }

-static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
+	enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
 	struct cma_device *cma_dev;
 	union ib_gid gid;
 	int ret = -ENODEV;

-	ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid),
+	switch (rdma_node_get_transport(dev_type)) {
+	case RDMA_TRANSPORT_IB:
+		ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		break;
+	case RDMA_TRANSPORT_IWARP:
+		iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		break;
+	default:
+		return -ENODEV;
+	}

-	mutex_lock(&lock);
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
 					 &id_priv->id.port_num, NULL);
@ -276,20 +289,9 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
 			break;
 		}
 	}
-	mutex_unlock(&lock);
 	return ret;
 }

-static int cma_acquire_dev(struct rdma_id_private *id_priv)
-{
-	switch (id_priv->id.route.addr.dev_addr.dev_type) {
-	case IB_NODE_CA:
-		return cma_acquire_ib_dev(id_priv);
-	default:
-		return -ENODEV;
-	}
-}
-
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 					  IB_QP_PKEY_INDEX | IB_QP_PORT);
 }

+static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+}
+
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 		   struct ib_qp_init_attr *qp_init_attr)
 {
@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 	if (IS_ERR(qp))
 		return PTR_ERR(qp);

-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_init_ib_qp(id_priv, qp);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_init_iw_qp(id_priv, qp);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 	int ret;

 	id_priv = container_of(id, struct rdma_id_private, id);
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
 					 qp_attr_mask);
 		if (qp_attr->qp_state == IB_QPS_RTR)
 			qp_attr->rq_psn = id_priv->seq_num;
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+					qp_attr_mask);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv,

 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (id_priv->query)
 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
 		break;
@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv)
 	cma_exch(id_priv, CMA_DESTROYING);

 	if (id_priv->cma_dev) {
-		switch (id_priv->id.device->node_type) {
-		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
@ -689,19 +712,25 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	state = cma_exch(id_priv, CMA_DESTROYING);
 	cma_cancel_operation(id_priv, state);

+	mutex_lock(&lock);
 	if (id_priv->cma_dev) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+		mutex_unlock(&lock);
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
-	  	mutex_lock(&lock);
+		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
-		mutex_unlock(&lock);
 	}
+	mutex_unlock(&lock);

 	cma_release_port(id_priv);
 	cma_deref_id(id_priv);
@ -869,7 +898,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
 	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
 	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
-	rt->addr.dev_addr.dev_type = IB_NODE_CA;
+	rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;

 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = CMA_CONNECT;
@ -898,7 +927,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	}

 	atomic_inc(&conn_id->dev_remove);
-	ret = cma_acquire_ib_dev(conn_id);
+	mutex_lock(&lock);
+	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
 	if (ret) {
 		ret = -ENODEV;
 		cma_release_remove(conn_id);
@ -982,6 +1013,130 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
 	}
 }

+static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
+{
+	struct rdma_id_private *id_priv = iw_id->context;
+	enum rdma_cm_event_type event = 0;
+	struct sockaddr_in *sin;
+	int ret = 0;
+
+	atomic_inc(&id_priv->dev_remove);
+
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CLOSE:
+		event = RDMA_CM_EVENT_DISCONNECTED;
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+		*sin = iw_event->local_addr;
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+		*sin = iw_event->remote_addr;
+		if (iw_event->status)
+			event = RDMA_CM_EVENT_REJECTED;
+		else
+			event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	default:
+		BUG_ON(1);
+	}
+
+	ret = cma_notify_user(id_priv, event, iw_event->status,
+			      iw_event->private_data,
+			      iw_event->private_data_len);
+	if (ret) {
+		/* Destroy the CM ID by returning a non-zero value. */
+		id_priv->cm_id.iw = NULL;
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return ret;
+	}
+
+	cma_release_remove(id_priv);
+	return ret;
+}
+
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+			       struct iw_cm_event *iw_event)
+{
+	struct rdma_cm_id *new_cm_id;
+	struct rdma_id_private *listen_id, *conn_id;
+	struct sockaddr_in *sin;
+	struct net_device *dev = NULL;
+	int ret;
+
+	listen_id = cm_id->context;
+	atomic_inc(&listen_id->dev_remove);
+	if (!cma_comp(listen_id, CMA_LISTEN)) {
+		ret = -ECONNABORTED;
+		goto out;
+	}
+
+	/* Create a new RDMA id for the new IW CM ID */
+	new_cm_id = rdma_create_id(listen_id->id.event_handler,
+				   listen_id->id.context,
+				   RDMA_PS_TCP);
+	if (!new_cm_id) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+	atomic_inc(&conn_id->dev_remove);
+	conn_id->state = CMA_CONNECT;
+
+	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
+	if (!dev) {
+		ret = -EADDRNOTAVAIL;
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	mutex_lock(&lock);
+	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	conn_id->cm_id.iw = cm_id;
+	cm_id->context = conn_id;
+	cm_id->cm_handler = cma_iw_handler;
+
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+	*sin = iw_event->local_addr;
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+	*sin = iw_event->remote_addr;
+
+	ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
+			      iw_event->private_data,
+			      iw_event->private_data_len);
+	if (ret) {
+		/* User wants to destroy the CM ID */
+		conn_id->cm_id.iw = NULL;
+		cma_exch(conn_id, CMA_DESTROYING);
+		cma_release_remove(conn_id);
+		rdma_destroy_id(&conn_id->id);
+	}
+
+out:
+	if (dev)
+		dev_put(dev);
+	cma_release_remove(listen_id);
+	return ret;
+}
+
 static int cma_ib_listen(struct rdma_id_private *id_priv)
 {
 	struct ib_cm_compare_data compare_data;
@ -1011,6 +1166,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
 	return ret;
 }

+static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
+{
+	int ret;
+	struct sockaddr_in *sin;
+
+	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+					    iw_conn_req_handler,
+					    id_priv);
+	if (IS_ERR(id_priv->cm_id.iw))
+		return PTR_ERR(id_priv->cm_id.iw);
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	id_priv->cm_id.iw->local_addr = *sin;
+
+	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
+
+	if (ret) {
+		iw_destroy_cm_id(id_priv->cm_id.iw);
+		id_priv->cm_id.iw = NULL;
+	}
+
+	return ret;
+}
+
 static int cma_listen_handler(struct rdma_cm_id *id,
 			      struct rdma_cm_event *event)
 {
@ -1087,12 +1266,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)

 	id_priv->backlog = backlog;
 	if (id->device) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
 			ret = cma_ib_listen(id_priv);
 			if (ret)
 				goto err;
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			ret = cma_iw_listen(id_priv, backlog);
+			if (ret)
+				goto err;
+			break;
 		default:
 			ret = -ENOSYS;
 			goto err;
@ -1140,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
 	path_rec.numb_path = 1;

-	id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
+	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
 				id_priv->id.port_num, &path_rec,
 				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
@ -1231,6 +1415,23 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);

+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+	struct cma_work *work;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler, work);
+	work->old_state = CMA_ROUTE_QUERY;
+	work->new_state = CMA_ROUTE_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	queue_work(cma_wq, &work->work);
+	return 0;
+}
+
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
 	struct rdma_id_private *id_priv;
@ -1241,10 +1442,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 		return -EINVAL;

 	atomic_inc(&id_priv->refcount);
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_resolve_ib_route(id_priv, timeout_ms);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_resolve_iw_route(id_priv, timeout_ms);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@ -1309,16 +1513,26 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	enum rdma_cm_event_type event;

 	atomic_inc(&id_priv->dev_remove);
-	if (!id_priv->cma_dev && !status)
+
+	/*
+	 * Grab mutex to block rdma_destroy_id() from removing the device while
+	 * we're trying to acquire it.
+	 */
+	mutex_lock(&lock);
+	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+		mutex_unlock(&lock);
+		goto out;
+	}
+
+	if (!status && !id_priv->cma_dev)
 		status = cma_acquire_dev(id_priv);
+	mutex_unlock(&lock);

 	if (status) {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
+		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
 			goto out;
 		event = RDMA_CM_EVENT_ADDR_ERROR;
 	} else {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
-			goto out;
 		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
 		       ip_addr_size(src_addr));
 		event = RDMA_CM_EVENT_ADDR_RESOLVED;
@ -1492,7 +1706,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
 	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
 		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
 			return -EADDRNOTAVAIL;
-		
+
 		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
 		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
 			return -EADDRINUSE;
@ -1542,8 +1756,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)

 	if (!cma_any_addr(addr)) {
 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
-		if (!ret)
+		if (!ret) {
+			mutex_lock(&lock);
 			ret = cma_acquire_dev(id_priv);
+			mutex_unlock(&lock);
+		}
 		if (ret)
 			goto err;
 	}
@ -1649,6 +1866,47 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	return ret;
 }

+static int cma_connect_iw(struct rdma_id_private *id_priv,
+			  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_id *cm_id;
+	struct sockaddr_in* sin;
+	int ret;
+	struct iw_cm_conn_param iw_param;
+
+	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
+	if (IS_ERR(cm_id)) {
+		ret = PTR_ERR(cm_id);
+		goto out;
+	}
+
+	id_priv->cm_id.iw = cm_id;
+
+	sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
+	cm_id->local_addr = *sin;
+
+	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
+	cm_id->remote_addr = *sin;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret) {
+		iw_destroy_cm_id(cm_id);
+		return ret;
+	}
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp)
+		iw_param.qpn = id_priv->qp_num;
+	else
+		iw_param.qpn = conn_param->qp_num;
+	ret = iw_cm_connect(cm_id, &iw_param);
+out:
+	return ret;
+}
+
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@ -1664,10 +1922,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 		id_priv->srq = conn_param->srq;
 	}

-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_connect_ib(id_priv, conn_param);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_connect_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@ -1708,6 +1969,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
 }

+static int cma_accept_iw(struct rdma_id_private *id_priv,
+		  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_conn_param iw_param;
+	int ret;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret)
+		return ret;
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp) {
+		iw_param.qpn = id_priv->qp_num;
+	} else
+		iw_param.qpn = conn_param->qp_num;
+
+	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
+}
+
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@ -1723,13 +2006,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 		id_priv->srq = conn_param->srq;
 	}

-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (conn_param)
 			ret = cma_accept_ib(id_priv, conn_param);
 		else
 			ret = cma_rep_recv(id_priv);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_accept_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@ -1756,12 +2042,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 	if (!cma_comp(id_priv, CMA_CONNECT))
 		return -EINVAL;

-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = ib_send_cm_rej(id_priv->cm_id.ib,
 				     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
 				     private_data, private_data_len);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_reject(id_priv->cm_id.iw,
+				   private_data, private_data_len);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@ -1780,17 +2070,20 @@ int rdma_disconnect(struct rdma_cm_id *id)
 	    !cma_comp(id_priv, CMA_DISCONNECT))
 		return -EINVAL;

-	ret = cma_modify_qp_err(id);
-	if (ret)
-		goto out;
-
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		ret = cma_modify_qp_err(id);
+		if (ret)
+			goto out;
 		/* Initiate or respond to a disconnect. */
 		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
 			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
+		break;
 	default:
+		ret = -EINVAL;
 		break;
 	}
 out:
@ -1907,12 +2200,15 @@ static int cma_init(void)
 	if (!cma_wq)
 		return -ENOMEM;

+	ib_sa_register_client(&sa_client);
+
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
 	return 0;

 err:
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	return ret;
 }
@ -1920,6 +2216,7 @@ static int cma_init(void)
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	idr_destroy(&sdp_ps);
 	idr_destroy(&tcp_ps);
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
 EXPORT_SYMBOL(ib_get_client_data);

 /**
- * ib_set_client_data - Get IB client context
+ * ib_set_client_data - Set IB client context
 * @device:Device to set context for
 * @client:Client to set context for
 * @data:Context to set
@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device,
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device,
 		   u8 port_num, int port_modify_mask,
 		   struct ib_port_modify *port_modify)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IWCM_H
+#define IWCM_H
+
+enum iw_cm_state {
+	IW_CM_STATE_IDLE,             /* unbound, inactive */
+	IW_CM_STATE_LISTEN,           /* listen waiting for connect */
+	IW_CM_STATE_CONN_RECV,        /* inbound waiting for user accept */
+	IW_CM_STATE_CONN_SENT,        /* outbound waiting for peer accept */
+	IW_CM_STATE_ESTABLISHED,      /* established */
+	IW_CM_STATE_CLOSING,	      /* disconnect */
+	IW_CM_STATE_DESTROYING        /* object being deleted */
+};
+
+struct iwcm_id_private {
+	struct iw_cm_id	id;
+	enum iw_cm_state state;
+	unsigned long flags;
+	struct ib_qp *qp;
+	struct completion destroy_comp;
+	wait_queue_head_t connect_wait;
+	struct list_head work_list;
+	spinlock_t lock;
+	atomic_t refcount;
+	struct list_head work_free_list;
+};
+
+#define IWCM_F_CALLBACK_DESTROY   1
+#define IWCM_F_CONNECT_WAIT       2
+
+#endif /* IWCM_H */
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
 	int i;

 	for (i = 0; i < MAX_MGMT_OUI; i++)
-                /* Is there matching OUI for this vendor class ? */
-                if (!memcmp(vendor_class->oui[i], oui, 3))
+		/* Is there matching OUI for this vendor class ? */
+		if (!memcmp(vendor_class->oui[i], oui, 3))
 			return i;

 	return -1;
@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
 				 &mad_agent_priv->send_list, agent_list) {
 		if (mad_send_wr->status == IB_WC_SUCCESS) {
- 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
 		}
 	}
@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 			}
 		}
 		sg_list.addr = dma_map_single(qp_info->port_priv->
-					      	device->dma_device,
+					        device->dma_device,
 					      &mad_priv->grh,
 					      sizeof *mad_priv -
-					      	sizeof mad_priv->header,
+					        sizeof mad_priv->header,
 					      DMA_FROM_DEVICE);
 		pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 	struct ib_qp *qp;

 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
- 	if (!attr) {
+	if (!attr) {
 		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
 		return -ENOMEM;
 	}
@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device)
 {
 	int start, end, i;

-	if (device->node_type == IB_NODE_SWITCH) {
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		start = 0;
 		end   = 0;
 	} else {
@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device)
 {
 	int i, num_ports, cur_port;

-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		num_ports = 1;
 		cur_port = 0;
 	} else {
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@ -39,7 +39,6 @@

 #include <linux/completion.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@ -33,8 +33,6 @@
 * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
 */

-#include <linux/dma-mapping.h>
-
 #include "mad_priv.h"
 #include "mad_rmpp.h"

@ -60,6 +58,7 @@ struct mad_rmpp_recv {
 	int last_ack;
 	int seg_num;
 	int newwin;
+	int repwin;

 	__be64 tid;
 	u32 src_qp;
@ -170,6 +169,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
 	return msg;
 }

+static void ack_ds_ack(struct ib_mad_agent_private *agent,
+		       struct ib_mad_recv_wc *recv_wc)
+{
+	struct ib_mad_send_buf *msg;
+	struct ib_rmpp_mad *rmpp_mad;
+	int ret;
+
+	msg = alloc_response_msg(&agent->agent, recv_wc);
+	if (IS_ERR(msg))
+		return;
+
+	rmpp_mad = msg->mad;
+	memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+
+	rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+	rmpp_mad->rmpp_hdr.seg_num = 0;
+	rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		ib_destroy_ah(msg->ah);
+		ib_free_send_mad(msg);
+	}
+}
+
 void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
@ -271,6 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
 	rmpp_recv->newwin = 1;
 	rmpp_recv->seg_num = 1;
 	rmpp_recv->last_ack = 0;
+	rmpp_recv->repwin = 1;

 	mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
 	rmpp_recv->tid = mad_hdr->tid;
@ -365,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent)
 static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
 						  int seg_num)
 {
-        struct ib_mad_recv_buf *seg_buf;
+	struct ib_mad_recv_buf *seg_buf;
 	int cur_seg_num;

 	list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
@ -591,6 +617,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
 			break;
 }

+static void process_ds_ack(struct ib_mad_agent_private *agent,
+			   struct ib_mad_recv_wc *mad_recv_wc, int newwin)
+{
+	struct mad_rmpp_recv *rmpp_recv;
+
+	rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+	if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
+		rmpp_recv->repwin = newwin;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 			     struct ib_mad_recv_wc *mad_recv_wc)
 {
@ -616,8 +652,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,

 	spin_lock_irqsave(&agent->lock, flags);
 	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
-	if (!mad_send_wr)
-		goto out;	/* Unmatched ACK */
+	if (!mad_send_wr) {
+		if (!seg_num)
+			process_ds_ack(agent, mad_recv_wc, newwin);
+		goto out;	/* Unmatched or DS RMPP ACK */
+	}
+
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
+	    (mad_send_wr->timeout)) {
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;		/* Repeated ACK for DS RMPP transaction */
+	}

 	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
@ -656,6 +702,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 		if (mad_send_wr->refcount == 1)
 			ib_reset_mad_timeout(mad_send_wr,
 					     mad_send_wr->send_buf.timeout_ms);
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
 		   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
@ -772,6 +821,39 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
 	return NULL;
 }

+static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+	struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
+	struct mad_rmpp_recv *rmpp_recv;
+	struct ib_ah_attr ah_attr;
+	unsigned long flags;
+	int newwin = 1;
+
+	if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
+		goto out;
+
+	spin_lock_irqsave(&agent->lock, flags);
+	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+		if (rmpp_recv->tid != mad_hdr->tid ||
+		    rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
+		    rmpp_recv->class_version != mad_hdr->class_version ||
+		    (rmpp_recv->method & IB_MGMT_METHOD_RESP))
+			continue;
+
+		if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+			continue;
+
+		if (rmpp_recv->slid == ah_attr.dlid) {
+			newwin = rmpp_recv->repwin;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&agent->lock, flags);
+out:
+	return newwin;
+}
+
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
@ -787,7 +869,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 		return IB_RMPP_RESULT_INTERNAL;
 	}

-	mad_send_wr->newwin = 1;
+	mad_send_wr->newwin = init_newwin(mad_send_wr);

 	/* We need to wait for the final ACK even if there isn't a response */
 	mad_send_wr->refcount += (mad_send_wr->timeout == 0);
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@ -1,6 +1,7 @@
 /*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
@ -75,6 +76,7 @@ struct ib_sa_device {
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
+	struct ib_sa_client    *client;
 	struct ib_sa_port      *port;
 	struct ib_mad_send_buf *mad_buf;
 	struct ib_sa_sm_ah     *sm_ah;
@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
 	}
 }

+void ib_sa_register_client(struct ib_sa_client *client)
+{
+	atomic_set(&client->users, 1);
+	init_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+	atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+	if (atomic_dec_and_test(&client->users))
+		complete(&client->comp);
+}
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+	ib_sa_client_put(client);
+	wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
 /**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id:ID of query to cancel
@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)

 /**
 * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
 * @device:device to send query on
 * @port_num: port number to send query on
 * @rec:Path Record to send in query
@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
 */
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 		goto err1;
 	}

-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;

 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,

 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);

 err1:
@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)

 /**
 * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
 * @device:device to send request on
 * @port_num: port number to send request on
 * @method:SA method - should be get, set, or delete
@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
 * error code.  Otherwise it is a request ID that can be used to cancel
 * the query.
 */
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			    struct ib_device *device, u8 port_num, u8 method,
 			    struct ib_sa_service_rec *rec,
 			    ib_sa_comp_mask comp_mask,
 			    int timeout_ms, gfp_t gfp_mask,
@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
 		goto err1;
 	}

-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;

 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,

 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);

 err1:
@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }

-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
 			     u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 		goto err1;
 	}

-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;

 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,

 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);

 err1:
@ -887,8 +928,9 @@ static void send_handler(struct ib_mad_agent *agent,
 	idr_remove(&query_idr, query->id);
 	spin_unlock_irqrestore(&idr_lock, flags);

-        ib_free_send_mad(mad_send_wc->send_buf);
+	ib_free_send_mad(mad_send_wc->send_buf);
 	kref_put(&query->sm_ah->ref, free_sm_ah);
+	ib_sa_client_put(query->client);
 	query->release(query);
 }

@ -919,7 +961,10 @@ static void ib_sa_add_one(struct ib_device *device)
 	struct ib_sa_device *sa_dev;
 	int s, e, i;

-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,

 		/* C14-9:2 */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;

 			/* smp->return_path set when received */
@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 		if (hop_ptr == hop_cnt) {
 			/* smp->return_path set when received */
 			smp->hop_ptr++;
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}

@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,

 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;

 			smp->hop_ptr--;
@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 		if (hop_ptr == 1) {
 			smp->hop_ptr--;
 			/* C14-13:3 -- SMPs destined for SM shouldn't be here */
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_slid == IB_LID_PERMISSIVE);
 		}

@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,

 		/* C14-9:2 -- intermediate hop */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;

 			smp->return_path[hop_ptr] = port_num;
@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 				smp->return_path[hop_ptr] = port_num;
 			/* smp->hop_ptr updated when sending */

-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}

@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,

 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;

 			/* smp->hop_ptr updated when sending */
@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 				return 1;
 			}
 			/* smp->hop_ptr updated when sending */
-			return (node_type == IB_NODE_SWITCH);
+			return (node_type == RDMA_NODE_IB_SWITCH);
 		}

 		/* C14-13:4 -- hop_ptr = 0 -> give to SM */
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@ -68,7 +68,7 @@ struct port_table_attribute {
 	int			index;
 };

-static inline int ibdev_is_alive(const struct ib_device *dev) 
+static inline int ibdev_is_alive(const struct ib_device *dev)
 {
 	return dev->reg_state == IB_DEV_REGISTERED;
 }
@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
 		return -ENODEV;

 	switch (dev->node_type) {
-	case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
-	case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
-	case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
-	default:             return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
+	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type);
+	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
 	}
 }

@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device)
 	if (ret)
 		goto err_put;

-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		ret = add_port(device, 0);
 		if (ret)
 			goto err_put;
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
 		info	      = evt->param.apr_rcvd.apr_info;
 		break;
 	case IB_CM_SIDR_REQ_RECEIVED:
-		uvt->resp.u.sidr_req_resp.pkey = 
+		uvt->resp.u.sidr_req_resp.pkey =
 					evt->param.sidr_req_rcvd.pkey;
-		uvt->resp.u.sidr_req_resp.port = 
+		uvt->resp.u.sidr_req_resp.port =
 					evt->param.sidr_req_rcvd.port;
 		uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
 		break;
@ -1237,7 +1237,7 @@ static struct class ucm_class = {
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
 	struct ib_ucm_device *dev;
-	
+
 	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
 	return sprintf(buf, "%s\n", dev->ib_dev->name);
 }
@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device)
 {
 	struct ib_ucm_device *ucm_dev;

-	if (!device->alloc_ucontext)
+	if (!device->alloc_ucontext ||
+	    rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 		return;

 	ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device)
 	struct ib_umad_device *umad_dev;
 	int s, e, i;

-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 }

 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
-					struct ib_ucontext *context)
+					struct ib_ucontext *context, int nested)
 {
 	struct ib_uobject *uobj;

@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
 	if (!uobj)
 		return NULL;

-	down_read(&uobj->mutex);
+	if (nested)
+		down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
+	else
+		down_read(&uobj->mutex);
 	if (!uobj->live) {
 		put_uobj_read(uobj);
 		return NULL;
@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
 	return uobj;
 }

-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
+			  int nested)
 {
 	struct ib_uobject *uobj;

-	uobj = idr_read_uobj(idr, id, context);
+	uobj = idr_read_uobj(idr, id, context, nested);
 	return uobj ? uobj->object : NULL;
 }

 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
+	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
 }

 static void put_pd_read(struct ib_pd *pd)
@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd)
 	put_uobj_read(pd->uobject);
 }

-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
 {
-	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
+	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
 }

 static void put_cq_read(struct ib_cq *cq)
@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq)

 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
+	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
 }

 static void put_ah_read(struct ib_ah *ah)
@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah)

 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
+	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
 }

 static void put_qp_read(struct ib_qp *qp)
@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp)

 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
+	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
 }

 static void put_srq_read(struct ib_srq *srq)
@ -837,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 err_copy:
 	idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);

-
 err_free:
 	ib_destroy_cq(cq);

@ -867,7 +870,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);

-	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
 	if (!cq)
 		return -EINVAL;

@ -875,11 +878,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 	if (ret)
 		goto out;

-	memset(&resp, 0, sizeof resp);
 	resp.cqe = cq->cqe;

 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp))
+			 &resp, sizeof resp.cqe))
 		ret = -EFAULT;

 out:
@ -894,7 +896,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_poll_cq       cmd;
 	struct ib_uverbs_poll_cq_resp *resp;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 	struct ib_wc                  *wc;
 	int                            ret = 0;
@ -915,16 +916,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		goto out_wc;
 	}

-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj) {
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+	if (!cq) {
 		ret = -EINVAL;
 		goto out;
 	}
-	cq = uobj->object;

 	resp->count = ib_poll_cq(cq, cmd.ne, wc);

-	put_uobj_read(uobj);
+	put_cq_read(cq);

 	for (i = 0; i < resp->count; i++) {
 		resp->wc[i].wr_id 	   = wc[i].wr_id;
@ -959,21 +959,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 				int out_len)
 {
 	struct ib_uverbs_req_notify_cq cmd;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;

 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;

-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj)
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+	if (!cq)
 		return -EINVAL;
-	cq = uobj->object;

 	ib_req_notify_cq(cq, cmd.solicited_only ?
 			 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);

-	put_uobj_read(uobj);
+	put_cq_read(cq);

 	return in_len;
 }
@ -1064,9 +1062,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,

 	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
+	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext);
+		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);

 	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
 		ret = -EINVAL;
@ -1274,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 			    int out_len)
 {
 	struct ib_uverbs_modify_qp cmd;
+	struct ib_udata            udata;
 	struct ib_qp              *qp;
 	struct ib_qp_attr         *attr;
 	int                        ret;
@ -1281,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;

+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr)
 		return -ENOMEM;
@ -1337,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	attr->alt_ah_attr.ah_flags 	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
 	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;

-	ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+	ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);

 	put_qp_read(qp);

@ -1674,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 				break;
 		}

-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@ -1724,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 				break;
 		}

-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@ -2055,6 +2055,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 			     int out_len)
 {
 	struct ib_uverbs_modify_srq cmd;
+	struct ib_udata             udata;
 	struct ib_srq              *srq;
 	struct ib_srq_attr          attr;
 	int                         ret;
@ -2062,6 +2063,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;

+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	srq = idr_read_srq(cmd.srq_handle, file->ucontext);
 	if (!srq)
 		return -EINVAL;
@ -2069,7 +2073,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	attr.max_wr    = cmd.max_wr;
 	attr.srq_limit = cmd.srq_limit;

-	ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
+	ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);

 	put_srq_read(srq);

--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);

+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+	switch (node_type) {
+	case RDMA_NODE_IB_CA:
+	case RDMA_NODE_IB_SWITCH:
+	case RDMA_NODE_IB_ROUTER:
+		return RDMA_TRANSPORT_IB;
+	case RDMA_NODE_RNIC:
+		return RDMA_TRANSPORT_IWARP;
+	default:
+		BUG();
+		return 0;
+	}
+}
+EXPORT_SYMBOL(rdma_node_get_transport);
+
 /* Protection domains */

 struct ib_pd *ib_alloc_pd(struct ib_device *device)
@ -231,7 +248,7 @@ int ib_modify_srq(struct ib_srq *srq,
 		  struct ib_srq_attr *srq_attr,
 		  enum ib_srq_attr_mask srq_attr_mask)
 {
-	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask);
+	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_srq);

@ -547,7 +564,7 @@ int ib_modify_qp(struct ib_qp *qp,
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
 {
-	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
+	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);

--- a/drivers/infiniband/hw/amso1100/Kbuild
+++ b/drivers/infiniband/hw/amso1100/Kbuild
@ -0,0 +1,8 @@
+ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
+
+iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
+	c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
--- a/drivers/infiniband/hw/amso1100/Kconfig
+++ b/drivers/infiniband/hw/amso1100/Kconfig
@ -0,0 +1,15 @@
+config INFINIBAND_AMSO1100
+	tristate "Ammasso 1100 HCA support"
+	depends on PCI && INET && INFINIBAND
+	---help---
+	  This is a low-level driver for the Ammasso 1100 host
+	  channel adapter (HCA).
+
+config INFINIBAND_AMSO1100_DEBUG
+	bool "Verbose debugging output"
+	depends on INFINIBAND_AMSO1100
+	default n
+	---help---
+	  This option causes the amso1100 driver to produce a bunch of
+	  debug messages.  Select this if you are developing the driver
+	  or trying to diagnose a problem.
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
--- a/drivers/infiniband/hw/amso1100/c2.h
+++ b/drivers/infiniband/hw/amso1100/c2.h
@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __C2_H
+#define __C2_H
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <asm/semaphore.h>
+
+#include "c2_provider.h"
+#include "c2_mq.h"
+#include "c2_status.h"
+
+#define DRV_NAME     "c2"
+#define DRV_VERSION  "1.1"
+#define PFX          DRV_NAME ": "
+
+#define BAR_0                0
+#define BAR_2                2
+#define BAR_4                4
+
+#define RX_BUF_SIZE         (1536 + 8)
+#define ETH_JUMBO_MTU        9000
+#define C2_MAGIC            "CEPHEUS"
+#define C2_VERSION           4
+#define C2_IVN              (18 & 0x7fffffff)
+
+#define C2_REG0_SIZE        (16 * 1024)
+#define C2_REG2_SIZE        (2 * 1024 * 1024)
+#define C2_REG4_SIZE        (256 * 1024 * 1024)
+#define C2_NUM_TX_DESC       341
+#define C2_NUM_RX_DESC       256
+#define C2_PCI_REGS_OFFSET  (0x10000)
+#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
+#define C2_RXP_HRXDQ_SIZE   (4096)
+#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
+#define C2_TXP_HTXDQ_SIZE   (4096)
+#define C2_TX_TIMEOUT	    (6*HZ)
+
+/* CEPHEUS */
+static const u8 c2_magic[] = {
+	0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
+};
+
+enum adapter_pci_regs {
+	C2_REGS_MAGIC = 0x0000,
+	C2_REGS_VERS = 0x0008,
+	C2_REGS_IVN = 0x000C,
+	C2_REGS_PCI_WINSIZE = 0x0010,
+	C2_REGS_Q0_QSIZE = 0x0014,
+	C2_REGS_Q0_MSGSIZE = 0x0018,
+	C2_REGS_Q0_POOLSTART = 0x001C,
+	C2_REGS_Q0_SHARED = 0x0020,
+	C2_REGS_Q1_QSIZE = 0x0024,
+	C2_REGS_Q1_MSGSIZE = 0x0028,
+	C2_REGS_Q1_SHARED = 0x0030,
+	C2_REGS_Q2_QSIZE = 0x0034,
+	C2_REGS_Q2_MSGSIZE = 0x0038,
+	C2_REGS_Q2_SHARED = 0x0040,
+	C2_REGS_ENADDR = 0x004C,
+	C2_REGS_RDMA_ENADDR = 0x0054,
+	C2_REGS_HRX_CUR = 0x006C,
+};
+
+struct c2_adapter_pci_regs {
+	char reg_magic[8];
+	u32 version;
+	u32 ivn;
+	u32 pci_window_size;
+	u32 q0_q_size;
+	u32 q0_msg_size;
+	u32 q0_pool_start;
+	u32 q0_shared;
+	u32 q1_q_size;
+	u32 q1_msg_size;
+	u32 q1_pool_start;
+	u32 q1_shared;
+	u32 q2_q_size;
+	u32 q2_msg_size;
+	u32 q2_pool_start;
+	u32 q2_shared;
+	u32 log_start;
+	u32 log_size;
+	u8 host_enaddr[8];
+	u8 rdma_enaddr[8];
+	u32 crash_entry;
+	u32 crash_ready[2];
+	u32 fw_txd_cur;
+	u32 fw_hrxd_cur;
+	u32 fw_rxd_cur;
+};
+
+enum pci_regs {
+	C2_HISR = 0x0000,
+	C2_DISR = 0x0004,
+	C2_HIMR = 0x0008,
+	C2_DIMR = 0x000C,
+	C2_NISR0 = 0x0010,
+	C2_NISR1 = 0x0014,
+	C2_NIMR0 = 0x0018,
+	C2_NIMR1 = 0x001C,
+	C2_IDIS = 0x0020,
+};
+
+enum {
+	C2_PCI_HRX_INT = 1 << 8,
+	C2_PCI_HTX_INT = 1 << 17,
+	C2_PCI_HRX_QUI = 1 << 31,
+};
+
+/*
+ * Cepheus registers in BAR0.
+ */
+struct c2_pci_regs {
+	u32 hostisr;
+	u32 dmaisr;
+	u32 hostimr;
+	u32 dmaimr;
+	u32 netisr0;
+	u32 netisr1;
+	u32 netimr0;
+	u32 netimr1;
+	u32 int_disable;
+};
+
+/* TXP flags */
+enum c2_txp_flags {
+	TXP_HTXD_DONE = 0,
+	TXP_HTXD_READY = 1 << 0,
+	TXP_HTXD_UNINIT = 1 << 1,
+};
+
+/* RXP flags */
+enum c2_rxp_flags {
+	RXP_HRXD_UNINIT = 0,
+	RXP_HRXD_READY = 1 << 0,
+	RXP_HRXD_DONE = 1 << 1,
+};
+
+/* RXP status */
+enum c2_rxp_status {
+	RXP_HRXD_ZERO = 0,
+	RXP_HRXD_OK = 1 << 0,
+	RXP_HRXD_BUF_OV = 1 << 1,
+};
+
+/* TXP descriptor fields */
+enum txp_desc {
+	C2_TXP_FLAGS = 0x0000,
+	C2_TXP_LEN = 0x0002,
+	C2_TXP_ADDR = 0x0004,
+};
+
+/* RXP descriptor fields */
+enum rxp_desc {
+	C2_RXP_FLAGS = 0x0000,
+	C2_RXP_STATUS = 0x0002,
+	C2_RXP_COUNT = 0x0004,
+	C2_RXP_LEN = 0x0006,
+	C2_RXP_ADDR = 0x0008,
+};
+
+struct c2_txp_desc {
+	u16 flags;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_desc {
+	u16 flags;
+	u16 status;
+	u16 count;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_hdr {
+	u16 flags;
+	u16 status;
+	u16 len;
+	u16 rsvd;
+} __attribute__ ((packed));
+
+struct c2_tx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+struct c2_rx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+struct c2_alloc {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+struct c2_array {
+	struct {
+		void **page;
+		int used;
+	} *page_list;
+};
+
+/*
+ * The MQ shared pointer pool is organized as a linked list of
+ * chunks. Each chunk contains a linked list of free shared pointers
+ * that can be allocated to a given user mode client.
+ *
+ */
+struct sp_chunk {
+	struct sp_chunk *next;
+	dma_addr_t dma_addr;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 head;
+	u16 shared_ptr[0];
+};
+
+struct c2_pd_table {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+struct c2_qp_table {
+	struct idr idr;
+	spinlock_t lock;
+	int last;
+};
+
+struct c2_element {
+	struct c2_element *next;
+	void *ht_desc;		/* host     descriptor */
+	void __iomem *hw_desc;	/* hardware descriptor */
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+};
+
+struct c2_ring {
+	struct c2_element *to_clean;
+	struct c2_element *to_use;
+	struct c2_element *start;
+	unsigned long count;
+};
+
+struct c2_dev {
+	struct ib_device ibdev;
+	void __iomem *regs;
+	void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
+	void __iomem *mmio_rxp_ring;
+	spinlock_t lock;
+	struct pci_dev *pcidev;
+	struct net_device *netdev;
+	struct net_device *pseudo_netdev;
+	unsigned int cur_tx;
+	unsigned int cur_rx;
+	u32 adapter_handle;
+	int device_cap_flags;
+	void __iomem *kva;	/* KVA device memory */
+	unsigned long pa;	/* PA device memory */
+	void **qptr_array;
+
+	kmem_cache_t *host_msg_cache;
+
+	struct list_head cca_link;		/* adapter list */
+	struct list_head eh_wakeup_list;	/* event wakeup list */
+	wait_queue_head_t req_vq_wo;
+
+	/* Cached RNIC properties */
+	struct ib_device_attr props;
+
+	struct c2_pd_table pd_table;
+	struct c2_qp_table qp_table;
+	int ports;		/* num of GigE ports */
+	int devnum;
+	spinlock_t vqlock;	/* sync vbs req MQ */
+
+	/* Verbs Queues */
+	struct c2_mq req_vq;	/* Verbs Request MQ */
+	struct c2_mq rep_vq;	/* Verbs Reply MQ */
+	struct c2_mq aeq;	/* Async Events MQ */
+
+	/* Kernel client MQs */
+	struct sp_chunk *kern_mqsp_pool;
+
+	/* Device updates these values when posting messages to a host
+	 * target queue */
+	u16 req_vq_shared;
+	u16 rep_vq_shared;
+	u16 aeq_shared;
+	u16 irq_claimed;
+
+	/*
+	 * Shared host target pages for user-accessible MQs.
+	 */
+	int hthead;		/* index of first free entry */
+	void *htpages;		/* kernel vaddr */
+	int htlen;		/* length of htpages memory */
+	void *htuva;		/* user mapped vaddr */
+	spinlock_t htlock;	/* serialize allocation */
+
+	u64 adapter_hint_uva;	/* access to the activity FIFO */
+
+	//	spinlock_t aeq_lock;
+	//	spinlock_t rnic_lock;
+
+	u16 *hint_count;
+	dma_addr_t hint_count_dma;
+	u16 hints_read;
+
+	int init;		/* TRUE if it's ready */
+	char ae_cache_name[16];
+	char vq_cache_name[16];
+};
+
+struct c2_port {
+	u32 msg_enable;
+	struct c2_dev *c2dev;
+	struct net_device *netdev;
+
+	spinlock_t tx_lock;
+	u32 tx_avail;
+	struct c2_ring tx_ring;
+	struct c2_ring rx_ring;
+
+	void *mem;		/* PCI memory for host rings */
+	dma_addr_t dma;
+	unsigned long mem_size;
+
+	u32 rx_buf_size;
+
+	struct net_device_stats netstats;
+};
+
+/*
+ * Activity FIFO registers in BAR0.
+ */
+#define PCI_BAR0_HOST_HINT	0x100
+#define PCI_BAR0_ADAPTER_HINT	0x2000
+
+/*
+ * Ammasso PCI vendor id and Cepheus PCI device id.
+ */
+#define CQ_ARMED 	0x01
+#define CQ_WAIT_FOR_DMA	0x80
+
+/*
+ * The format of a hint is as follows:
+ * Lower 16 bits are the count of hints for the queue.
+ * Next 15 bits are the qp_index
+ * Upper most bit depends on who reads it:
+ *    If read by producer, then it means Full (1) or Not-Full (0)
+ *    If read by consumer, then it means Empty (1) or Not-Empty (0)
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+
+/*
+ * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
+ * struct.
+ */
+#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
+
+#ifndef readq
+static inline u64 readq(const void __iomem * addr)
+{
+	u64 ret = readl(addr + 4);
+	ret <<= 32;
+	ret |= readl(addr);
+
+	return ret;
+}
+#endif
+
+#ifndef writeq
+static inline void __raw_writeq(u64 val, void __iomem * addr)
+{
+	__raw_writel((u32) (val), addr);
+	__raw_writel((u32) (val >> 32), (addr + 4));
+}
+#endif
+
+#define C2_SET_CUR_RX(c2dev, cur_rx) \
+	__raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
+
+#define C2_GET_CUR_RX(c2dev) \
+	be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092))
+
+static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct c2_dev, ibdev);
+}
+
+static inline int c2_errno(void *reply)
+{
+	switch (c2_wr_get_result(reply)) {
+	case C2_OK:
+		return 0;
+	case CCERR_NO_BUFS:
+	case CCERR_INSUFFICIENT_RESOURCES:
+	case CCERR_ZERO_RDMA_READ_RESOURCES:
+		return -ENOMEM;
+	case CCERR_MR_IN_USE:
+	case CCERR_QP_IN_USE:
+		return -EBUSY;
+	case CCERR_ADDR_IN_USE:
+		return -EADDRINUSE;
+	case CCERR_ADDR_NOT_AVAIL:
+		return -EADDRNOTAVAIL;
+	case CCERR_CONN_RESET:
+		return -ECONNRESET;
+	case CCERR_NOT_IMPLEMENTED:
+	case CCERR_INVALID_WQE:
+		return -ENOSYS;
+	case CCERR_QP_NOT_PRIVILEGED:
+		return -EPERM;
+	case CCERR_STACK_ERROR:
+		return -EPROTO;
+	case CCERR_ACCESS_VIOLATION:
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return -EFAULT;
+	case CCERR_STAG_STATE_NOT_INVALID:
+	case CCERR_INVALID_ADDRESS:
+	case CCERR_INVALID_CQ:
+	case CCERR_INVALID_EP:
+	case CCERR_INVALID_MODIFIER:
+	case CCERR_INVALID_MTU:
+	case CCERR_INVALID_PD_ID:
+	case CCERR_INVALID_QP:
+	case CCERR_INVALID_RNIC:
+	case CCERR_INVALID_STAG:
+		return -EINVAL;
+	default:
+		return -EAGAIN;
+	}
+}
+
+/* Device */
+extern int c2_register_device(struct c2_dev *c2dev);
+extern void c2_unregister_device(struct c2_dev *c2dev);
+extern int c2_rnic_init(struct c2_dev *c2dev);
+extern void c2_rnic_term(struct c2_dev *c2dev);
+extern void c2_rnic_interrupt(struct c2_dev *c2dev);
+extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+
+/* QPs */
+extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
+		       struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
+extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
+extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
+extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+			struct ib_qp_attr *attr, int attr_mask);
+extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+				 int ord, int ird);
+extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+			struct ib_send_wr **bad_wr);
+extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+			   struct ib_recv_wr **bad_wr);
+extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_set_qp_state(struct c2_qp *, int);
+extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
+
+/* PDs */
+extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
+extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
+extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+
+/* CQs */
+extern int c2_init_cq(struct c2_dev *c2dev, int entries,
+		      struct c2_ucontext *ctx, struct c2_cq *cq);
+extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
+extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
+extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
+extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+
+/* CM */
+extern int c2_llp_connect(struct iw_cm_id *cm_id,
+			  struct iw_cm_conn_param *iw_param);
+extern int c2_llp_accept(struct iw_cm_id *cm_id,
+			 struct iw_cm_conn_param *iw_param);
+extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
+			 u8 pdata_len);
+extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
+extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
+
+/* MM */
+extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ 				      int page_size, int pbl_depth, u32 length,
+ 				      u32 off, u64 *va, enum c2_acf acf,
+				      struct c2_mr *mr);
+extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
+
+/* AE */
+extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
+
+/* MQSP Allocator */
+extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+			     struct sp_chunk **root);
+extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
+extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+			  dma_addr_t *dma_addr, gfp_t gfp_mask);
+extern void c2_free_mqsp(u16 * mqsp);
+#endif
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_status.h"
+#include "c2_ae.h"
+
+static int c2_convert_cm_status(u32 c2_status)
+{
+	switch (c2_status) {
+	case C2_CONN_STATUS_SUCCESS:
+		return 0;
+	case C2_CONN_STATUS_REJECTED:
+		return -ENETRESET;
+	case C2_CONN_STATUS_REFUSED:
+		return -ECONNREFUSED;
+	case C2_CONN_STATUS_TIMEDOUT:
+		return -ETIMEDOUT;
+	case C2_CONN_STATUS_NETUNREACH:
+		return -ENETUNREACH;
+	case C2_CONN_STATUS_HOSTUNREACH:
+		return -EHOSTUNREACH;
+	case C2_CONN_STATUS_INVALID_RNIC:
+		return -EINVAL;
+	case C2_CONN_STATUS_INVALID_QP:
+		return -EINVAL;
+	case C2_CONN_STATUS_INVALID_QP_STATE:
+		return -EINVAL;
+	case C2_CONN_STATUS_ADDR_NOT_AVAIL:
+		return -EADDRNOTAVAIL;
+	default:
+		printk(KERN_ERR PFX
+		       "%s - Unable to convert CM status: %d\n",
+		       __FUNCTION__, c2_status);
+		return -EIO;
+	}
+}
+
+#ifdef DEBUG
+static const char* to_event_str(int event)
+{
+	static const char* event_str[] = {
+		"CCAE_REMOTE_SHUTDOWN",
+		"CCAE_ACTIVE_CONNECT_RESULTS",
+		"CCAE_CONNECTION_REQUEST",
+		"CCAE_LLP_CLOSE_COMPLETE",
+		"CCAE_TERMINATE_MESSAGE_RECEIVED",
+		"CCAE_LLP_CONNECTION_RESET",
+		"CCAE_LLP_CONNECTION_LOST",
+		"CCAE_LLP_SEGMENT_SIZE_INVALID",
+		"CCAE_LLP_INVALID_CRC",
+		"CCAE_LLP_BAD_FPDU",
+		"CCAE_INVALID_DDP_VERSION",
+		"CCAE_INVALID_RDMA_VERSION",
+		"CCAE_UNEXPECTED_OPCODE",
+		"CCAE_INVALID_DDP_QUEUE_NUMBER",
+		"CCAE_RDMA_READ_NOT_ENABLED",
+		"CCAE_RDMA_WRITE_NOT_ENABLED",
+		"CCAE_RDMA_READ_TOO_SMALL",
+		"CCAE_NO_L_BIT",
+		"CCAE_TAGGED_INVALID_STAG",
+		"CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
+		"CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_TAGGED_INVALID_PD",
+		"CCAE_WRAP_ERROR",
+		"CCAE_BAD_CLOSE",
+		"CCAE_BAD_LLP_CLOSE",
+		"CCAE_INVALID_MSN_RANGE",
+		"CCAE_INVALID_MSN_GAP",
+		"CCAE_IRRQ_OVERFLOW",
+		"CCAE_IRRQ_MSN_GAP",
+		"CCAE_IRRQ_MSN_RANGE",
+		"CCAE_IRRQ_INVALID_STAG",
+		"CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
+		"CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_IRRQ_INVALID_PD",
+		"CCAE_IRRQ_WRAP_ERROR",
+		"CCAE_CQ_SQ_COMPLETION_OVERFLOW",
+		"CCAE_CQ_RQ_COMPLETION_ERROR",
+		"CCAE_QP_SRQ_WQE_ERROR",
+		"CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
+		"CCAE_CQ_OVERFLOW",
+		"CCAE_CQ_OPERATION_ERROR",
+		"CCAE_SRQ_LIMIT_REACHED",
+		"CCAE_QP_RQ_LIMIT_REACHED",
+		"CCAE_SRQ_CATASTROPHIC_ERROR",
+		"CCAE_RNIC_CATASTROPHIC_ERROR"
+	};
+
+	if (event < CCAE_REMOTE_SHUTDOWN ||
+	    event > CCAE_RNIC_CATASTROPHIC_ERROR)
+		return "<invalid event>";
+
+	event -= CCAE_REMOTE_SHUTDOWN;
+	return event_str[event];
+}
+
+static const char *to_qp_state_str(int state)
+{
+	switch (state) {
+	case C2_QP_STATE_IDLE:
+		return "C2_QP_STATE_IDLE";
+	case C2_QP_STATE_CONNECTING:
+		return "C2_QP_STATE_CONNECTING";
+	case C2_QP_STATE_RTS:
+		return "C2_QP_STATE_RTS";
+	case C2_QP_STATE_CLOSING:
+		return "C2_QP_STATE_CLOSING";
+	case C2_QP_STATE_TERMINATE:
+		return "C2_QP_STATE_TERMINATE";
+	case C2_QP_STATE_ERROR:
+		return "C2_QP_STATE_ERROR";
+	default:
+		return "<invalid QP state>";
+	};
+}
+#endif
+
+void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_mq *mq = c2dev->qptr_array[mq_index];
+	union c2wr *wr;
+	void *resource_user_context;
+	struct iw_cm_event cm_event;
+	struct ib_event ib_event;
+	enum c2_resource_indicator resource_indicator;
+	enum c2_event_id event_id;
+	unsigned long flags;
+	int status;
+
+	/*
+	 * retreive the message
+	 */
+	wr = c2_mq_consume(mq);
+	if (!wr)
+		return;
+
+	memset(&ib_event, 0, sizeof(ib_event));
+	memset(&cm_event, 0, sizeof(cm_event));
+
+	event_id = c2_wr_get_id(wr);
+	resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
+	resource_user_context =
+	    (void *) (unsigned long) wr->ae.ae_generic.user_context;
+
+	status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
+
+	pr_debug("event received c2_dev=%p, event_id=%d, "
+		"resource_indicator=%d, user_context=%p, status = %d\n",
+		c2dev, event_id, resource_indicator, resource_user_context,
+		status);
+
+	switch (resource_indicator) {
+	case C2_RES_IND_QP:{
+
+		struct c2_qp *qp = (struct c2_qp *)resource_user_context;
+		struct iw_cm_id *cm_id = qp->cm_id;
+		struct c2wr_ae_active_connect_results *res;
+
+		if (!cm_id) {
+			pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
+				qp);
+			goto ignore_it;
+		}
+		pr_debug("%s: event = %s, user_context=%llx, "
+			"resource_type=%x, "
+			"resource=%x, qp_state=%s\n",
+			__FUNCTION__,
+			to_event_str(event_id),
+			be64_to_cpu(wr->ae.ae_generic.user_context),
+			be32_to_cpu(wr->ae.ae_generic.resource_type),
+			be32_to_cpu(wr->ae.ae_generic.resource),
+			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
+
+		c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
+
+		switch (event_id) {
+		case CCAE_ACTIVE_CONNECT_RESULTS:
+			res = &wr->ae.ae_active_connect_results;
+			cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+			cm_event.local_addr.sin_addr.s_addr = res->laddr;
+			cm_event.remote_addr.sin_addr.s_addr = res->raddr;
+			cm_event.local_addr.sin_port = res->lport;
+			cm_event.remote_addr.sin_port =	res->rport;
+			if (status == 0) {
+				cm_event.private_data_len =
+					be32_to_cpu(res->private_data_length);
+				cm_event.private_data = res->private_data;
+			} else {
+				spin_lock_irqsave(&qp->lock, flags);
+				if (qp->cm_id) {
+					qp->cm_id->rem_ref(qp->cm_id);
+					qp->cm_id = NULL;
+				}
+				spin_unlock_irqrestore(&qp->lock, flags);
+				cm_event.private_data_len = 0;
+				cm_event.private_data = NULL;
+			}
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		case CCAE_TERMINATE_MESSAGE_RECEIVED:
+		case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
+			ib_event.device = &c2dev->ibdev;
+			ib_event.element.qp = &qp->ibqp;
+			ib_event.event = IB_EVENT_QP_REQ_ERR;
+
+			if (qp->ibqp.event_handler)
+				qp->ibqp.event_handler(&ib_event,
+						       qp->ibqp.
+						       qp_context);
+			break;
+		case CCAE_BAD_CLOSE:
+		case CCAE_LLP_CLOSE_COMPLETE:
+		case CCAE_LLP_CONNECTION_RESET:
+		case CCAE_LLP_CONNECTION_LOST:
+			BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
+
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id) {
+				qp->cm_id->rem_ref(qp->cm_id);
+				qp->cm_id = NULL;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+			cm_event.event = IW_CM_EVENT_CLOSE;
+			cm_event.status = 0;
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		default:
+			BUG_ON(1);
+			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
+				"CM_ID=%p\n",
+				__FUNCTION__, __LINE__,
+				event_id, qp, cm_id);
+			break;
+		}
+		break;
+	}
+
+	case C2_RES_IND_EP:{
+
+		struct c2wr_ae_connection_request *req =
+			&wr->ae.ae_connection_request;
+		struct iw_cm_id *cm_id =
+			(struct iw_cm_id *)resource_user_context;
+
+		pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
+		if (event_id != CCAE_CONNECTION_REQUEST) {
+			pr_debug("%s: Invalid event_id: %d\n",
+				__FUNCTION__, event_id);
+			break;
+		}
+		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+		cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
+		cm_event.local_addr.sin_addr.s_addr = req->laddr;
+		cm_event.remote_addr.sin_addr.s_addr = req->raddr;
+		cm_event.local_addr.sin_port = req->lport;
+		cm_event.remote_addr.sin_port = req->rport;
+		cm_event.private_data_len =
+			be32_to_cpu(req->private_data_length);
+		cm_event.private_data = req->private_data;
+
+		if (cm_id->event_handler)
+			cm_id->event_handler(cm_id, &cm_event);
+		break;
+	}
+
+	case C2_RES_IND_CQ:{
+		struct c2_cq *cq =
+		    (struct c2_cq *) resource_user_context;
+
+		pr_debug("IB_EVENT_CQ_ERR\n");
+		ib_event.device = &c2dev->ibdev;
+		ib_event.element.cq = &cq->ibcq;
+		ib_event.event = IB_EVENT_CQ_ERR;
+
+		if (cq->ibcq.event_handler)
+			cq->ibcq.event_handler(&ib_event,
+					       cq->ibcq.cq_context);
+	}
+
+	default:
+		printk("Bad resource indicator = %d\n",
+		       resource_indicator);
+		break;
+	}
+
+ ignore_it:
+	c2_mq_free(mq);
+}
--- a/drivers/infiniband/hw/amso1100/c2_ae.h
+++ b/drivers/infiniband/hw/amso1100/c2_ae.h
@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_AE_H_
+#define _C2_AE_H_
+
+/*
+ * WARNING: If you change this file, also bump C2_IVN_BASE
+ * in common/include/clustercore/c2_ivn.h.
+ */
+
+/*
+ * Asynchronous Event Identifiers
+ *
+ * These start at 0x80 only so it's obvious from inspection that
+ * they are not work-request statuses.  This isn't critical.
+ *
+ * NOTE: these event id's must fit in eight bits.
+ */
+enum c2_event_id {
+	CCAE_REMOTE_SHUTDOWN = 0x80,
+	CCAE_ACTIVE_CONNECT_RESULTS,
+	CCAE_CONNECTION_REQUEST,
+	CCAE_LLP_CLOSE_COMPLETE,
+	CCAE_TERMINATE_MESSAGE_RECEIVED,
+	CCAE_LLP_CONNECTION_RESET,
+	CCAE_LLP_CONNECTION_LOST,
+	CCAE_LLP_SEGMENT_SIZE_INVALID,
+	CCAE_LLP_INVALID_CRC,
+	CCAE_LLP_BAD_FPDU,
+	CCAE_INVALID_DDP_VERSION,
+	CCAE_INVALID_RDMA_VERSION,
+	CCAE_UNEXPECTED_OPCODE,
+	CCAE_INVALID_DDP_QUEUE_NUMBER,
+	CCAE_RDMA_READ_NOT_ENABLED,
+	CCAE_RDMA_WRITE_NOT_ENABLED,
+	CCAE_RDMA_READ_TOO_SMALL,
+	CCAE_NO_L_BIT,
+	CCAE_TAGGED_INVALID_STAG,
+	CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
+	CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
+	CCAE_TAGGED_INVALID_PD,
+	CCAE_WRAP_ERROR,
+	CCAE_BAD_CLOSE,
+	CCAE_BAD_LLP_CLOSE,
+	CCAE_INVALID_MSN_RANGE,
+	CCAE_INVALID_MSN_GAP,
+	CCAE_IRRQ_OVERFLOW,
+	CCAE_IRRQ_MSN_GAP,
+	CCAE_IRRQ_MSN_RANGE,
+	CCAE_IRRQ_INVALID_STAG,
+	CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
+	CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
+	CCAE_IRRQ_INVALID_PD,
+	CCAE_IRRQ_WRAP_ERROR,
+	CCAE_CQ_SQ_COMPLETION_OVERFLOW,
+	CCAE_CQ_RQ_COMPLETION_ERROR,
+	CCAE_QP_SRQ_WQE_ERROR,
+	CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
+	CCAE_CQ_OVERFLOW,
+	CCAE_CQ_OPERATION_ERROR,
+	CCAE_SRQ_LIMIT_REACHED,
+	CCAE_QP_RQ_LIMIT_REACHED,
+	CCAE_SRQ_CATASTROPHIC_ERROR,
+	CCAE_RNIC_CATASTROPHIC_ERROR
+/* WARNING If you add more id's, make sure their values fit in eight bits. */
+};
+
+/*
+ * Resource Indicators and Identifiers
+ */
+enum c2_resource_indicator {
+	C2_RES_IND_QP = 1,
+	C2_RES_IND_EP,
+	C2_RES_IND_CQ,
+	C2_RES_IND_SRQ,
+};
+
+#endif /* _C2_AE_H_ */
--- a/drivers/infiniband/hw/amso1100/c2_alloc.c
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "c2.h"
+
+static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
+			       struct sp_chunk **head)
+{
+	int i;
+	struct sp_chunk *new_head;
+
+	new_head = (struct sp_chunk *) __get_free_page(gfp_mask);
+	if (new_head == NULL)
+		return -ENOMEM;
+
+	new_head->dma_addr = dma_map_single(c2dev->ibdev.dma_device, new_head,
+					    PAGE_SIZE, DMA_FROM_DEVICE);
+	pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);
+
+	new_head->next = NULL;
+	new_head->head = 0;
+
+	/* build list where each index is the next free slot */
+	for (i = 0;
+	     i < (PAGE_SIZE - sizeof(struct sp_chunk) -
+		  sizeof(u16)) / sizeof(u16) - 1;
+	     i++) {
+		new_head->shared_ptr[i] = i + 1;
+	}
+	/* terminate list */
+	new_head->shared_ptr[i] = 0xFFFF;
+
+	*head = new_head;
+	return 0;
+}
+
+int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+		      struct sp_chunk **root)
+{
+	return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
+}
+
+void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
+{
+	struct sp_chunk *next;
+
+	while (root) {
+		next = root->next;
+		dma_unmap_single(c2dev->ibdev.dma_device,
+				 pci_unmap_addr(root, mapping), PAGE_SIZE,
+			         DMA_FROM_DEVICE);
+		__free_page((struct page *) root);
+		root = next;
+	}
+}
+
+u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+		   dma_addr_t *dma_addr, gfp_t gfp_mask)
+{
+	u16 mqsp;
+
+	while (head) {
+		mqsp = head->head;
+		if (mqsp != 0xFFFF) {
+			head->head = head->shared_ptr[mqsp];
+			break;
+		} else if (head->next == NULL) {
+			if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
+			    0) {
+				head = head->next;
+				mqsp = head->head;
+				head->head = head->shared_ptr[mqsp];
+				break;
+			} else
+				return NULL;
+		} else
+			head = head->next;
+	}
+	if (head) {
+		*dma_addr = head->dma_addr +
+			    ((unsigned long) &(head->shared_ptr[mqsp]) -
+			     (unsigned long) head);
+		pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
+			 &(head->shared_ptr[mqsp]), (u64)*dma_addr);
+		return &(head->shared_ptr[mqsp]);
+	}
+	return NULL;
+}
+
+void c2_free_mqsp(u16 * mqsp)
+{
+	struct sp_chunk *head;
+	u16 idx;
+
+	/* The chunk containing this ptr begins at the page boundary */
+	head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
+
+	/* Link head to new mqsp */
+	*mqsp = head->head;
+
+	/* Compute the shared_ptr index */
+	idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
+	idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
+
+	/* Point this index at the head */
+	head->shared_ptr[idx] = head->head;
+
+	/* Point head at this index */
+	head->head = idx;
+}
--- a/drivers/infiniband/hw/amso1100/c2_cm.c
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_vq.h"
+#include <rdma/iw_cm.h>
+
+int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct ib_qp *ibqp;
+	struct c2_qp *qp;
+	struct c2wr_qp_connect_req *wr;	/* variable size needs a malloc. */
+	struct c2_vq_req *vq_req;
+	int err;
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Associate QP <--> CM_ID */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	/*
+	 * only support the max private_data length
+	 */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail0;
+	}
+	/*
+	 * Set the rdma read limits
+	 */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Create and send a WR_QP_CONNECT...
+	 */
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	c2_wr_set_id(wr, CCWR_QP_CONNECT);
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->qp_handle = qp->adapter_handle;
+
+	wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
+	wr->remote_port = cm_id->remote_addr.sin_port;
+
+	/*
+	 * Move any private data from the callers's buf into
+	 * the WR.
+	 */
+	if (iw_param->private_data) {
+		wr->private_data_length =
+			cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0], iw_param->private_data,
+		       iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/*
+	 * Send WR to adapter.  NOTE: There is no synch reply from
+	 * the adapter.
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	vq_req_free(c2dev, vq_req);
+
+ bail1:
+	kfree(wr);
+ bail0:
+	if (err) {
+		/*
+		 * If we fail, release reference on QP and
+		 * disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	return err;
+}
+
+int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_create_req wr;
+	struct c2wr_ep_listen_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
+	wr.local_port = cm_id->local_addr.sin_port;
+	wr.backlog = cpu_to_be32(backlog);
+	wr.user_context = (u64) (unsigned long) cm_id;
+
+	/*
+	 * Reference the request struct.  Dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply =
+	    (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+
+	/*
+	 * Keep the adapter handle. Used in subsequent destroy
+	 */
+	cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
+
+	/*
+	 * free vq stuff
+	 */
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+
+int c2_llp_service_destroy(struct iw_cm_id *cm_id)
+{
+
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_destroy_req wr;
+	struct c2wr_ep_listen_destroy_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct c2_qp *qp;
+	struct ib_qp *ibqp;
+	struct c2wr_cr_accept_req *wr;	/* variable length WR */
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_accept_rep *reply;	/* VQ Reply msg ptr. */
+	int err;
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Set the RDMA read limits */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/* Allocate verbs request. */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	vq_req->qp = qp;
+	vq_req->cm_id = cm_id;
+	vq_req->event = IW_CM_EVENT_ESTABLISHED;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	/* Build the WR */
+	c2_wr_set_id(wr, CCWR_CR_ACCEPT);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
+	wr->qp_handle = qp->adapter_handle;
+
+	/* Replace the cr_handle with the QP after accept */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	cm_id->provider_data = qp;
+
+	/* Validate private_data length */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail2;
+	}
+
+	if (iw_param->private_data) {
+		wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0],
+		       iw_param->private_data, iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/* Reference the request struct.  Dereferenced in the int handler. */
+	vq_req_get(c2dev, vq_req);
+
+	/* Send WR to adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail2;
+	}
+
+	/* Wait for reply from adapter */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail2;
+
+	/* Check that reply is present */
+	reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+	if (!err)
+		c2_set_qp_state(qp, C2_QP_STATE_RTS);
+ bail2:
+	kfree(wr);
+ bail1:
+	vq_req_free(c2dev, vq_req);
+ bail0:
+	if (err) {
+		/*
+		 * If we fail, release reference on QP and
+		 * disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	return err;
+}
+
+int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct c2_dev *c2dev;
+	struct c2wr_cr_reject_req wr;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_reject_rep *reply;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_CR_REJECT);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_cr_reject_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	err = c2_errno(reply);
+	/*
+	 * free vq stuff
+	 */
+	vq_repbuf_free(c2dev, reply);
+
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
+
+static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
+{
+	struct c2_cq *cq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->lock, flags);
+	cq = c2dev->qptr_array[cqn];
+	if (!cq) {
+		spin_unlock_irqrestore(&c2dev->lock, flags);
+		return NULL;
+	}
+	atomic_inc(&cq->refcount);
+	spin_unlock_irqrestore(&c2dev->lock, flags);
+	return cq;
+}
+
+static void c2_cq_put(struct c2_cq *cq)
+{
+	if (atomic_dec_and_test(&cq->refcount))
+		wake_up(&cq->wait);
+}
+
+void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_cq *cq;
+
+	cq = c2_cq_get(c2dev, mq_index);
+	if (!cq) {
+		printk("discarding events on destroyed CQN=%d\n", mq_index);
+		return;
+	}
+
+	(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+	c2_cq_put(cq);
+}
+
+void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
+{
+	struct c2_cq *cq;
+	struct c2_mq *q;
+
+	cq = c2_cq_get(c2dev, mq_index);
+	if (!cq)
+		return;
+
+	spin_lock_irq(&cq->lock);
+	q = &cq->mq;
+	if (q && !c2_mq_empty(q)) {
+		u16 priv = q->priv;
+		struct c2wr_ce *msg;
+
+		while (priv != be16_to_cpu(*q->shared)) {
+			msg = (struct c2wr_ce *)
+				(q->msg_pool.host + priv * q->msg_size);
+			if (msg->qp_user_context == (u64) (unsigned long) qp) {
+				msg->qp_user_context = (u64) 0;
+			}
+			priv = (priv + 1) % q->q_size;
+		}
+	}
+	spin_unlock_irq(&cq->lock);
+	c2_cq_put(cq);
+}
+
+static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
+{
+	switch (status) {
+	case C2_OK:
+		return IB_WC_SUCCESS;
+	case CCERR_FLUSHED:
+		return IB_WC_WR_FLUSH_ERR;
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return IB_WC_LOC_PROT_ERR;
+	case CCERR_ACCESS_VIOLATION:
+		return IB_WC_LOC_ACCESS_ERR;
+	case CCERR_TOTAL_LENGTH_TOO_BIG:
+		return IB_WC_LOC_LEN_ERR;
+	case CCERR_INVALID_WINDOW:
+		return IB_WC_MW_BIND_ERR;
+	default:
+		return IB_WC_GENERAL_ERR;
+	}
+}
+
+
+static inline int c2_poll_one(struct c2_dev *c2dev,
+			      struct c2_cq *cq, struct ib_wc *entry)
+{
+	struct c2wr_ce *ce;
+	struct c2_qp *qp;
+	int is_recv = 0;
+
+	ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+	if (!ce) {
+		return -EAGAIN;
+	}
+
+	/*
+	 * if the qp returned is null then this qp has already
+	 * been freed and we are unable process the completion.
+	 * try pulling the next message
+	 */
+	while ((qp =
+		(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
+		c2_mq_free(&cq->mq);
+		ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+		if (!ce)
+			return -EAGAIN;
+	}
+
+	entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
+	entry->wr_id = ce->hdr.context;
+	entry->qp_num = ce->handle;
+	entry->wc_flags = 0;
+	entry->slid = 0;
+	entry->sl = 0;
+	entry->src_qp = 0;
+	entry->dlid_path_bits = 0;
+	entry->pkey_index = 0;
+
+	switch (c2_wr_get_id(ce)) {
+	case C2_WR_TYPE_SEND:
+		entry->opcode = IB_WC_SEND;
+		break;
+	case C2_WR_TYPE_RDMA_WRITE:
+		entry->opcode = IB_WC_RDMA_WRITE;
+		break;
+	case C2_WR_TYPE_RDMA_READ:
+		entry->opcode = IB_WC_RDMA_READ;
+		break;
+	case C2_WR_TYPE_BIND_MW:
+		entry->opcode = IB_WC_BIND_MW;
+		break;
+	case C2_WR_TYPE_RECV:
+		entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
+		entry->opcode = IB_WC_RECV;
+		is_recv = 1;
+		break;
+	default:
+		break;
+	}
+
+	/* consume the WQEs */
+	if (is_recv)
+		c2_mq_lconsume(&qp->rq_mq, 1);
+	else
+		c2_mq_lconsume(&qp->sq_mq,
+			       be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
+
+	/* free the message */
+	c2_mq_free(&cq->mq);
+
+	return 0;
+}
+
+int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	struct c2_dev *c2dev = to_c2dev(ibcq->device);
+	struct c2_cq *cq = to_c2cq(ibcq);
+	unsigned long flags;
+	int npolled, err;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	for (npolled = 0; npolled < num_entries; ++npolled) {
+
+		err = c2_poll_one(c2dev, cq, entry + npolled);
+		if (err)
+			break;
+	}
+
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return npolled;
+}
+
+int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+	struct c2_mq_shared __iomem *shared;
+	struct c2_cq *cq;
+
+	cq = to_c2cq(ibcq);
+	shared = cq->mq.peer;
+
+	if (notify == IB_CQ_NEXT_COMP)
+		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
+	else if (notify == IB_CQ_SOLICITED)
+		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
+	else
+		return -EINVAL;
+
+	writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
+
+	/*
+	 * Now read back shared->armed to make the PCI
+	 * write synchronous.  This is necessary for
+	 * correct cq notification semantics.
+	 */
+	readb(&shared->armed);
+
+	return 0;
+}
+
+static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
+{
+
+	dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping),
+			 mq->q_size * mq->msg_size, DMA_FROM_DEVICE);
+	free_pages((unsigned long) mq->msg_pool.host,
+		   get_order(mq->q_size * mq->msg_size));
+}
+
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
+			   int msg_size)
+{
+	unsigned long pool_start;
+
+	pool_start = __get_free_pages(GFP_KERNEL,
+				      get_order(q_size * msg_size));
+	if (!pool_start)
+		return -ENOMEM;
+
+	c2_mq_rep_init(mq,
+		       0,		/* index (currently unknown) */
+		       q_size,
+		       msg_size,
+		       (u8 *) pool_start,
+		       NULL,	/* peer (currently unknown) */
+		       C2_MQ_HOST_TARGET);
+
+	mq->host_dma = dma_map_single(c2dev->ibdev.dma_device,
+				      (void *)pool_start,
+				      q_size * msg_size, DMA_FROM_DEVICE);
+	pci_unmap_addr_set(mq, mapping, mq->host_dma);
+
+	return 0;
+}
+
+int c2_init_cq(struct c2_dev *c2dev, int entries,
+	       struct c2_ucontext *ctx, struct c2_cq *cq)
+{
+	struct c2wr_cq_create_req wr;
+	struct c2wr_cq_create_rep *reply;
+	unsigned long peer_pa;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	might_sleep();
+
+	cq->ibcq.cqe = entries - 1;
+	cq->is_kernel = !ctx;
+
+	/* Allocate a shared pointer */
+	cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+				      &cq->mq.shared_dma, GFP_KERNEL);
+	if (!cq->mq.shared)
+		return -ENOMEM;
+
+	/* Allocate pages for the message pool */
+	err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
+	if (err)
+		goto bail0;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.msg_size = cpu_to_be32(cq->mq.msg_size);
+	wr.depth = cpu_to_be32(cq->mq.q_size);
+	wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
+	wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
+	wr.user_context = (u64) (unsigned long) (cq);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail2;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail2;
+
+	reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail3;
+
+	cq->adapter_handle = reply->cq_handle;
+	cq->mq.index = be32_to_cpu(reply->mq_index);
+
+	peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
+	cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
+	if (!cq->mq.peer) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	spin_lock_init(&cq->lock);
+	atomic_set(&cq->refcount, 1);
+	init_waitqueue_head(&cq->wait);
+
+	/*
+	 * Use the MQ index allocated by the adapter to
+	 * store the CQ in the qptr_array
+	 */
+	cq->cqn = cq->mq.index;
+	c2dev->qptr_array[cq->cqn] = cq;
+
+	return 0;
+
+      bail3:
+	vq_repbuf_free(c2dev, reply);
+      bail2:
+	vq_req_free(c2dev, vq_req);
+      bail1:
+	c2_free_cq_buf(c2dev, &cq->mq);
+      bail0:
+	c2_free_mqsp(cq->mq.shared);
+
+	return err;
+}
+
+void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
+{
+	int err;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cq_destroy_req wr;
+	struct c2wr_cq_destroy_rep *reply;
+
+	might_sleep();
+
+	/* Clear CQ from the qptr array */
+	spin_lock_irq(&c2dev->lock);
+	c2dev->qptr_array[cq->mq.index] = NULL;
+	atomic_dec(&cq->refcount);
+	spin_unlock_irq(&c2dev->lock);
+
+	wait_event(cq->wait, !atomic_read(&cq->refcount));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		goto bail0;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.cq_handle = cq->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+
+	vq_repbuf_free(c2dev, reply);
+      bail1:
+	vq_req_free(c2dev, vq_req);
+      bail0:
+	if (cq->is_kernel) {
+		c2_free_cq_buf(c2dev, &cq->mq);
+	}
+
+	return;
+}
--- a/drivers/infiniband/hw/amso1100/c2_intr.c
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_vq.h"
+
+static void handle_mq(struct c2_dev *c2dev, u32 index);
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
+
+/*
+ * Handle RNIC interrupts
+ */
+void c2_rnic_interrupt(struct c2_dev *c2dev)
+{
+	unsigned int mq_index;
+
+	while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
+		mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
+		if (mq_index & 0x80000000) {
+			break;
+		}
+
+		c2dev->hints_read++;
+		handle_mq(c2dev, mq_index);
+	}
+
+}
+
+/*
+ * Top level MQ handler
+ */
+static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
+{
+	if (c2dev->qptr_array[mq_index] == NULL) {
+		pr_debug(KERN_INFO "handle_mq: stray activity for mq_index=%d\n",
+			mq_index);
+		return;
+	}
+
+	switch (mq_index) {
+	case (0):
+		/*
+		 * An index of 0 in the activity queue
+		 * indicates the req vq now has messages
+		 * available...
+		 *
+		 * Wake up any waiters waiting on req VQ
+		 * message availability.
+		 */
+		wake_up(&c2dev->req_vq_wo);
+		break;
+	case (1):
+		handle_vq(c2dev, mq_index);
+		break;
+	case (2):
+		/* We have to purge the VQ in case there are pending
+		 * accept reply requests that would result in the
+		 * generation of an ESTABLISHED event. If we don't
+		 * generate these first, a CLOSE event could end up
+		 * being delivered before the ESTABLISHED event.
+		 */
+		handle_vq(c2dev, 1);
+
+		c2_ae_event(c2dev, mq_index);
+		break;
+	default:
+		/* There is no event synchronization between CQ events
+		 * and AE or CM events. In fact, CQE could be
+		 * delivered for all of the I/O up to and including the
+		 * FLUSH for a peer disconenct prior to the ESTABLISHED
+		 * event being delivered to the app. The reason for this
+		 * is that CM events are delivered on a thread, while AE
+		 * and CM events are delivered on interrupt context.
+		 */
+		c2_cq_event(c2dev, mq_index);
+		break;
+	}
+
+	return;
+}
+
+/*
+ * Handles verbs WR replies.
+ */
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
+{
+	void *adapter_msg, *reply_msg;
+	struct c2wr_hdr *host_msg;
+	struct c2wr_hdr tmp;
+	struct c2_mq *reply_vq;
+	struct c2_vq_req *req;
+	struct iw_cm_event cm_event;
+	int err;
+
+	reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
+
+	/*
+	 * get next msg from mq_index into adapter_msg.
+	 * don't free it yet.
+	 */
+	adapter_msg = c2_mq_consume(reply_vq);
+	if (adapter_msg == NULL) {
+		return;
+	}
+
+	host_msg = vq_repbuf_alloc(c2dev);
+
+	/*
+	 * If we can't get a host buffer, then we'll still
+	 * wakeup the waiter, we just won't give him the msg.
+	 * It is assumed the waiter will deal with this...
+	 */
+	if (!host_msg) {
+		pr_debug("handle_vq: no repbufs!\n");
+
+		/*
+		 * just copy the WR header into a local variable.
+		 * this allows us to still demux on the context
+		 */
+		host_msg = &tmp;
+		memcpy(host_msg, adapter_msg, sizeof(tmp));
+		reply_msg = NULL;
+	} else {
+		memcpy(host_msg, adapter_msg, reply_vq->msg_size);
+		reply_msg = host_msg;
+	}
+
+	/*
+	 * consume the msg from the MQ
+	 */
+	c2_mq_free(reply_vq);
+
+	/*
+	 * wakeup the waiter.
+	 */
+	req = (struct c2_vq_req *) (unsigned long) host_msg->context;
+	if (req == NULL) {
+		/*
+		 * We should never get here, as the adapter should
+		 * never send us a reply that we're not expecting.
+		 */
+		vq_repbuf_free(c2dev, host_msg);
+		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
+		return;
+	}
+
+	err = c2_errno(reply_msg);
+	if (!err) switch (req->event) {
+	case IW_CM_EVENT_ESTABLISHED:
+		c2_set_qp_state(req->qp,
+				C2_QP_STATE_RTS);
+	case IW_CM_EVENT_CLOSE:
+
+		/*
+		 * Move the QP to RTS if this is
+		 * the established event
+		 */
+		cm_event.event = req->event;
+		cm_event.status = 0;
+		cm_event.local_addr = req->cm_id->local_addr;
+		cm_event.remote_addr = req->cm_id->remote_addr;
+		cm_event.private_data = NULL;
+		cm_event.private_data_len = 0;
+		req->cm_id->event_handler(req->cm_id, &cm_event);
+		break;
+	default:
+		break;
+	}
+
+	req->reply_msg = (u64) (unsigned long) (reply_msg);
+	atomic_set(&req->reply_ready, 1);
+	wake_up(&req->wait_object);
+
+	/*
+	 * If the request was cancelled, then this put will
+	 * free the vq_req memory...and reply_msg!!!
+	 */
+	vq_req_put(c2dev, req);
+}
--- a/drivers/infiniband/hw/amso1100/c2_mm.c
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_vq.h"
+
+#define PBL_VIRT 1
+#define PBL_PHYS 2
+
+/*
+ * Send all the PBL messages to convey the remainder of the PBL
+ * Wait for the adapter's reply on the last one.
+ * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
+ *
+ * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
+ *	  Reply buffer _is_ freed by this function.
+ */
+static int
+send_pbl_messages(struct c2_dev *c2dev, u32 stag_index,
+		  unsigned long va, u32 pbl_depth,
+		  struct c2_vq_req *vq_req, int pbl_type)
+{
+	u32 pbe_count;		/* amt that fits in a PBL msg */
+	u32 count;		/* amt in this PBL MSG. */
+	struct c2wr_nsmr_pbl_req *wr;	/* PBL WR ptr */
+	struct c2wr_nsmr_pbl_rep *reply;	/* reply ptr */
+ 	int err, pbl_virt, pbl_index, i;
+
+	switch (pbl_type) {
+	case PBL_VIRT:
+		pbl_virt = 1;
+		break;
+	case PBL_PHYS:
+		pbl_virt = 0;
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		return -ENOMEM;
+	}
+	c2_wr_set_id(wr, CCWR_NSMR_PBL);
+
+	/*
+	 * Only the last PBL message will generate a reply from the verbs,
+	 * so we set the context to 0 indicating there is no kernel verbs
+	 * handler blocked awaiting this reply.
+	 */
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->stag_index = stag_index;	/* already swapped */
+	wr->flags = 0;
+	pbl_index = 0;
+	while (pbl_depth) {
+		count = min(pbe_count, pbl_depth);
+		wr->addrs_length = cpu_to_be32(count);
+
+		/*
+		 *  If this is the last message, then reference the
+		 *  vq request struct cuz we're gonna wait for a reply.
+		 *  also make this PBL msg as the last one.
+		 */
+		if (count == pbl_depth) {
+			/*
+			 * reference the request struct.  dereferenced in the
+			 * int handler.
+			 */
+			vq_req_get(c2dev, vq_req);
+			wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
+
+			/*
+			 * This is the last PBL message.
+			 * Set the context to our VQ Request Object so we can
+			 * wait for the reply.
+			 */
+			wr->hdr.context = (unsigned long) vq_req;
+		}
+
+		/*
+		 * If pbl_virt is set then va is a virtual address
+		 * that describes a virtually contiguous memory
+		 * allocation. The wr needs the start of each virtual page
+		 * to be converted to the corresponding physical address
+		 * of the page. If pbl_virt is not set then va is an array
+		 * of physical addresses and there is no conversion to do.
+		 * Just fill in the wr with what is in the array.
+		 */
+		for (i = 0; i < count; i++) {
+			if (pbl_virt) {
+				va += PAGE_SIZE;
+			} else {
+ 				wr->paddrs[i] =
+				    cpu_to_be64(((u64 *)va)[pbl_index + i]);
+			}
+		}
+
+		/*
+		 * Send WR to adapter
+		 */
+		err = vq_send_wr(c2dev, (union c2wr *) wr);
+		if (err) {
+			if (count <= pbe_count) {
+				vq_req_put(c2dev, vq_req);
+			}
+			goto bail0;
+		}
+		pbl_depth -= count;
+		pbl_index += count;
+	}
+
+	/*
+	 *  Now wait for the reply...
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	kfree(wr);
+	return err;
+}
+
+#define C2_PBL_MAX_DEPTH 131072
+int
+c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ 			   int page_size, int pbl_depth, u32 length,
+ 			   u32 offset, u64 *va, enum c2_acf acf,
+			   struct c2_mr *mr)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_nsmr_register_req *wr;
+	struct c2wr_nsmr_register_rep *reply;
+	u16 flags;
+	int i, pbe_count, count;
+	int err;
+
+	if (!va || !length || !addr_list || !pbl_depth)
+		return -EINTR;
+
+	/*
+	 * Verify PBL depth is within rnic max
+	 */
+	if (pbl_depth > C2_PBL_MAX_DEPTH) {
+		return -EINTR;
+	}
+
+	/*
+	 * allocate verbs request object
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	/*
+	 * build the WR
+	 */
+	c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+
+	flags = (acf | MEM_VA_BASED | MEM_REMOTE);
+
+	/*
+	 * compute how many pbes can fit in the message
+	 */
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
+
+	if (pbl_depth <= pbe_count) {
+		flags |= MEM_PBL_COMPLETE;
+	}
+	wr->flags = cpu_to_be16(flags);
+	wr->stag_key = 0;	//stag_key;
+	wr->va = cpu_to_be64(*va);
+	wr->pd_id = mr->pd->pd_id;
+	wr->pbe_size = cpu_to_be32(page_size);
+	wr->length = cpu_to_be32(length);
+	wr->pbl_depth = cpu_to_be32(pbl_depth);
+	wr->fbo = cpu_to_be32(offset);
+	count = min(pbl_depth, pbe_count);
+	wr->addrs_length = cpu_to_be32(count);
+
+	/*
+	 * fill out the PBL for this message
+	 */
+	for (i = 0; i < count; i++) {
+		wr->paddrs[i] = cpu_to_be64(addr_list[i]);
+	}
+
+	/*
+	 * regerence the request struct
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * send the WR to the adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	/*
+	 * wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail1;
+	}
+
+	/*
+	 * process reply
+	 */
+	reply =
+	    (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	if ((err = c2_errno(reply))) {
+		goto bail2;
+	}
+	//*p_pb_entries = be32_to_cpu(reply->pbl_depth);
+	mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
+	vq_repbuf_free(c2dev, reply);
+
+	/*
+	 * if there are still more PBEs we need to send them to
+	 * the adapter and wait for a reply on the final one.
+	 * reuse vq_req for this purpose.
+	 */
+	pbl_depth -= count;
+	if (pbl_depth) {
+
+		vq_req->reply_msg = (unsigned long) NULL;
+		atomic_set(&vq_req->reply_ready, 0);
+		err = send_pbl_messages(c2dev,
+					cpu_to_be32(mr->ibmr.lkey),
+					(unsigned long) &addr_list[i],
+					pbl_depth, vq_req, PBL_PHYS);
+		if (err) {
+			goto bail1;
+		}
+	}
+
+	vq_req_free(c2dev, vq_req);
+	kfree(wr);
+
+	return err;
+
+      bail2:
+	vq_repbuf_free(c2dev, reply);
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
+{
+	struct c2_vq_req *vq_req;	/* verbs request object */
+	struct c2wr_stag_dealloc_req wr;	/* work request */
+	struct c2wr_stag_dealloc_rep *reply;	/* WR reply  */
+	int err;
+
+
+	/*
+	 * allocate verbs request object
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.stag_index = cpu_to_be32(stag_index);
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
--- a/drivers/infiniband/hw/amso1100/c2_mq.c
+++ b/drivers/infiniband/hw/amso1100/c2_mq.c
@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_mq.h"
+
+void *c2_mq_alloc(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	if (c2_mq_full(q)) {
+		return NULL;
+	} else {
+#ifdef DEBUG
+		struct c2wr_hdr *m =
+		    (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+		BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
+		m->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+		return m;
+#else
+		return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+	}
+}
+
+void c2_mq_produce(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	if (!c2_mq_full(q)) {
+		q->priv = (q->priv + 1) % q->q_size;
+		q->hint_count++;
+		/* Update peer's offset. */
+		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+	}
+}
+
+void *c2_mq_consume(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (c2_mq_empty(q)) {
+		return NULL;
+	} else {
+#ifdef DEBUG
+		struct c2wr_hdr *m = (struct c2wr_hdr *)
+		    (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+		BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
+#endif
+		return m;
+#else
+		return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+	}
+}
+
+void c2_mq_free(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (!c2_mq_empty(q)) {
+
+#ifdef CCMSGMAGIC
+		{
+			struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
+			    (q->msg_pool.adapter + q->priv * q->msg_size);
+			__raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
+		}
+#endif
+		q->priv = (q->priv + 1) % q->q_size;
+		/* Update peer's offset. */
+		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+	}
+}
+
+
+void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	while (wqe_count--) {
+		BUG_ON(c2_mq_empty(q));
+		*q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
+	}
+}
+
+#if 0
+u32 c2_mq_count(struct c2_mq *q)
+{
+	s32 count;
+
+	if (q->type == C2_MQ_HOST_TARGET)
+		count = be16_to_cpu(*q->shared) - q->priv;
+	else
+		count = q->priv - be16_to_cpu(*q->shared);
+
+	if (count < 0)
+		count += q->q_size;
+
+	return (u32) count;
+}
+#endif  /*  0  */
+
+void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.adapter = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->priv = 0;
+	q->hint_count = 0;
+	return;
+}
+void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.host = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->priv = 0;
+	q->hint_count = 0;
+	return;
+}
--- a/drivers/infiniband/hw/amso1100/c2_mq.h
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _C2_MQ_H_
+#define _C2_MQ_H_
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include "c2_wr.h"
+
+enum c2_shared_regs {
+
+	C2_SHARED_ARMED = 0x10,
+	C2_SHARED_NOTIFY = 0x18,
+	C2_SHARED_SHARED = 0x40,
+};
+
+struct c2_mq_shared {
+	u16 unused1;
+	u8 armed;
+	u8 notification_type;
+	u32 unused2;
+	u16 shared;
+	/* Pad to 64 bytes. */
+	u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
+};
+
+enum c2_mq_type {
+	C2_MQ_HOST_TARGET = 1,
+	C2_MQ_ADAPTER_TARGET = 2,
+};
+
+/*
+ * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ.
+ * c2_user_mq_t (which is the same format) is for user-mode MQs...
+ */
+#define C2_MQ_MAGIC 0x4d512020	/* 'MQ  ' */
+struct c2_mq {
+	u32 magic;
+	union {
+		u8 *host;
+		u8 __iomem *adapter;
+	} msg_pool;
+	dma_addr_t host_dma;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 hint_count;
+	u16 priv;
+	struct c2_mq_shared __iomem *peer;
+	u16 *shared;
+	dma_addr_t shared_dma;
+	u32 q_size;
+	u32 msg_size;
+	u32 index;
+	enum c2_mq_type type;
+};
+
+static __inline__ int c2_mq_empty(struct c2_mq *q)
+{
+	return q->priv == be16_to_cpu(*q->shared);
+}
+
+static __inline__ int c2_mq_full(struct c2_mq *q)
+{
+	return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
+}
+
+extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
+extern void *c2_mq_alloc(struct c2_mq *q);
+extern void c2_mq_produce(struct c2_mq *q);
+extern void *c2_mq_consume(struct c2_mq *q);
+extern void c2_mq_free(struct c2_mq *q);
+extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		       u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
+extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+			   u8 *pool_start, u16 __iomem *peer, u32 type);
+
+#endif				/* _C2_MQ_H_ */
--- a/drivers/infiniband/hw/amso1100/c2_pd.c
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "c2.h"
+#include "c2_provider.h"
+
+int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
+{
+	u32 obj;
+	int ret = 0;
+
+	spin_lock(&c2dev->pd_table.lock);
+	obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
+				 c2dev->pd_table.last);
+	if (obj >= c2dev->pd_table.max)
+		obj = find_first_zero_bit(c2dev->pd_table.table,
+					  c2dev->pd_table.max);
+	if (obj < c2dev->pd_table.max) {
+		pd->pd_id = obj;
+		__set_bit(obj, c2dev->pd_table.table);
+		c2dev->pd_table.last = obj+1;
+		if (c2dev->pd_table.last >= c2dev->pd_table.max)
+			c2dev->pd_table.last = 0;
+	} else
+		ret = -ENOMEM;
+	spin_unlock(&c2dev->pd_table.lock);
+	return ret;
+}
+
+void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
+{
+	spin_lock(&c2dev->pd_table.lock);
+	__clear_bit(pd->pd_id, c2dev->pd_table.table);
+	spin_unlock(&c2dev->pd_table.lock);
+}
+
+int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+{
+
+	c2dev->pd_table.last = 0;
+	c2dev->pd_table.max = c2dev->props.max_pd;
+	spin_lock_init(&c2dev->pd_table.lock);
+	c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
+					sizeof(long), GFP_KERNEL);
+	if (!c2dev->pd_table.table)
+		return -ENOMEM;
+	bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
+	return 0;
+}
+
+void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+{
+	kfree(c2dev->pd_table.table);
+}
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@ -0,0 +1,869 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_arp.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include "c2.h"
+#include "c2_provider.h"
+#include "c2_user.h"
+
+static int c2_query_device(struct ib_device *ibdev,
+			   struct ib_device_attr *props)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	*props = c2dev->props;
+	return 0;
+}
+
+static int c2_query_port(struct ib_device *ibdev,
+			 u8 port, struct ib_port_attr *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	props->max_mtu = IB_MTU_4096;
+	props->lid = 0;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+	props->state = IB_PORT_ACTIVE;
+	props->phys_state = 0;
+	props->port_cap_flags =
+	    IB_PORT_CM_SUP |
+	    IB_PORT_REINIT_SUP |
+	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->qkey_viol_cntr = 0;
+	props->active_width = 1;
+	props->active_speed = 1;
+
+	return 0;
+}
+
+static int c2_modify_port(struct ib_device *ibdev,
+			  u8 port, int port_modify_mask,
+			  struct ib_port_modify *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+
+static int c2_query_pkey(struct ib_device *ibdev,
+			 u8 port, u16 index, u16 * pkey)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	*pkey = 0;
+	return 0;
+}
+
+static int c2_query_gid(struct ib_device *ibdev, u8 port,
+			int index, union ib_gid *gid)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+	memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
+
+	return 0;
+}
+
+/* Allocate the user context data structure. This keeps track
+ * of all objects associated with a particular user-mode client.
+ */
+static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
+					     struct ib_udata *udata)
+{
+	struct c2_ucontext *context;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+
+	return &context->ibucontext;
+}
+
+static int c2_dealloc_ucontext(struct ib_ucontext *context)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	kfree(context);
+	return 0;
+}
+
+static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
+				 struct ib_ucontext *context,
+				 struct ib_udata *udata)
+{
+	struct c2_pd *pd;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
+	if (err) {
+		kfree(pd);
+		return ERR_PTR(err);
+	}
+
+	if (context) {
+		if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
+			c2_pd_free(to_c2dev(ibdev), pd);
+			kfree(pd);
+			return ERR_PTR(-EFAULT);
+		}
+	}
+
+	return &pd->ibpd;
+}
+
+static int c2_dealloc_pd(struct ib_pd *pd)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
+	kfree(pd);
+
+	return 0;
+}
+
+static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return ERR_PTR(-ENOSYS);
+}
+
+static int c2_ah_destroy(struct ib_ah *ah)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static void c2_add_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+	BUG_ON(!ibqp);
+	qp = to_c2qp(ibqp);
+	atomic_inc(&qp->refcount);
+}
+
+static void c2_rem_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+	BUG_ON(!ibqp);
+	qp = to_c2qp(ibqp);
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
+{
+	struct c2_dev* c2dev = to_c2dev(device);
+	struct c2_qp *qp;
+
+	qp = c2_find_qpn(c2dev, qpn);
+	pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
+		__FUNCTION__, qp, qpn, device,
+		(qp?atomic_read(&qp->refcount):0));
+
+	return (qp?&qp->ibqp:NULL);
+}
+
+static struct ib_qp *c2_create_qp(struct ib_pd *pd,
+				  struct ib_qp_init_attr *init_attr,
+				  struct ib_udata *udata)
+{
+	struct c2_qp *qp;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_RC:
+		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+		if (!qp) {
+			pr_debug("%s: Unable to allocate QP\n", __FUNCTION__);
+			return ERR_PTR(-ENOMEM);
+		}
+		spin_lock_init(&qp->lock);
+		if (pd->uobject) {
+			/* userspace specific */
+		}
+
+		err = c2_alloc_qp(to_c2dev(pd->device),
+				  to_c2pd(pd), init_attr, qp);
+
+		if (err && pd->uobject) {
+			/* userspace specific */
+		}
+
+		break;
+	default:
+		pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__,
+			init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+		break;
+	}
+
+	if (err) {
+		kfree(qp);
+		return ERR_PTR(err);
+	}
+
+	return &qp->ibqp;
+}
+
+static int c2_destroy_qp(struct ib_qp *ib_qp)
+{
+	struct c2_qp *qp = to_c2qp(ib_qp);
+
+	pr_debug("%s:%u qp=%p,qp->state=%d\n",
+		__FUNCTION__, __LINE__,ib_qp,qp->state);
+	c2_free_qp(to_c2dev(ib_qp->device), qp);
+	kfree(qp);
+	return 0;
+}
+
+static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries,
+				  struct ib_ucontext *context,
+				  struct ib_udata *udata)
+{
+	struct c2_cq *cq;
+	int err;
+
+	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq) {
+		pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
+	if (err) {
+		pr_debug("%s: error initializing CQ\n", __FUNCTION__);
+		kfree(cq);
+		return ERR_PTR(err);
+	}
+
+	return &cq->ibcq;
+}
+
+static int c2_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct c2_cq *cq = to_c2cq(ib_cq);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	c2_free_cq(to_c2dev(ib_cq->device), cq);
+	kfree(cq);
+
+	return 0;
+}
+
+static inline u32 c2_convert_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
+	    (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
+	    (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
+	    C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
+}
+
+static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
+				    struct ib_phys_buf *buffer_list,
+				    int num_phys_buf, int acc, u64 * iova_start)
+{
+	struct c2_mr *mr;
+	u64 *page_list;
+	u32 total_len;
+	int err, i, j, k, page_shift, pbl_depth;
+
+	pbl_depth = 0;
+	total_len = 0;
+
+	page_shift = PAGE_SHIFT;
+	/*
+	 * If there is only 1 buffer we assume this could
+	 * be a map of all phy mem...use a 32k page_shift.
+	 */
+	if (num_phys_buf == 1)
+		page_shift += 3;
+
+	for (i = 0; i < num_phys_buf; i++) {
+
+		if (buffer_list[i].addr & ~PAGE_MASK) {
+			pr_debug("Unaligned Memory Buffer: 0x%x\n",
+				(unsigned int) buffer_list[i].addr);
+			return ERR_PTR(-EINVAL);
+		}
+
+		if (!buffer_list[i].size) {
+			pr_debug("Invalid Buffer Size\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		total_len += buffer_list[i].size;
+		pbl_depth += ALIGN(buffer_list[i].size,
+				   (1 << page_shift)) >> page_shift;
+	}
+
+	page_list = vmalloc(sizeof(u64) * pbl_depth);
+	if (!page_list) {
+		pr_debug("couldn't vmalloc page_list of size %zd\n",
+			(sizeof(u64) * pbl_depth));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0, j = 0; i < num_phys_buf; i++) {
+
+		int naddrs;
+
+ 		naddrs = ALIGN(buffer_list[i].size,
+			       (1 << page_shift)) >> page_shift;
+		for (k = 0; k < naddrs; k++)
+			page_list[j++] = (buffer_list[i].addr +
+						     (k << page_shift));
+	}
+
+	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->pd = to_c2pd(ib_pd);
+	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
+		"*iova_start %llx, first pa %llx, last pa %llx\n",
+		__FUNCTION__, page_shift, pbl_depth, total_len,
+		*iova_start, page_list[0], page_list[pbl_depth-1]);
+  	err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+ 					 (1 << page_shift), pbl_depth,
+					 total_len, 0, iova_start,
+					 c2_convert_access(acc), mr);
+	vfree(page_list);
+	if (err) {
+		kfree(mr);
+		return ERR_PTR(err);
+	}
+
+	return &mr->ibmr;
+}
+
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct ib_phys_buf bl;
+	u64 kva = 0;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* AMSO1100 limit */
+	bl.size = 0xffffffff;
+	bl.addr = 0;
+	return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
+}
+
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+				    int acc, struct ib_udata *udata)
+{
+	u64 *pages;
+	u64 kva = 0;
+	int shift, n, len;
+	int i, j, k;
+	int err = 0;
+	struct ib_umem_chunk *chunk;
+	struct c2_pd *c2pd = to_c2pd(pd);
+	struct c2_mr *c2mr;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	shift = ffs(region->page_size) - 1;
+
+	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
+	if (!c2mr)
+		return ERR_PTR(-ENOMEM);
+	c2mr->pd = c2pd;
+
+	n = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list)
+		n += chunk->nents;
+
+	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+	if (!pages) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	i = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list) {
+		for (j = 0; j < chunk->nmap; ++j) {
+			len = sg_dma_len(&chunk->page_list[j]) >> shift;
+			for (k = 0; k < len; ++k) {
+				pages[i++] =
+					sg_dma_address(&chunk->page_list[j]) +
+					(region->page_size * k);
+			}
+		}
+	}
+
+	kva = (u64)region->virt_base;
+  	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
+					 pages,
+ 					 region->page_size,
+					 i,
+					 region->length,
+					 region->offset,
+					 &kva,
+					 c2_convert_access(acc),
+					 c2mr);
+	kfree(pages);
+	if (err) {
+		kfree(c2mr);
+		return ERR_PTR(err);
+	}
+	return &c2mr->ibmr;
+
+err:
+	kfree(c2mr);
+	return ERR_PTR(err);
+}
+
+static int c2_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct c2_mr *mr = to_c2mr(ib_mr);
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
+	if (err)
+		pr_debug("c2_stag_dealloc failed: %d\n", err);
+	else
+		kfree(mr);
+
+	return err;
+}
+
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x\n", dev->props.hw_ver);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x.%x.%x\n",
+		       (int) (dev->props.fw_ver >> 32),
+		       (int) (dev->props.fw_ver >> 16) & 0xffff,
+		       (int) (dev->props.fw_ver & 0xffff));
+}
+
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "AMSO1100\n");
+}
+
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
+}
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct class_device_attribute *c2_class_attributes[] = {
+	&class_device_attr_hw_rev,
+	&class_device_attr_fw_ver,
+	&class_device_attr_hca_type,
+	&class_device_attr_board_id
+};
+
+static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+			int attr_mask, struct ib_udata *udata)
+{
+	int err;
+
+	err =
+	    c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
+			 attr_mask);
+
+	return err;
+}
+
+static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_process_mad(struct ib_device *ibdev,
+			  int mad_flags,
+			  u8 port_num,
+			  struct ib_wc *in_wc,
+			  struct ib_grh *in_grh,
+			  struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Request a connection */
+	return c2_llp_connect(cm_id, iw_param);
+}
+
+static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Accept the new connection */
+	return c2_llp_accept(cm_id, iw_param);
+}
+
+static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_llp_reject(cm_id, pdata, pdata_len);
+	return err;
+}
+
+static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	err = c2_llp_service_create(cm_id, backlog);
+	pr_debug("%s:%u err=%d\n",
+		__FUNCTION__, __LINE__,
+		err);
+	return err;
+}
+
+static int c2_service_destroy(struct iw_cm_id *cm_id)
+{
+	int err;
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_llp_service_destroy(cm_id);
+
+	return err;
+}
+
+static int c2_pseudo_up(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("adding...\n");
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+
+	return 0;
+}
+
+static int c2_pseudo_down(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("deleting...\n");
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+
+	return 0;
+}
+
+static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int ret = 0;
+
+	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+
+	/* TODO: Tell rnic about new rmda interface mtu */
+	return ret;
+}
+
+static void setup(struct net_device *netdev)
+{
+	SET_MODULE_OWNER(netdev);
+	netdev->open = c2_pseudo_up;
+	netdev->stop = c2_pseudo_down;
+	netdev->hard_start_xmit = c2_pseudo_xmit_frame;
+	netdev->get_stats = NULL;
+	netdev->tx_timeout = NULL;
+	netdev->set_mac_address = NULL;
+	netdev->change_mtu = c2_pseudo_change_mtu;
+	netdev->watchdog_timeo = 0;
+	netdev->type = ARPHRD_ETHER;
+	netdev->mtu = 1500;
+	netdev->hard_header_len = ETH_HLEN;
+	netdev->addr_len = ETH_ALEN;
+	netdev->tx_queue_len = 0;
+	netdev->flags |= IFF_NOARP;
+	return;
+}
+
+static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
+{
+	char name[IFNAMSIZ];
+	struct net_device *netdev;
+
+	/* change ethxxx to iwxxx */
+	strcpy(name, "iw");
+	strcat(name, &c2dev->netdev->name[3]);
+	netdev = alloc_netdev(sizeof(*netdev), name, setup);
+	if (!netdev) {
+		printk(KERN_ERR PFX "%s -  etherdev alloc failed",
+			__FUNCTION__);
+		return NULL;
+	}
+
+	netdev->priv = c2dev;
+
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
+
+	/* Print out the MAC address */
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
+		netdev->name,
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+
+#if 0
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+#endif
+	return netdev;
+}
+
+int c2_register_device(struct c2_dev *dev)
+{
+	int ret;
+	int i;
+
+	/* Register pseudo network device */
+	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
+	if (dev->pseudo_netdev) {
+		ret = register_netdev(dev->pseudo_netdev);
+		if (ret) {
+			printk(KERN_ERR PFX
+				"Unable to register netdev, ret = %d\n", ret);
+			free_netdev(dev->pseudo_netdev);
+			return ret;
+		}
+	}
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
+	dev->ibdev.owner = THIS_MODULE;
+	dev->ibdev.uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+
+	dev->ibdev.node_type = RDMA_NODE_RNIC;
+	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
+	dev->ibdev.phys_port_cnt = 1;
+	dev->ibdev.dma_device = &dev->pcidev->dev;
+	dev->ibdev.class_dev.dev = &dev->pcidev->dev;
+	dev->ibdev.query_device = c2_query_device;
+	dev->ibdev.query_port = c2_query_port;
+	dev->ibdev.modify_port = c2_modify_port;
+	dev->ibdev.query_pkey = c2_query_pkey;
+	dev->ibdev.query_gid = c2_query_gid;
+	dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
+	dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
+	dev->ibdev.mmap = c2_mmap_uar;
+	dev->ibdev.alloc_pd = c2_alloc_pd;
+	dev->ibdev.dealloc_pd = c2_dealloc_pd;
+	dev->ibdev.create_ah = c2_ah_create;
+	dev->ibdev.destroy_ah = c2_ah_destroy;
+	dev->ibdev.create_qp = c2_create_qp;
+	dev->ibdev.modify_qp = c2_modify_qp;
+	dev->ibdev.destroy_qp = c2_destroy_qp;
+	dev->ibdev.create_cq = c2_create_cq;
+	dev->ibdev.destroy_cq = c2_destroy_cq;
+	dev->ibdev.poll_cq = c2_poll_cq;
+	dev->ibdev.get_dma_mr = c2_get_dma_mr;
+	dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
+	dev->ibdev.reg_user_mr = c2_reg_user_mr;
+	dev->ibdev.dereg_mr = c2_dereg_mr;
+
+	dev->ibdev.alloc_fmr = NULL;
+	dev->ibdev.unmap_fmr = NULL;
+	dev->ibdev.dealloc_fmr = NULL;
+	dev->ibdev.map_phys_fmr = NULL;
+
+	dev->ibdev.attach_mcast = c2_multicast_attach;
+	dev->ibdev.detach_mcast = c2_multicast_detach;
+	dev->ibdev.process_mad = c2_process_mad;
+
+	dev->ibdev.req_notify_cq = c2_arm_cq;
+	dev->ibdev.post_send = c2_post_send;
+	dev->ibdev.post_recv = c2_post_receive;
+
+	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+	dev->ibdev.iwcm->add_ref = c2_add_ref;
+	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
+	dev->ibdev.iwcm->get_qp = c2_get_qp;
+	dev->ibdev.iwcm->connect = c2_connect;
+	dev->ibdev.iwcm->accept = c2_accept;
+	dev->ibdev.iwcm->reject = c2_reject;
+	dev->ibdev.iwcm->create_listen = c2_service_create;
+	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
+
+	ret = ib_register_device(&dev->ibdev);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) {
+		ret = class_device_create_file(&dev->ibdev.class_dev,
+					       c2_class_attributes[i]);
+		if (ret) {
+			unregister_netdev(dev->pseudo_netdev);
+			free_netdev(dev->pseudo_netdev);
+			ib_unregister_device(&dev->ibdev);
+			return ret;
+		}
+	}
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+
+void c2_unregister_device(struct c2_dev *dev)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	unregister_netdev(dev->pseudo_netdev);
+	free_netdev(dev->pseudo_netdev);
+	ib_unregister_device(&dev->ibdev);
+}
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_PROVIDER_H
+#define C2_PROVIDER_H
+#include <linux/inetdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+
+#include "c2_mq.h"
+#include <rdma/iw_cm.h>
+
+#define C2_MPT_FLAG_ATOMIC        (1 << 14)
+#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
+#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
+#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
+#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
+
+struct c2_buf_list {
+	void *buf;
+	 DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+
+/* The user context keeps track of objects allocated for a
+ * particular user-mode client. */
+struct c2_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+struct c2_mtt;
+
+/* All objects associated with a PD are kept in the
+ * associated user context if present.
+ */
+struct c2_pd {
+	struct ib_pd ibpd;
+	u32 pd_id;
+};
+
+struct c2_mr {
+	struct ib_mr ibmr;
+	struct c2_pd *pd;
+};
+
+struct c2_av;
+
+enum c2_ah_type {
+	C2_AH_ON_HCA,
+	C2_AH_PCI_POOL,
+	C2_AH_KMALLOC
+};
+
+struct c2_ah {
+	struct ib_ah ibah;
+};
+
+struct c2_cq {
+	struct ib_cq ibcq;
+	spinlock_t lock;
+	atomic_t refcount;
+	int cqn;
+	int is_kernel;
+	wait_queue_head_t wait;
+
+	u32 adapter_handle;
+	struct c2_mq mq;
+};
+
+struct c2_wq {
+	spinlock_t lock;
+};
+struct iw_cm_id;
+struct c2_qp {
+	struct ib_qp ibqp;
+	struct iw_cm_id *cm_id;
+	spinlock_t lock;
+	atomic_t refcount;
+	wait_queue_head_t wait;
+	int qpn;
+
+	u32 adapter_handle;
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u8 state;
+
+	struct c2_mq sq_mq;
+	struct c2_mq rq_mq;
+};
+
+struct c2_cr_query_attrs {
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+};
+
+static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct c2_pd, ibpd);
+}
+
+static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct c2_ucontext, ibucontext);
+}
+
+static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct c2_mr, ibmr);
+}
+
+
+static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct c2_ah, ibah);
+}
+
+static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct c2_cq, ibcq);
+}
+
+static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct c2_qp, ibqp);
+}
+
+static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
+{
+	struct in_device *ind;
+	int ret = 0;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	for_ifa(ind) {
+		if (ifa->ifa_address == addr) {
+			ret = 1;
+			break;
+		}
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+	return ret;
+}
+#endif				/* C2_PROVIDER_H */
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@ -0,0 +1,975 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_MAX_ORD_PER_QP 128
+#define C2_MAX_IRD_PER_QP 128
+
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+#define NO_SUPPORT -1
+static const u8 c2_opcode[] = {
+	[IB_WR_SEND] = C2_WR_TYPE_SEND,
+	[IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
+	[IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
+	[IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
+	[IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
+};
+
+static int to_c2_state(enum ib_qp_state ib_state)
+{
+	switch (ib_state) {
+	case IB_QPS_RESET:
+		return C2_QP_STATE_IDLE;
+	case IB_QPS_RTS:
+		return C2_QP_STATE_RTS;
+	case IB_QPS_SQD:
+		return C2_QP_STATE_CLOSING;
+	case IB_QPS_SQE:
+		return C2_QP_STATE_CLOSING;
+	case IB_QPS_ERR:
+		return C2_QP_STATE_ERROR;
+	default:
+		return -1;
+	}
+}
+
+static int to_ib_state(enum c2_qp_state c2_state)
+{
+	switch (c2_state) {
+	case C2_QP_STATE_IDLE:
+		return IB_QPS_RESET;
+	case C2_QP_STATE_CONNECTING:
+		return IB_QPS_RTR;
+	case C2_QP_STATE_RTS:
+		return IB_QPS_RTS;
+	case C2_QP_STATE_CLOSING:
+		return IB_QPS_SQD;
+	case C2_QP_STATE_ERROR:
+		return IB_QPS_ERR;
+	case C2_QP_STATE_TERMINATE:
+		return IB_QPS_SQE;
+	default:
+		return -1;
+	}
+}
+
+static const char *to_ib_state_str(int ib_state)
+{
+	static const char *state_str[] = {
+		"IB_QPS_RESET",
+		"IB_QPS_INIT",
+		"IB_QPS_RTR",
+		"IB_QPS_RTS",
+		"IB_QPS_SQD",
+		"IB_QPS_SQE",
+		"IB_QPS_ERR"
+	};
+	if (ib_state < IB_QPS_RESET ||
+	    ib_state > IB_QPS_ERR)
+		return "<invalid IB QP state>";
+
+	ib_state -= IB_QPS_RESET;
+	return state_str[ib_state];
+}
+
+void c2_set_qp_state(struct c2_qp *qp, int c2_state)
+{
+	int new_state = to_ib_state(c2_state);
+
+	pr_debug("%s: qp[%p] state modify %s --> %s\n",
+	       __FUNCTION__,
+		qp,
+		to_ib_state_str(qp->state),
+		to_ib_state_str(new_state));
+	qp->state = new_state;
+}
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+		 struct ib_qp_attr *attr, int attr_mask)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	unsigned long flags;
+	u8 next_state;
+	int err;
+
+	pr_debug("%s:%d qp=%p, %s --> %s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state),
+		to_ib_state_str(attr->qp_state));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	if (attr_mask & IB_QP_STATE) {
+		/* Ensure the state is valid */
+		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
+			return -EINVAL;
+
+		wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
+
+		if (attr->qp_state == IB_QPS_ERR) {
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id && qp->state == IB_QPS_RTS) {
+				pr_debug("Generating CLOSE event for QP-->ERR, "
+					"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+				/* Generate an CLOSE event */
+				vq_req->cm_id = qp->cm_id;
+				vq_req->event = IW_CM_EVENT_CLOSE;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+		}
+		next_state =  attr->qp_state;
+
+	} else if (attr_mask & IB_QP_CUR_STATE) {
+
+		if (attr->cur_qp_state != IB_QPS_RTR &&
+		    attr->cur_qp_state != IB_QPS_RTS &&
+		    attr->cur_qp_state != IB_QPS_SQD &&
+		    attr->cur_qp_state != IB_QPS_SQE)
+			return -EINVAL;
+		else
+			wr.next_qp_state =
+			    cpu_to_be32(to_c2_state(attr->cur_qp_state));
+
+		next_state = attr->cur_qp_state;
+
+	} else {
+		err = 0;
+		goto bail0;
+	}
+
+	/* reference the request struct */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	if (!err)
+		qp->state = next_state;
+#ifdef DEBUG
+	else
+		pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err);
+#endif
+	/*
+	 * If we're going to error and generating the event here, then
+	 * we need to remove the reference because there will be no
+	 * close event generated by the adapter
+	*/
+	spin_lock_irqsave(&qp->lock, flags);
+	if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+
+	pr_debug("%s:%d qp=%p, cur_state=%s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state));
+	return err;
+}
+
+int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+			  int ord, int ird)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(ord);
+	wr.ird = cpu_to_be32(ird);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	/* reference the request struct */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_qp_destroy_req wr;
+	struct c2wr_qp_destroy_rep *reply;
+	unsigned long flags;
+	int err;
+
+	/*
+	 * Allocate a verb request message
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Initialize the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_QP_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id && qp->state == IB_QPS_RTS) {
+		pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
+			"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+		/* Generate an CLOSE event */
+		vq_req->qp = qp;
+		vq_req->cm_id = qp->cm_id;
+		vq_req->event = IW_CM_EVENT_CLOSE;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	int ret;
+
+        do {
+		spin_lock_irq(&c2dev->qp_table.lock);
+		ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
+					c2dev->qp_table.last++, &qp->qpn);
+		spin_unlock_irq(&c2dev->qp_table.lock);
+        } while ((ret == -EAGAIN) &&
+	 	 idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
+	return ret;
+}
+
+static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
+{
+	spin_lock_irq(&c2dev->qp_table.lock);
+	idr_remove(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irq(&c2dev->qp_table.lock);
+}
+
+struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
+{
+	unsigned long flags;
+	struct c2_qp *qp;
+
+	spin_lock_irqsave(&c2dev->qp_table.lock, flags);
+	qp = idr_find(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
+	return qp;
+}
+
+int c2_alloc_qp(struct c2_dev *c2dev,
+		struct c2_pd *pd,
+		struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
+{
+	struct c2wr_qp_create_req wr;
+	struct c2wr_qp_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
+	struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
+	unsigned long peer_pa;
+	u32 q_size, msg_size, mmap_size;
+	void __iomem *mmap;
+	int err;
+
+	err = c2_alloc_qpn(c2dev, qp);
+	if (err)
+		return err;
+	qp->ibqp.qp_num = qp->qpn;
+	qp->ibqp.qp_type = IB_QPT_RC;
+
+	/* Allocate the SQ and RQ shared pointers */
+	qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->sq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->sq_mq.shared) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->rq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->rq_mq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	/* Allocate the verbs request */
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	/* Initialize the work request */
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_QP_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.sq_cq_handle = send_cq->adapter_handle;
+	wr.rq_cq_handle = recv_cq->adapter_handle;
+	wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
+	wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
+	wr.srq_handle = 0;
+	wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
+			       QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
+	wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
+	wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
+	wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
+	wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
+	wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
+	wr.pd_id = pd->pd_id;
+	wr.user_context = (unsigned long) qp;
+
+	vq_req_get(c2dev, vq_req);
+
+	/* Send the WR to the adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail3;
+	}
+
+	/* Wait for the verb reply  */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail3;
+	}
+
+	/* Process the reply */
+	reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	if ((err = c2_wr_get_result(reply)) != 0) {
+		goto bail4;
+	}
+
+	/* Fill in the kernel QP struct */
+	atomic_set(&qp->refcount, 1);
+	qp->adapter_handle = reply->qp_handle;
+	qp->state = IB_QPS_RESET;
+	qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
+
+	/* Initialize the SQ MQ */
+	q_size = be32_to_cpu(reply->sq_depth);
+	msg_size = be32_to_cpu(reply->sq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail5;
+	}
+
+	c2_mq_req_init(&qp->sq_mq,
+		       be32_to_cpu(reply->sq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the RQ mq */
+	q_size = be32_to_cpu(reply->rq_depth);
+	msg_size = be32_to_cpu(reply->rq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail6;
+	}
+
+	c2_mq_req_init(&qp->rq_mq,
+		       be32_to_cpu(reply->rq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+      bail6:
+	iounmap(qp->sq_mq.peer);
+      bail5:
+	destroy_qp(c2dev, qp);
+      bail4:
+	vq_repbuf_free(c2dev, reply);
+      bail3:
+	vq_req_free(c2dev, vq_req);
+      bail2:
+	c2_free_mqsp(qp->rq_mq.shared);
+      bail1:
+	c2_free_mqsp(qp->sq_mq.shared);
+      bail0:
+	c2_free_qpn(c2dev, qp->qpn);
+	return err;
+}
+
+void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_cq *send_cq;
+	struct c2_cq *recv_cq;
+
+	send_cq = to_c2cq(qp->ibqp.send_cq);
+	recv_cq = to_c2cq(qp->ibqp.recv_cq);
+
+	/*
+	 * Lock CQs here, so that CQ polling code can do QP lookup
+	 * without taking a lock.
+	 */
+	spin_lock_irq(&send_cq->lock);
+	if (send_cq != recv_cq)
+		spin_lock(&recv_cq->lock);
+
+	c2_free_qpn(c2dev, qp->qpn);
+
+	if (send_cq != recv_cq)
+		spin_unlock(&recv_cq->lock);
+	spin_unlock_irq(&send_cq->lock);
+
+	/*
+	 * Destory qp in the rnic...
+	 */
+	destroy_qp(c2dev, qp);
+
+	/*
+	 * Mark any unreaped CQEs as null and void.
+	 */
+	c2_cq_clean(c2dev, qp, send_cq->cqn);
+	if (send_cq != recv_cq)
+		c2_cq_clean(c2dev, qp, recv_cq->cqn);
+	/*
+	 * Unmap the MQs and return the shared pointers
+	 * to the message pool.
+	 */
+	iounmap(qp->sq_mq.peer);
+	iounmap(qp->rq_mq.peer);
+	c2_free_mqsp(qp->sq_mq.shared);
+	c2_free_mqsp(qp->rq_mq.shared);
+
+	atomic_dec(&qp->refcount);
+	wait_event(qp->wait, !atomic_read(&qp->refcount));
+}
+
+/*
+ * Function: move_sgl
+ *
+ * Description:
+ * Move an SGL from the user's work request struct into a CCIL Work Request
+ * message, swapping to WR byte order and ensure the total length doesn't
+ * overflow.
+ *
+ * IN:
+ * dst		- ptr to CCIL Work Request message SGL memory.
+ * src		- ptr to the consumers SGL memory.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int
+move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
+	 u8 * actual_count)
+{
+	u32 tot = 0;		/* running total */
+	u8 acount = 0;		/* running total non-0 len sge's */
+
+	while (count > 0) {
+		/*
+		 * If the addition of this SGE causes the
+		 * total SGL length to exceed 2^32-1, then
+		 * fail-n-bail.
+		 *
+		 * If the current total plus the next element length
+		 * wraps, then it will go negative and be less than the
+		 * current total...
+		 */
+		if ((tot + src->length) < tot) {
+			return -EINVAL;
+		}
+		/*
+		 * Bug: 1456 (as well as 1498 & 1643)
+		 * Skip over any sge's supplied with len=0
+		 */
+		if (src->length) {
+			tot += src->length;
+			dst->stag = cpu_to_be32(src->lkey);
+			dst->to = cpu_to_be64(src->addr);
+			dst->length = cpu_to_be32(src->length);
+			dst++;
+			acount++;
+		}
+		src++;
+		count--;
+	}
+
+	if (acount == 0) {
+		/*
+		 * Bug: 1476 (as well as 1498, 1456 and 1643)
+		 * Setup the SGL in the WR to make it easier for the RNIC.
+		 * This way, the FW doesn't have to deal with special cases.
+		 * Setting length=0 should be sufficient.
+		 */
+		dst->stag = 0;
+		dst->to = 0;
+		dst->length = 0;
+	}
+
+	*p_len = tot;
+	*actual_count = acount;
+	return 0;
+}
+
+/*
+ * Function: c2_activity (private function)
+ *
+ * Description:
+ * Post an mq index to the host->adapter activity fifo.
+ *
+ * IN:
+ * c2dev	- ptr to c2dev structure
+ * mq_index	- mq index to post
+ * shared	- value most recently written to shared
+ *
+ * OUT:
+ *
+ * Return:
+ * none
+ */
+static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
+{
+	/*
+	 * First read the register to see if the FIFO is full, and if so,
+	 * spin until it's not.  This isn't perfect -- there is no
+	 * synchronization among the clients of the register, but in
+	 * practice it prevents multiple CPU from hammering the bus
+	 * with PCI RETRY. Note that when this does happen, the card
+	 * cannot get on the bus and the card and system hang in a
+	 * deadlock -- thus the need for this code. [TOT]
+	 */
+	while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(0);
+	}
+
+	__raw_writel(C2_HINT_MAKE(mq_index, shared),
+		     c2dev->regs + PCI_BAR0_ADAPTER_HINT);
+}
+
+/*
+ * Function: qp_wr_post
+ *
+ * Description:
+ * This in-line function allocates a MQ msg, then moves the host-copy of
+ * the completed WR into msg.  Then it posts the message.
+ *
+ * IN:
+ * q		- ptr to user MQ.
+ * wr		- ptr to host-copy of the WR.
+ * qp		- ptr to user qp
+ * size		- Number of bytes to post.  Assumed to be divisible by 4.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
+{
+	union c2wr *msg;
+
+	msg = c2_mq_alloc(q);
+	if (msg == NULL) {
+		return -EINVAL;
+	}
+#ifdef CCMSGMAGIC
+	((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+
+	/*
+	 * Since all header fields in the WR are the same as the
+	 * CQE, set the following so the adapter need not.
+	 */
+	c2_wr_set_result(wr, CCERR_PENDING);
+
+	/*
+	 * Copy the wr down to the adapter
+	 */
+	memcpy((void *) msg, (void *) wr, size);
+
+	c2_mq_produce(q);
+	return 0;
+}
+
+
+int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+		 struct ib_send_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	int err = 0;
+
+	u32 flags;
+	u32 tot_len;
+	u8 actual_sge_count;
+	u32 msg_size;
+
+	if (qp->state > IB_QPS_RTS)
+		return -EINVAL;
+
+	while (ib_wr) {
+
+		flags = 0;
+		wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		if (ib_wr->send_flags & IB_SEND_SIGNALED) {
+			flags |= SQ_SIGNALED;
+		}
+
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND:
+			if (ib_wr->send_flags & IB_SEND_SOLICITED) {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
+				msg_size = sizeof(struct c2wr_send_req);
+			} else {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
+				msg_size = sizeof(struct c2wr_send_req);
+			}
+
+			wr.sqwr.send.remote_stag = 0;
+			msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge;
+			if (ib_wr->num_sge > qp->send_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_WRITE:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
+			msg_size = sizeof(struct c2wr_rdma_write_req) +
+			    (sizeof(struct c2_data_addr) * ib_wr->num_sge);
+			if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			wr.sqwr.rdma_write.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_write.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			err = move_sgl((struct c2_data_addr *)
+				       & (wr.sqwr.rdma_write.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_READ:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
+			msg_size = sizeof(struct c2wr_rdma_read_req);
+
+			/* IWarp only suppots 1 sge for RDMA reads */
+			if (ib_wr->num_sge > 1) {
+				err = -EINVAL;
+				break;
+			}
+
+			/*
+			 * Move the local and remote stag/to/len into the WR.
+			 */
+			wr.sqwr.rdma_read.local_stag =
+			    cpu_to_be32(ib_wr->sg_list->lkey);
+			wr.sqwr.rdma_read.local_to =
+			    cpu_to_be64(ib_wr->sg_list->addr);
+			wr.sqwr.rdma_read.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_read.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			wr.sqwr.rdma_read.length =
+			    cpu_to_be32(ib_wr->sg_list->length);
+			break;
+		default:
+			/* error */
+			msg_size = 0;
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Store flags
+		 */
+		c2_wr_set_flags(&wr, flags);
+
+		/*
+		 * Post the puppy!
+		 */
+		err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO.
+		 */
+		c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
+
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+		    struct ib_recv_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	int err = 0;
+
+	if (qp->state > IB_QPS_RTS)
+		return -EINVAL;
+
+	/*
+	 * Try and post each work request
+	 */
+	while (ib_wr) {
+		u32 tot_len;
+		u8 actual_sge_count;
+
+		if (ib_wr->num_sge > qp->recv_sgl_depth) {
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * Create local host-copy of the WR
+		 */
+		wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		c2_wr_set_id(&wr, CCWR_RECV);
+		c2_wr_set_flags(&wr, 0);
+
+		/* sge_count is limited to eight bits. */
+		BUG_ON(ib_wr->num_sge >= 256);
+		err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
+			       ib_wr->sg_list,
+			       ib_wr->num_sge, &tot_len, &actual_sge_count);
+		c2_wr_set_sge_count(&wr, actual_sge_count);
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO
+		 */
+		c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
+
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+{
+	spin_lock_init(&c2dev->qp_table.lock);
+	idr_init(&c2dev->qp_table.idr);
+}
+
+void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+{
+	idr_destroy(&c2dev->qp_table.idr);
+}
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@ -0,0 +1,663 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/inet.h>
+
+#include <linux/route.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_vq.h"
+
+/* Device capabilities */
+#define C2_MIN_PAGESIZE  1024
+
+#define C2_MAX_MRS       32768
+#define C2_MAX_QPS       16000
+#define C2_MAX_WQE_SZ    256
+#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
+#define C2_MAX_SGES      4
+#define C2_MAX_SGE_RD    1
+#define C2_MAX_CQS       32768
+#define C2_MAX_CQES      4096
+#define C2_MAX_PDS       16384
+
+/*
+ * Send the adapter INIT message to the amso1100
+ */
+static int c2_adapter_init(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req wr;
+	int err;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_INIT);
+	wr.hdr.context = 0;
+	wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
+	wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
+	wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
+	wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
+	wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
+	wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
+
+	/* Post the init message */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+
+	return err;
+}
+
+/*
+ * Send the adapter TERM message to the amso1100
+ */
+static void c2_adapter_term(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req wr;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_TERM);
+	wr.hdr.context = 0;
+
+	/* Post the init message */
+	vq_send_wr(c2dev, (union c2wr *) & wr);
+	c2dev->init = 0;
+
+	return;
+}
+
+/*
+ * Query the adapter
+ */
+static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_query_req wr;
+	struct c2wr_rnic_query_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply)
+		err = -ENOMEM;
+
+	err = c2_errno(reply);
+	if (err)
+		goto bail2;
+
+	props->fw_ver =
+		((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
+		((be32_to_cpu(reply->fw_ver_minor) && 0xFFFF) << 16) |
+		(be32_to_cpu(reply->fw_ver_patch) && 0xFFFF);
+	memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
+	props->max_mr_size         = 0xFFFFFFFF;
+	props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
+	props->vendor_id           = be32_to_cpu(reply->vendor_id);
+	props->vendor_part_id      = be32_to_cpu(reply->part_number);
+	props->hw_ver              = be32_to_cpu(reply->hw_version);
+	props->max_qp              = be32_to_cpu(reply->max_qps);
+	props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
+	props->device_cap_flags    = c2dev->device_cap_flags;
+	props->max_sge             = C2_MAX_SGES;
+	props->max_sge_rd          = C2_MAX_SGE_RD;
+	props->max_cq              = be32_to_cpu(reply->max_cqs);
+	props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
+	props->max_mr              = be32_to_cpu(reply->max_mrs);
+	props->max_pd              = be32_to_cpu(reply->max_pds);
+	props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
+	props->max_ee_rd_atom      = 0;
+	props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
+	props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
+	props->max_ee_init_rd_atom = 0;
+	props->atomic_cap          = IB_ATOMIC_NONE;
+	props->max_ee              = 0;
+	props->max_rdd             = 0;
+	props->max_mw              = be32_to_cpu(reply->max_mws);
+	props->max_raw_ipv6_qp     = 0;
+	props->max_raw_ethy_qp     = 0;
+	props->max_mcast_grp       = 0;
+	props->max_mcast_qp_attach = 0;
+	props->max_total_mcast_qp_attach = 0;
+	props->max_ah              = 0;
+	props->max_fmr             = 0;
+	props->max_map_per_fmr     = 0;
+	props->max_srq             = 0;
+	props->max_srq_wr          = 0;
+	props->max_srq_sge         = 0;
+	props->max_pkeys           = 0;
+	props->local_ca_ack_delay  = 0;
+
+ bail2:
+	vq_repbuf_free(c2dev, reply);
+
+ bail1:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Add an IP address to the RNIC interface
+ */
+int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_setconfig_req *wr;
+	struct c2wr_rnic_setconfig_rep *reply;
+	struct c2_netaddr netaddr;
+	int err, len;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	len = sizeof(struct c2_netaddr);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
+
+	netaddr.ip_addr = inaddr;
+	netaddr.netmask = inmask;
+	netaddr.mtu = 0;
+
+	memcpy(wr->data, &netaddr, len);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Delete an IP address from the RNIC interface
+ */
+int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_setconfig_req *wr;
+	struct c2wr_rnic_setconfig_rep *reply;
+	struct c2_netaddr netaddr;
+	int err, len;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	len = sizeof(struct c2_netaddr);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
+
+	netaddr.ip_addr = inaddr;
+	netaddr.netmask = inmask;
+	netaddr.mtu = 0;
+
+	memcpy(wr->data, &netaddr, len);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Open a single RNIC instance to use with all
+ * low level openib calls
+ */
+static int c2_rnic_open(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	struct c2wr_rnic_open_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		return -ENOMEM;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
+	wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
+	wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
+	wr.rnic_open.req.port_num = cpu_to_be16(0);
+	wr.rnic_open.req.user_context = (unsigned long) c2dev;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0) {
+		goto bail1;
+	}
+
+	c2dev->adapter_handle = reply->rnic_handle;
+
+      bail1:
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Close the RNIC instance
+ */
+static int c2_rnic_close(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	struct c2wr_rnic_close_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		return -ENOMEM;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
+	wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
+	wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0) {
+		goto bail1;
+	}
+
+	c2dev->adapter_handle = 0;
+
+      bail1:
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Called by c2_probe to initialize the RNIC. This principally
+ * involves initalizing the various limits and resouce pools that
+ * comprise the RNIC instance.
+ */
+int c2_rnic_init(struct c2_dev *c2dev)
+{
+	int err;
+	u32 qsize, msgsize;
+	void *q1_pages;
+	void *q2_pages;
+	void __iomem *mmio_regs;
+
+	/* Device capabilities */
+	c2dev->device_cap_flags =
+	    (IB_DEVICE_RESIZE_MAX_WR |
+	     IB_DEVICE_CURR_QP_STATE_MOD |
+	     IB_DEVICE_SYS_IMAGE_GUID |
+	     IB_DEVICE_ZERO_STAG |
+	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+	/* Allocate the qptr_array */
+	c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+	if (!c2dev->qptr_array) {
+		return -ENOMEM;
+	}
+
+	/* Inialize the qptr_array */
+	memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
+	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
+	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
+
+	/* Initialize data structures */
+	init_waitqueue_head(&c2dev->req_vq_wo);
+	spin_lock_init(&c2dev->vqlock);
+	spin_lock_init(&c2dev->lock);
+
+	/* Allocate MQ shared pointer pool for kernel clients. User
+	 * mode client pools are hung off the user context
+	 */
+	err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
+	if (err) {
+		goto bail0;
+	}
+
+	/* Allocate shared pointers for Q0, Q1, and Q2 from
+	 * the shared pointer pool.
+	 */
+
+	c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->hint_count_dma,
+					     GFP_KERNEL);
+	c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->req_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->rep_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					  &c2dev->aeq.shared_dma, GFP_KERNEL);
+	if (!c2dev->hint_count || !c2dev->req_vq.shared ||
+	    !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	mmio_regs = c2dev->kva;
+	/* Initialize the Verbs Request Queue */
+	c2_mq_req_init(&c2dev->req_vq, 0,
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)),
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)),
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the Verbs Reply Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
+	q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+	if (!q1_pages) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+					        (void *)q1_pages, qsize * msgsize,
+				      		DMA_FROM_DEVICE);
+	pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+	pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
+		 (u64)c2dev->rep_vq.host_dma);
+	c2_mq_rep_init(&c2dev->rep_vq,
+		   1,
+		   qsize,
+		   msgsize,
+		   q1_pages,
+		   mmio_regs +
+		   be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)),
+		   C2_MQ_HOST_TARGET);
+
+	/* Initialize the Asynchronus Event Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
+	q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+	if (!q2_pages) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+	c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+					        (void *)q2_pages, qsize * msgsize,
+				      		DMA_FROM_DEVICE);
+	pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+	pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages,
+		 (u64)c2dev->rep_vq.host_dma);
+	c2_mq_rep_init(&c2dev->aeq,
+		       2,
+		       qsize,
+		       msgsize,
+		       q2_pages,
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)),
+		       C2_MQ_HOST_TARGET);
+
+	/* Initialize the verbs request allocator */
+	err = vq_init(c2dev);
+	if (err)
+		goto bail3;
+
+	/* Enable interrupts on the adapter */
+	writel(0, c2dev->regs + C2_IDIS);
+
+	/* create the WR init message */
+	err = c2_adapter_init(c2dev);
+	if (err)
+		goto bail4;
+	c2dev->init++;
+
+	/* open an adapter instance */
+	err = c2_rnic_open(c2dev);
+	if (err)
+		goto bail4;
+
+	/* Initialize cached the adapter limits */
+	if (c2_rnic_query(c2dev, &c2dev->props))
+		goto bail5;
+
+	/* Initialize the PD pool */
+	err = c2_init_pd_table(c2dev);
+	if (err)
+		goto bail5;
+
+	/* Initialize the QP pool */
+	c2_init_qp_table(c2dev);
+	return 0;
+
+      bail5:
+	c2_rnic_close(c2dev);
+      bail4:
+	vq_term(c2dev);
+      bail3:
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->aeq, mapping),
+			 c2dev->aeq.q_size * c2dev->aeq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(q2_pages);
+      bail2:
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->rep_vq, mapping),
+			 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(q1_pages);
+      bail1:
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+      bail0:
+	vfree(c2dev->qptr_array);
+
+	return err;
+}
+
+/*
+ * Called by c2_remove to cleanup the RNIC resources.
+ */
+void c2_rnic_term(struct c2_dev *c2dev)
+{
+
+	/* Close the open adapter instance */
+	c2_rnic_close(c2dev);
+
+	/* Send the TERM message to the adapter */
+	c2_adapter_term(c2dev);
+
+	/* Disable interrupts on the adapter */
+	writel(1, c2dev->regs + C2_IDIS);
+
+	/* Free the QP pool */
+	c2_cleanup_qp_table(c2dev);
+
+	/* Free the PD pool */
+	c2_cleanup_pd_table(c2dev);
+
+	/* Free the verbs request allocator */
+	vq_term(c2dev);
+
+	/* Unmap and free the asynchronus event queue */
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->aeq, mapping),
+			 c2dev->aeq.q_size * c2dev->aeq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(c2dev->aeq.msg_pool.host);
+
+	/* Unmap and free the verbs reply queue */
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->rep_vq, mapping),
+			 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(c2dev->rep_vq.msg_pool.host);
+
+	/* Free the MQ shared pointer pool */
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+
+	/* Free the qptr_array */
+	vfree(c2dev->qptr_array);
+
+	return;
+}
--- a/drivers/infiniband/hw/amso1100/c2_status.h
+++ b/drivers/infiniband/hw/amso1100/c2_status.h
@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef	_C2_STATUS_H_
+#define _C2_STATUS_H_
+
+/*
+ * Verbs Status Codes
+ */
+enum c2_status {
+	C2_OK = 0,		/* This must be zero */
+	CCERR_INSUFFICIENT_RESOURCES = 1,
+	CCERR_INVALID_MODIFIER = 2,
+	CCERR_INVALID_MODE = 3,
+	CCERR_IN_USE = 4,
+	CCERR_INVALID_RNIC = 5,
+	CCERR_INTERRUPTED_OPERATION = 6,
+	CCERR_INVALID_EH = 7,
+	CCERR_INVALID_CQ = 8,
+	CCERR_CQ_EMPTY = 9,
+	CCERR_NOT_IMPLEMENTED = 10,
+	CCERR_CQ_DEPTH_TOO_SMALL = 11,
+	CCERR_PD_IN_USE = 12,
+	CCERR_INVALID_PD = 13,
+	CCERR_INVALID_SRQ = 14,
+	CCERR_INVALID_ADDRESS = 15,
+	CCERR_INVALID_NETMASK = 16,
+	CCERR_INVALID_QP = 17,
+	CCERR_INVALID_QP_STATE = 18,
+	CCERR_TOO_MANY_WRS_POSTED = 19,
+	CCERR_INVALID_WR_TYPE = 20,
+	CCERR_INVALID_SGL_LENGTH = 21,
+	CCERR_INVALID_SQ_DEPTH = 22,
+	CCERR_INVALID_RQ_DEPTH = 23,
+	CCERR_INVALID_ORD = 24,
+	CCERR_INVALID_IRD = 25,
+	CCERR_QP_ATTR_CANNOT_CHANGE = 26,
+	CCERR_INVALID_STAG = 27,
+	CCERR_QP_IN_USE = 28,
+	CCERR_OUTSTANDING_WRS = 29,
+	CCERR_STAG_IN_USE = 30,
+	CCERR_INVALID_STAG_INDEX = 31,
+	CCERR_INVALID_SGL_FORMAT = 32,
+	CCERR_ADAPTER_TIMEOUT = 33,
+	CCERR_INVALID_CQ_DEPTH = 34,
+	CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
+	CCERR_INVALID_EP = 36,
+	CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
+	CCERR_FLUSHED = 38,
+	CCERR_INVALID_WQE = 39,
+	CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
+	CCERR_REMOTE_TERMINATION_ERROR = 41,
+	CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
+	CCERR_ACCESS_VIOLATION = 43,
+	CCERR_INVALID_PD_ID = 44,
+	CCERR_WRAP_ERROR = 45,
+	CCERR_INV_STAG_ACCESS_ERROR = 46,
+	CCERR_ZERO_RDMA_READ_RESOURCES = 47,
+	CCERR_QP_NOT_PRIVILEGED = 48,
+	CCERR_STAG_STATE_NOT_INVALID = 49,
+	CCERR_INVALID_PAGE_SIZE = 50,
+	CCERR_INVALID_BUFFER_SIZE = 51,
+	CCERR_INVALID_PBE = 52,
+	CCERR_INVALID_FBO = 53,
+	CCERR_INVALID_LENGTH = 54,
+	CCERR_INVALID_ACCESS_RIGHTS = 55,
+	CCERR_PBL_TOO_BIG = 56,
+	CCERR_INVALID_VA = 57,
+	CCERR_INVALID_REGION = 58,
+	CCERR_INVALID_WINDOW = 59,
+	CCERR_TOTAL_LENGTH_TOO_BIG = 60,
+	CCERR_INVALID_QP_ID = 61,
+	CCERR_ADDR_IN_USE = 62,
+	CCERR_ADDR_NOT_AVAIL = 63,
+	CCERR_NET_DOWN = 64,
+	CCERR_NET_UNREACHABLE = 65,
+	CCERR_CONN_ABORTED = 66,
+	CCERR_CONN_RESET = 67,
+	CCERR_NO_BUFS = 68,
+	CCERR_CONN_TIMEDOUT = 69,
+	CCERR_CONN_REFUSED = 70,
+	CCERR_HOST_UNREACHABLE = 71,
+	CCERR_INVALID_SEND_SGL_DEPTH = 72,
+	CCERR_INVALID_RECV_SGL_DEPTH = 73,
+	CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
+	CCERR_INSUFFICIENT_PRIVILEGES = 75,
+	CCERR_STACK_ERROR = 76,
+	CCERR_INVALID_VERSION = 77,
+	CCERR_INVALID_MTU = 78,
+	CCERR_INVALID_IMAGE = 79,
+	CCERR_PENDING = 98,	/* not an error; user internally by adapter */
+	CCERR_DEFER = 99,	/* not an error; used internally by adapter */
+	CCERR_FAILED_WRITE = 100,
+	CCERR_FAILED_ERASE = 101,
+	CCERR_FAILED_VERIFICATION = 102,
+	CCERR_NOT_FOUND = 103,
+
+};
+
+/*
+ * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
+ */
+enum c2_connect_status {
+	C2_CONN_STATUS_SUCCESS = C2_OK,
+	C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
+	C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
+	C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
+	C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
+	C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
+	C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
+	C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
+	C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
+	C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
+	C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
+};
+
+/*
+ * Flash programming status codes.
+ */
+enum c2_flash_status {
+	C2_FLASH_STATUS_SUCCESS = 0x0000,
+	C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
+	C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
+	C2_FLASH_STATUS_ECLBS = 0x0400,
+	C2_FLASH_STATUS_PSLBS = 0x0800,
+	C2_FLASH_STATUS_VPENS = 0x1000,
+};
+
+#endif				/* _C2_STATUS_H_ */
--- a/drivers/infiniband/hw/amso1100/c2_user.h
+++ b/drivers/infiniband/hw/amso1100/c2_user.h
@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_USER_H
+#define C2_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct c2_alloc_ucontext_resp {
+	__u32 qp_tab_size;
+	__u32 uarc_size;
+};
+
+struct c2_alloc_pd_resp {
+	__u32 pdn;
+	__u32 reserved;
+};
+
+struct c2_create_cq {
+	__u32 lkey;
+	__u32 pdn;
+	__u64 arm_db_page;
+	__u64 set_db_page;
+	__u32 arm_db_index;
+	__u32 set_db_index;
+};
+
+struct c2_create_cq_resp {
+	__u32 cqn;
+	__u32 reserved;
+};
+
+struct c2_create_qp {
+	__u32 lkey;
+	__u32 reserved;
+	__u64 sq_db_page;
+	__u64 rq_db_page;
+	__u32 sq_db_index;
+	__u32 rq_db_index;
+};
+
+#endif				/* C2_USER_H */
--- a/drivers/infiniband/hw/amso1100/c2_vq.c
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "c2_vq.h"
+#include "c2_provider.h"
+
+/*
+ * Verbs Request Objects:
+ *
+ * VQ Request Objects are allocated by the kernel verbs handlers.
+ * They contain a wait object, a refcnt, an atomic bool indicating that the
+ * adapter has replied, and a copy of the verb reply work request.
+ * A pointer to the VQ Request Object is passed down in the context
+ * field of the work request message, and reflected back by the adapter
+ * in the verbs reply message.  The function handle_vq() in the interrupt
+ * path will use this pointer to:
+ * 	1) append a copy of the verbs reply message
+ * 	2) mark that the reply is ready
+ * 	3) wake up the kernel verbs handler blocked awaiting the reply.
+ *
+ *
+ * The kernel verbs handlers do a "get" to put a 2nd reference on the
+ * VQ Request object.  If the kernel verbs handler exits before the adapter
+ * can respond, this extra reference will keep the VQ Request object around
+ * until the adapter's reply can be processed.  The reason we need this is
+ * because a pointer to this object is stuffed into the context field of
+ * the verbs work request message, and reflected back in the reply message.
+ * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
+ * kernel verb handler that is blocked awaiting the verb reply.
+ * So handle_vq() will do a "put" on the object when it's done accessing it.
+ * NOTE:  If we guarantee that the kernel verb handler will never bail before
+ *        getting the reply, then we don't need these refcnts.
+ *
+ *
+ * VQ Request objects are freed by the kernel verbs handlers only
+ * after the verb has been processed, or when the adapter fails and
+ * does not reply.
+ *
+ *
+ * Verbs Reply Buffers:
+ *
+ * VQ Reply bufs are local host memory copies of a
+ * outstanding Verb Request reply
+ * message.  The are always allocated by the kernel verbs handlers, and _may_ be
+ * freed by either the kernel verbs handler -or- the interrupt handler.  The
+ * kernel verbs handler _must_ free the repbuf, then free the vq request object
+ * in that order.
+ */
+
+int vq_init(struct c2_dev *c2dev)
+{
+	sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
+		(char) ('0' + c2dev->devnum));
+	c2dev->host_msg_cache =
+	    kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
+			      SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (c2dev->host_msg_cache == NULL) {
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void vq_term(struct c2_dev *c2dev)
+{
+	kmem_cache_destroy(c2dev->host_msg_cache);
+}
+
+/* vq_req_alloc - allocate a VQ Request Object and initialize it.
+ * The refcnt is set to 1.
+ */
+struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *r;
+
+	r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
+	if (r) {
+		init_waitqueue_head(&r->wait_object);
+		r->reply_msg = (u64) NULL;
+		r->event = 0;
+		r->cm_id = NULL;
+		r->qp = NULL;
+		atomic_set(&r->refcnt, 1);
+		atomic_set(&r->reply_ready, 0);
+	}
+	return r;
+}
+
+
+/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
+ * has already free the VQ Reply Buffer if it existed.
+ */
+void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	r->reply_msg = (u64) NULL;
+	if (atomic_dec_and_test(&r->refcnt)) {
+		kfree(r);
+	}
+}
+
+/* vq_req_get - reference a VQ Request Object.  Done
+ * only in the kernel verbs handlers.
+ */
+void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	atomic_inc(&r->refcnt);
+}
+
+
+/* vq_req_put - dereference and potentially free a VQ Request Object.
+ *
+ * This is only called by handle_vq() on the
+ * interrupt when it is done processing
+ * a verb reply message.  If the associated
+ * kernel verbs handler has already bailed,
+ * then this put will actually free the VQ
+ * Request object _and_ the VQ Reply Buffer
+ * if it exists.
+ */
+void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	if (atomic_dec_and_test(&r->refcnt)) {
+		if (r->reply_msg != (u64) NULL)
+			vq_repbuf_free(c2dev,
+				       (void *) (unsigned long) r->reply_msg);
+		kfree(r);
+	}
+}
+
+
+/*
+ * vq_repbuf_alloc - allocate a VQ Reply Buffer.
+ */
+void *vq_repbuf_alloc(struct c2_dev *c2dev)
+{
+	return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC);
+}
+
+/*
+ * vq_send_wr - post a verbs request message to the Verbs Request Queue.
+ * If a message is not available in the MQ, then block until one is available.
+ * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
+ * When the adapter drains the Verbs Request Queue,
+ * it inserts MQ index 0 in to the
+ * adapter->host activity fifo and interrupts the host.
+ */
+int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
+{
+	void *msg;
+	wait_queue_t __wait;
+
+	/*
+	 * grab adapter vq lock
+	 */
+	spin_lock(&c2dev->vqlock);
+
+	/*
+	 * allocate msg
+	 */
+	msg = c2_mq_alloc(&c2dev->req_vq);
+
+	/*
+	 * If we cannot get a msg, then we'll wait
+	 * When a messages are available, the int handler will wake_up()
+	 * any waiters.
+	 */
+	while (msg == NULL) {
+		pr_debug("%s:%d no available msg in VQ, waiting...\n",
+		       __FUNCTION__, __LINE__);
+		init_waitqueue_entry(&__wait, current);
+		add_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_unlock(&c2dev->vqlock);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!c2_mq_full(&c2dev->req_vq)) {
+				break;
+			}
+			if (!signal_pending(current)) {
+				schedule_timeout(1 * HZ);	/* 1 second... */
+				continue;
+			}
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+			return -EINTR;
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_lock(&c2dev->vqlock);
+		msg = c2_mq_alloc(&c2dev->req_vq);
+	}
+
+	/*
+	 * copy wr into adapter msg
+	 */
+	memcpy(msg, wr, c2dev->req_vq.msg_size);
+
+	/*
+	 * post msg
+	 */
+	c2_mq_produce(&c2dev->req_vq);
+
+	/*
+	 * release adapter vq lock
+	 */
+	spin_unlock(&c2dev->vqlock);
+	return 0;
+}
+
+
+/*
+ * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
+ */
+int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
+{
+	if (!wait_event_timeout(req->wait_object,
+				atomic_read(&req->reply_ready),
+				60*HZ))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*
+ * vq_repbuf_free - Free a Verbs Reply Buffer.
+ */
+void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
+{
+	kmem_cache_free(c2dev->host_msg_cache, reply);
+}
--- a/drivers/infiniband/hw/amso1100/c2_vq.h
+++ b/drivers/infiniband/hw/amso1100/c2_vq.h
@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_VQ_H_
+#define _C2_VQ_H_
+#include <linux/sched.h>
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_provider.h"
+
+struct c2_vq_req {
+	u64 reply_msg;		/* ptr to reply msg */
+	wait_queue_head_t wait_object;	/* wait object for vq reqs */
+	atomic_t reply_ready;	/* set when reply is ready */
+	atomic_t refcnt;	/* used to cancel WRs... */
+	int event;
+	struct iw_cm_id *cm_id;
+	struct c2_qp *qp;
+};
+
+extern int vq_init(struct c2_dev *c2dev);
+extern void vq_term(struct c2_dev *c2dev);
+
+extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
+extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
+
+extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
+extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
+
+extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
+#endif				/* _C2_VQ_H_ */
--- a/drivers/infiniband/hw/amso1100/c2_wr.h
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@ -0,0 +1,16 @@
+config INFINIBAND_EHCA
+	tristate "eHCA support"
+	depends on IBMEBUS && INFINIBAND
+	---help---
+	This driver supports the IBM pSeries eHCA InfiniBand adapter.
+
+	To compile the driver as a module, choose M here. The module
+	will be called ib_ehca.
+
+config INFINIBAND_EHCA_SCALING
+	bool "Scaling support (EXPERIMENTAL)"
+	depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
+	---help---
+	eHCA scaling support schedules the CQ callbacks to different CPUs.
+
+	To enable this feature choose Y here.
--- a/drivers/infiniband/hw/ehca/Makefile
+++ b/drivers/infiniband/hw/ehca/Makefile
@ -0,0 +1,16 @@
+#  Authors: Heiko J Schick <schickhj@de.ibm.com>
+#           Christoph Raisch <raisch@de.ibm.com>
+#           Joachim Fenkes <fenkes@de.ibm.com>
+#
+#  Copyright (c) 2005 IBM Corporation
+#
+#  All rights reserved.
+#
+#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
+
+obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
+
+ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
+		ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
+		ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
+
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@ -0,0 +1,271 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  adress vector functions
+ *
+ *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *av_cache;
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	int ret;
+	struct ehca_av *av;
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+
+	av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
+	if (!av) {
+		ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
+			 pd, ah_attr);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	av->av.sl = ah_attr->sl;
+	av->av.dlid = ah_attr->dlid;
+	av->av.slid_path_bits = ah_attr->src_path_bits;
+
+	if (ehca_static_rate < 0) {
+		int ah_mult = ib_rate_to_mult(ah_attr->static_rate);
+		int ehca_mult =
+			ib_rate_to_mult(shca->sport[ah_attr->port_num].rate );
+
+		if (ah_mult >= ehca_mult)
+			av->av.ipd = 0;
+		else
+			av->av.ipd = (ah_mult > 0) ?
+				((ehca_mult - 1) / ah_mult) : 0;
+	} else
+	        av->av.ipd = ehca_static_rate;
+
+	av->av.lnh = ah_attr->ah_flags;
+	av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
+					    ah_attr->grh.traffic_class);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+					    ah_attr->grh.flow_label);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+					    ah_attr->grh.hop_limit);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
+	/* set sgid in grh.word_1 */
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		int rc;
+		struct ib_port_attr port_attr;
+		union ib_gid gid;
+		memset(&port_attr, 0, sizeof(port_attr));
+		rc = ehca_query_port(pd->device, ah_attr->port_num,
+				     &port_attr);
+		if (rc) { /* invalid port number */
+			ret = -EINVAL;
+			ehca_err(pd->device, "Invalid port number "
+				 "ehca_query_port() returned %x "
+				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
+			goto create_ah_exit1;
+		}
+		memset(&gid, 0, sizeof(gid));
+		rc = ehca_query_gid(pd->device,
+				    ah_attr->port_num,
+				    ah_attr->grh.sgid_index, &gid);
+		if (rc) {
+			ret = -EINVAL;
+			ehca_err(pd->device, "Failed to retrieve sgid "
+				 "ehca_query_gid() returned %x "
+				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
+			goto create_ah_exit1;
+		}
+		memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
+	}
+	/* for the time being we use a hard coded PMTU of 2048 Bytes */
+	av->av.pmtu = 4;
+
+	/* dgid comes in grh.word_3 */
+	memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
+	       sizeof(ah_attr->grh.dgid));
+
+	return &av->ib_ah;
+
+create_ah_exit1:
+	kmem_cache_free(av_cache, av);
+
+	return ERR_PTR(ret);
+}
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+	struct ehca_av *av;
+	struct ehca_ud_av new_ehca_av;
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	memset(&new_ehca_av, 0, sizeof(new_ehca_av));
+	new_ehca_av.sl = ah_attr->sl;
+	new_ehca_av.dlid = ah_attr->dlid;
+	new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
+	new_ehca_av.ipd = ah_attr->static_rate;
+	new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
+					 (ah_attr->ah_flags & IB_AH_GRH) > 0);
+	new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
+						ah_attr->grh.traffic_class);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+						 ah_attr->grh.flow_label);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+						 ah_attr->grh.hop_limit);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
+
+	/* set sgid in grh.word_1 */
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		int rc;
+		struct ib_port_attr port_attr;
+		union ib_gid gid;
+		memset(&port_attr, 0, sizeof(port_attr));
+		rc = ehca_query_port(ah->device, ah_attr->port_num,
+				     &port_attr);
+		if (rc) { /* invalid port number */
+			ehca_err(ah->device, "Invalid port number "
+				 "ehca_query_port() returned %x "
+				 "ah=%p ah_attr=%p port_num=%x",
+				 rc, ah, ah_attr, ah_attr->port_num);
+			return -EINVAL;
+		}
+		memset(&gid, 0, sizeof(gid));
+		rc = ehca_query_gid(ah->device,
+				    ah_attr->port_num,
+				    ah_attr->grh.sgid_index, &gid);
+		if (rc) {
+			ehca_err(ah->device, "Failed to retrieve sgid "
+				 "ehca_query_gid() returned %x "
+				 "ah=%p ah_attr=%p port_num=%x "
+				 "sgid_index=%x",
+				 rc, ah, ah_attr, ah_attr->port_num,
+				 ah_attr->grh.sgid_index);
+			return -EINVAL;
+		}
+		memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
+	}
+
+	new_ehca_av.pmtu = 4; /* see also comment in create_ah() */
+
+	memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
+	       sizeof(ah_attr->grh.dgid));
+
+	av = container_of(ah, struct ehca_av, ib_ah);
+	av->av = new_ehca_av;
+
+	return 0;
+}
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
+	       sizeof(ah_attr->grh.dgid));
+	ah_attr->sl = av->av.sl;
+
+	ah_attr->dlid = av->av.dlid;
+
+	ah_attr->src_path_bits = av->av.slid_path_bits;
+	ah_attr->static_rate = av->av.ipd;
+	ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
+	ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
+						    av->av.grh.word_0);
+	ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
+						av->av.grh.word_0);
+	ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
+						 av->av.grh.word_0);
+
+	return 0;
+}
+
+int ehca_destroy_ah(struct ib_ah *ah)
+{
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
+
+	return 0;
+}
+
+int ehca_init_av_cache(void)
+{
+	av_cache = kmem_cache_create("ehca_cache_av",
+				   sizeof(struct ehca_av), 0,
+				   SLAB_HWCACHE_ALIGN,
+				   NULL, NULL);
+	if (!av_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_av_cache(void)
+{
+	if (av_cache)
+		kmem_cache_destroy(av_cache);
+}
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@ -0,0 +1,346 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Struct definition for eHCA internal structures
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_H__
+#define __EHCA_CLASSES_H__
+
+#include "ehca_classes.h"
+#include "ipz_pt_fn.h"
+
+struct ehca_module;
+struct ehca_qp;
+struct ehca_cq;
+struct ehca_eq;
+struct ehca_mr;
+struct ehca_mw;
+struct ehca_pd;
+struct ehca_av;
+
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ehca_irq.h"
+
+struct ehca_eq {
+	u32 length;
+	struct ipz_queue ipz_queue;
+	struct ipz_eq_handle ipz_eq_handle;
+	struct work_struct work;
+	struct h_galpas galpas;
+	int is_initialized;
+	struct ehca_pfeq pf;
+	spinlock_t spinlock;
+	struct tasklet_struct interrupt_task;
+	u32 ist;
+};
+
+struct ehca_sport {
+	struct ib_cq *ibcq_aqp1;
+	struct ib_qp *ibqp_aqp1;
+	enum ib_rate  rate;
+	enum ib_port_state port_state;
+};
+
+struct ehca_shca {
+	struct ib_device ib_device;
+	struct ibmebus_dev *ibmebus_dev;
+	u8 num_ports;
+	int hw_level;
+	struct list_head shca_list;
+	struct ipz_adapter_handle ipz_hca_handle;
+	struct ehca_sport sport[2];
+	struct ehca_eq eq;
+	struct ehca_eq neq;
+	struct ehca_mr *maxmr;
+	struct ehca_pd *pd;
+	struct h_galpas galpas;
+};
+
+struct ehca_pd {
+	struct ib_pd ib_pd;
+	struct ipz_pd fw_pd;
+	u32 ownpid;
+};
+
+struct ehca_qp {
+	struct ib_qp ib_qp;
+	u32 qp_type;
+	struct ipz_queue ipz_squeue;
+	struct ipz_queue ipz_rqueue;
+	struct h_galpas galpas;
+	u32 qkey;
+	u32 real_qp_num;
+	u32 token;
+	spinlock_t spinlock_s;
+	spinlock_t spinlock_r;
+	u32 sq_max_inline_data_size;
+	struct ipz_qp_handle ipz_qp_handle;
+	struct ehca_pfqp pf;
+	struct ib_qp_init_attr init_attr;
+	u64 uspace_squeue;
+	u64 uspace_rqueue;
+	u64 uspace_fwh;
+	struct ehca_cq *send_cq;
+	struct ehca_cq *recv_cq;
+	unsigned int sqerr_purgeflag;
+	struct hlist_node list_entries;
+};
+
+/* must be power of 2 */
+#define QP_HASHTAB_LEN 8
+
+struct ehca_cq {
+	struct ib_cq ib_cq;
+	struct ipz_queue ipz_queue;
+	struct h_galpas galpas;
+	spinlock_t spinlock;
+	u32 cq_number;
+	u32 token;
+	u32 nr_of_entries;
+	struct ipz_cq_handle ipz_cq_handle;
+	struct ehca_pfcq pf;
+	spinlock_t cb_lock;
+	u64 uspace_queue;
+	u64 uspace_fwh;
+	struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
+	struct list_head entry;
+	u32 nr_callbacks;
+	spinlock_t task_lock;
+	u32 ownpid;
+};
+
+enum ehca_mr_flag {
+	EHCA_MR_FLAG_FMR = 0x80000000,	 /* FMR, created with ehca_alloc_fmr */
+	EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
+};
+
+struct ehca_mr {
+	union {
+		struct ib_mr ib_mr;	/* must always be first in ehca_mr */
+		struct ib_fmr ib_fmr;	/* must always be first in ehca_mr */
+	} ib;
+	spinlock_t mrlock;
+
+	enum ehca_mr_flag flags;
+	u32 num_pages;		/* number of MR pages */
+	u32 num_4k;		/* number of 4k "page" portions to form MR */
+	int acl;		/* ACL (stored here for usage in reregister) */
+	u64 *start;		/* virtual start address (stored here for */
+	                        /* usage in reregister) */
+	u64 size;		/* size (stored here for usage in reregister) */
+	u32 fmr_page_size;	/* page size for FMR */
+	u32 fmr_max_pages;	/* max pages for FMR */
+	u32 fmr_max_maps;	/* max outstanding maps for FMR */
+	u32 fmr_map_cnt;	/* map counter for FMR */
+	/* fw specific data */
+	struct ipz_mrmw_handle ipz_mr_handle;	/* MR handle for h-calls */
+	struct h_galpas galpas;
+	/* data for userspace bridge */
+	u32 nr_of_pages;
+	void *pagearray;
+};
+
+struct ehca_mw {
+	struct ib_mw ib_mw;	/* gen2 mw, must always be first in ehca_mw */
+	spinlock_t mwlock;
+
+	u8 never_bound;		/* indication MW was never bound */
+	struct ipz_mrmw_handle ipz_mw_handle;	/* MW handle for h-calls */
+	struct h_galpas galpas;
+};
+
+enum ehca_mr_pgi_type {
+	EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
+				  * ehca_rereg_phys_mr,
+				  * ehca_reg_internal_maxmr */
+	EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
+	EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
+};
+
+struct ehca_mr_pginfo {
+	enum ehca_mr_pgi_type type;
+	u64 num_pages;
+	u64 page_cnt;
+	u64 num_4k;       /* number of 4k "page" portions */
+	u64 page_4k_cnt;  /* counter for 4k "page" portions */
+	u64 next_4k;      /* next 4k "page" portion in buffer/chunk/listelem */
+
+	/* type EHCA_MR_PGI_PHYS section */
+	int num_phys_buf;
+	struct ib_phys_buf *phys_buf_array;
+	u64 next_buf;
+
+	/* type EHCA_MR_PGI_USER section */
+	struct ib_umem *region;
+	struct ib_umem_chunk *next_chunk;
+	u64 next_nmap;
+
+	/* type EHCA_MR_PGI_FMR section */
+	u64 *page_list;
+	u64 next_listelem;
+	/* next_4k also used within EHCA_MR_PGI_FMR */
+};
+
+/* output parameters for MR/FMR hipz calls */
+struct ehca_mr_hipzout_parms {
+	struct ipz_mrmw_handle handle;
+	u32 lkey;
+	u32 rkey;
+	u64 len;
+	u64 vaddr;
+	u32 acl;
+};
+
+/* output parameters for MW hipz calls */
+struct ehca_mw_hipzout_parms {
+	struct ipz_mrmw_handle handle;
+	u32 rkey;
+};
+
+struct ehca_av {
+	struct ib_ah ib_ah;
+	struct ehca_ud_av av;
+};
+
+struct ehca_ucontext {
+	struct ib_ucontext ib_ucontext;
+};
+
+struct ehca_module *ehca_module_new(void);
+
+int ehca_module_delete(struct ehca_module *me);
+
+int ehca_eq_ctor(struct ehca_eq *eq);
+
+int ehca_eq_dtor(struct ehca_eq *eq);
+
+struct ehca_shca *ehca_shca_new(void);
+
+int ehca_shca_delete(struct ehca_shca *me);
+
+struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
+
+int ehca_init_pd_cache(void);
+void ehca_cleanup_pd_cache(void);
+int ehca_init_cq_cache(void);
+void ehca_cleanup_cq_cache(void);
+int ehca_init_qp_cache(void);
+void ehca_cleanup_qp_cache(void);
+int ehca_init_av_cache(void);
+void ehca_cleanup_av_cache(void);
+int ehca_init_mrmw_cache(void);
+void ehca_cleanup_mrmw_cache(void);
+
+extern spinlock_t ehca_qp_idr_lock;
+extern spinlock_t ehca_cq_idr_lock;
+extern struct idr ehca_qp_idr;
+extern struct idr ehca_cq_idr;
+
+extern int ehca_static_rate;
+extern int ehca_port_act_time;
+extern int ehca_use_hp_mr;
+
+struct ipzu_queue_resp {
+	u64 queue;        /* points to first queue entry */
+	u32 qe_size;      /* queue entry size */
+	u32 act_nr_of_sg;
+	u32 queue_length; /* queue length allocated in bytes */
+	u32 pagesize;
+	u32 toggle_state;
+	u32 dummy; /* padding for 8 byte alignment */
+};
+
+struct ehca_create_cq_resp {
+	u32 cq_number;
+	u32 token;
+	struct ipzu_queue_resp ipz_queue;
+	struct h_galpas galpas;
+};
+
+struct ehca_create_qp_resp {
+	u32 qp_num;
+	u32 token;
+	u32 qp_type;
+	u32 qkey;
+	/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
+	u32 real_qp_num;
+	u32 dummy; /* padding for 8 byte alignment */
+	struct ipzu_queue_resp ipz_squeue;
+	struct ipzu_queue_resp ipz_rqueue;
+	struct h_galpas galpas;
+};
+
+struct ehca_alloc_cq_parms {
+	u32 nr_cqe;
+	u32 act_nr_of_entries;
+	u32 act_pages;
+	struct ipz_eq_handle eq_handle;
+};
+
+struct ehca_alloc_qp_parms {
+	int servicetype;
+	int sigtype;
+	int daqp_ctrl;
+	int max_send_sge;
+	int max_recv_sge;
+	int ud_av_l_key_ctl;
+
+	u16 act_nr_send_wqes;
+	u16 act_nr_recv_wqes;
+	u8  act_nr_recv_sges;
+	u8  act_nr_send_sges;
+
+	u32 nr_rq_pages;
+	u32 nr_sq_pages;
+
+	struct ipz_eq_handle ipz_eq_handle;
+	struct ipz_pd pd;
+};
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+
+#endif
--- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@ -0,0 +1,236 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  pSeries interface definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_PSERIES_H__
+#define __EHCA_CLASSES_PSERIES_H__
+
+#include "hcp_phyp.h"
+#include "ipz_pt_fn.h"
+
+
+struct ehca_pfqp {
+	struct ipz_qpt sqpt;
+	struct ipz_qpt rqpt;
+};
+
+struct ehca_pfcq {
+	struct ipz_qpt qpt;
+	u32 cqnr;
+};
+
+struct ehca_pfeq {
+	struct ipz_qpt qpt;
+	struct h_galpa galpa;
+	u32 eqnr;
+};
+
+struct ipz_adapter_handle {
+	u64 handle;
+};
+
+struct ipz_cq_handle {
+	u64 handle;
+};
+
+struct ipz_eq_handle {
+	u64 handle;
+};
+
+struct ipz_qp_handle {
+	u64 handle;
+};
+struct ipz_mrmw_handle {
+	u64 handle;
+};
+
+struct ipz_pd {
+	u32 value;
+};
+
+struct hcp_modify_qp_control_block {
+	u32 qkey;                      /* 00 */
+	u32 rdd;                       /* reliable datagram domain */
+	u32 send_psn;                  /* 02 */
+	u32 receive_psn;               /* 03 */
+	u32 prim_phys_port;            /* 04 */
+	u32 alt_phys_port;             /* 05 */
+	u32 prim_p_key_idx;            /* 06 */
+	u32 alt_p_key_idx;             /* 07 */
+	u32 rdma_atomic_ctrl;          /* 08 */
+	u32 qp_state;                  /* 09 */
+	u32 reserved_10;               /* 10 */
+	u32 rdma_nr_atomic_resp_res;   /* 11 */
+	u32 path_migration_state;      /* 12 */
+	u32 rdma_atomic_outst_dest_qp; /* 13 */
+	u32 dest_qp_nr;                /* 14 */
+	u32 min_rnr_nak_timer_field;   /* 15 */
+	u32 service_level;             /* 16 */
+	u32 send_grh_flag;             /* 17 */
+	u32 retry_count;               /* 18 */
+	u32 timeout;                   /* 19 */
+	u32 path_mtu;                  /* 20 */
+	u32 max_static_rate;           /* 21 */
+	u32 dlid;                      /* 22 */
+	u32 rnr_retry_count;           /* 23 */
+	u32 source_path_bits;          /* 24 */
+	u32 traffic_class;             /* 25 */
+	u32 hop_limit;                 /* 26 */
+	u32 source_gid_idx;            /* 27 */
+	u32 flow_label;                /* 28 */
+	u32 reserved_29;               /* 29 */
+	union {                        /* 30 */
+		u64 dw[2];
+		u8 byte[16];
+	} dest_gid;
+	u32 service_level_al;          /* 34 */
+	u32 send_grh_flag_al;          /* 35 */
+	u32 retry_count_al;            /* 36 */
+	u32 timeout_al;                /* 37 */
+	u32 max_static_rate_al;        /* 38 */
+	u32 dlid_al;                   /* 39 */
+	u32 rnr_retry_count_al;        /* 40 */
+	u32 source_path_bits_al;       /* 41 */
+	u32 traffic_class_al;          /* 42 */
+	u32 hop_limit_al;              /* 43 */
+	u32 source_gid_idx_al;         /* 44 */
+	u32 flow_label_al;             /* 45 */
+	u32 reserved_46;               /* 46 */
+	u32 reserved_47;               /* 47 */
+	union {                        /* 48 */
+		u64 dw[2];
+		u8 byte[16];
+	} dest_gid_al;
+	u32 max_nr_outst_send_wr;      /* 52 */
+	u32 max_nr_outst_recv_wr;      /* 53 */
+	u32 disable_ete_credit_check;  /* 54 */
+	u32 qp_number;                 /* 55 */
+	u64 send_queue_handle;         /* 56 */
+	u64 recv_queue_handle;         /* 58 */
+	u32 actual_nr_sges_in_sq_wqe;  /* 60 */
+	u32 actual_nr_sges_in_rq_wqe;  /* 61 */
+	u32 qp_enable;                 /* 62 */
+	u32 curr_srq_limit;            /* 63 */
+	u64 qp_aff_asyn_ev_log_reg;    /* 64 */
+	u64 shared_rq_hndl;            /* 66 */
+	u64 trigg_doorbell_qp_hndl;    /* 68 */
+	u32 reserved_70_127[58];       /* 70 */
+};
+
+#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM(0,0)
+#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM(2,2)
+#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM(3,3)
+#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM(4,4)
+#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM(5,5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM(6,6)
+#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM(7,7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM(8,8)
+#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM(9,9)
+#define MQPCB_QP_STATE                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11,11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12,12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13,13)
+#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14,14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15,15)
+#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16,16)
+#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17,17)
+#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18,18)
+#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19,19)
+#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20,20)
+#define MQPCB_PATH_MTU                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21,21)
+#define MQPCB_MAX_STATIC_RATE                   EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22,22)
+#define MQPCB_DLID                              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23,23)
+#define MQPCB_RNR_RETRY_COUNT                   EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24,24)
+#define MQPCB_SOURCE_PATH_BITS                  EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25,25)
+#define MQPCB_TRAFFIC_CLASS                     EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26,26)
+#define MQPCB_HOP_LIMIT                         EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27,27)
+#define MQPCB_SOURCE_GID_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28,28)
+#define MQPCB_FLOW_LABEL                        EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30,30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31,31)
+#define MQPCB_SERVICE_LEVEL_AL                  EHCA_BMASK_IBM(28,31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32,32)
+#define MQPCB_SEND_GRH_FLAG_AL                  EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33,33)
+#define MQPCB_RETRY_COUNT_AL                    EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34,34)
+#define MQPCB_TIMEOUT_AL                        EHCA_BMASK_IBM(27,31)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35,35)
+#define MQPCB_MAX_STATIC_RATE_AL                EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36,36)
+#define MQPCB_DLID_AL                           EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37,37)
+#define MQPCB_RNR_RETRY_COUNT_AL                EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38,38)
+#define MQPCB_SOURCE_PATH_BITS_AL               EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39,39)
+#define MQPCB_TRAFFIC_CLASS_AL                  EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40,40)
+#define MQPCB_HOP_LIMIT_AL                      EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41,41)
+#define MQPCB_SOURCE_GID_IDX_AL                 EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42,42)
+#define MQPCB_FLOW_LABEL_AL                     EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44,44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45,45)
+#define MQPCB_MAX_NR_OUTST_SEND_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46,46)
+#define MQPCB_MAX_NR_OUTST_RECV_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47,47)
+#define MQPCB_DISABLE_ETE_CREDIT_CHECK          EHCA_BMASK_IBM(31,31)
+#define MQPCB_QP_NUMBER                         EHCA_BMASK_IBM(8,31)
+#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48,48)
+#define MQPCB_QP_ENABLE                         EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_CURR_SQR_LIMIT               EHCA_BMASK_IBM(49,49)
+#define MQPCB_CURR_SQR_LIMIT                    EHCA_BMASK_IBM(15,31)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50,50)
+#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51,51)
+
+#endif /* __EHCA_CLASSES_PSERIES_H__ */
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@ -0,0 +1,427 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Completion queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *cq_cache;
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
+{
+	unsigned int qp_num = qp->real_qp_num;
+	unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
+	unsigned long spl_flags;
+
+	spin_lock_irqsave(&cq->spinlock, spl_flags);
+	hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
+	spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+
+	ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
+		 cq->cq_number, qp_num);
+
+	return 0;
+}
+
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
+{
+	int ret = -EINVAL;
+	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
+	struct hlist_node *iter;
+	struct ehca_qp *qp;
+	unsigned long spl_flags;
+
+	spin_lock_irqsave(&cq->spinlock, spl_flags);
+	hlist_for_each(iter, &cq->qp_hashtab[key]) {
+		qp = hlist_entry(iter, struct ehca_qp, list_entries);
+		if (qp->real_qp_num == real_qp_num) {
+			hlist_del(iter);
+			ehca_dbg(cq->ib_cq.device,
+				 "removed qp from cq .cq_num=%x real_qp_num=%x",
+				 cq->cq_number, real_qp_num);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+	if (ret)
+		ehca_err(cq->ib_cq.device,
+			 "qp not found cq_num=%x real_qp_num=%x",
+			 cq->cq_number, real_qp_num);
+
+	return ret;
+}
+
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
+{
+	struct ehca_qp *ret = NULL;
+	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
+	struct hlist_node *iter;
+	struct ehca_qp *qp;
+	hlist_for_each(iter, &cq->qp_hashtab[key]) {
+		qp = hlist_entry(iter, struct ehca_qp, list_entries);
+		if (qp->real_qp_num == real_qp_num) {
+			ret = qp;
+			break;
+		}
+	}
+	return ret;
+}
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+			     struct ib_ucontext *context,
+			     struct ib_udata *udata)
+{
+	static const u32 additional_cqe = 20;
+	struct ib_cq *cq;
+	struct ehca_cq *my_cq;
+	struct ehca_shca *shca =
+		container_of(device, struct ehca_shca, ib_device);
+	struct ipz_adapter_handle adapter_handle;
+	struct ehca_alloc_cq_parms param; /* h_call's out parameters */
+	struct h_galpa gal;
+	void *vpage;
+	u32 counter;
+	u64 rpage, cqx_fec, h_ret;
+	int ipz_rc, ret, i;
+	unsigned long flags;
+
+	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
+		return ERR_PTR(-EINVAL);
+
+	my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
+	if (!my_cq) {
+		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
+			 device);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(my_cq, 0, sizeof(struct ehca_cq));
+	memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
+
+	spin_lock_init(&my_cq->spinlock);
+	spin_lock_init(&my_cq->cb_lock);
+	spin_lock_init(&my_cq->task_lock);
+	my_cq->ownpid = current->tgid;
+
+	cq = &my_cq->ib_cq;
+
+	adapter_handle = shca->ipz_hca_handle;
+	param.eq_handle = shca->eq.ipz_eq_handle;
+
+	do {
+		if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
+			cq = ERR_PTR(-ENOMEM);
+			ehca_err(device, "Can't reserve idr nr. device=%p",
+				 device);
+			goto create_cq_exit1;
+		}
+
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	} while (ret == -EAGAIN);
+
+	if (ret) {
+		cq = ERR_PTR(-ENOMEM);
+		ehca_err(device, "Can't allocate new idr entry. device=%p",
+			 device);
+		goto create_cq_exit1;
+	}
+
+	/*
+	 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
+	 * for receiving errors CQEs.
+	 */
+	param.nr_cqe = cqe + additional_cqe;
+	h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(device, "hipz_h_alloc_resource_cq() failed "
+			 "h_ret=%lx device=%p", h_ret, device);
+		cq = ERR_PTR(ehca2ib_return_code(h_ret));
+		goto create_cq_exit2;
+	}
+
+	ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
+				EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+	if (!ipz_rc) {
+		ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
+			 ipz_rc, device);
+		cq = ERR_PTR(-EINVAL);
+		goto create_cq_exit3;
+	}
+
+	for (counter = 0; counter < param.act_pages; counter++) {
+		vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+		if (!vpage) {
+			ehca_err(device, "ipz_qpageit_get_inc() "
+				 "returns NULL device=%p", device);
+			cq = ERR_PTR(-EAGAIN);
+			goto create_cq_exit4;
+		}
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_cq(adapter_handle,
+						 my_cq->ipz_cq_handle,
+						 &my_cq->pf,
+						 0,
+						 0,
+						 rpage,
+						 1,
+						 my_cq->galpas.
+						 kernel);
+
+		if (h_ret < H_SUCCESS) {
+			ehca_err(device, "hipz_h_register_rpage_cq() failed "
+				 "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
+				 "act_pages=%i", my_cq, my_cq->cq_number,
+				 h_ret, counter, param.act_pages);
+			cq = ERR_PTR(-EINVAL);
+			goto create_cq_exit4;
+		}
+
+		if (counter == (param.act_pages - 1)) {
+			vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+			if ((h_ret != H_SUCCESS) || vpage) {
+				ehca_err(device, "Registration of pages not "
+					 "complete ehca_cq=%p cq_num=%x "
+					 "h_ret=%lx", my_cq, my_cq->cq_number,
+					 h_ret);
+				cq = ERR_PTR(-EAGAIN);
+				goto create_cq_exit4;
+			}
+		} else {
+			if (h_ret != H_PAGE_REGISTERED) {
+				ehca_err(device, "Registration of page failed "
+					 "ehca_cq=%p cq_num=%x h_ret=%lx"
+					 "counter=%i act_pages=%i",
+					 my_cq, my_cq->cq_number,
+					 h_ret, counter, param.act_pages);
+				cq = ERR_PTR(-ENOMEM);
+				goto create_cq_exit4;
+			}
+		}
+	}
+
+	ipz_qeit_reset(&my_cq->ipz_queue);
+
+	gal = my_cq->galpas.kernel;
+	cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
+	ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
+		 my_cq, my_cq->cq_number, cqx_fec);
+
+	my_cq->ib_cq.cqe = my_cq->nr_of_entries =
+		param.act_nr_of_entries - additional_cqe;
+	my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
+
+	for (i = 0; i < QP_HASHTAB_LEN; i++)
+		INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+
+	if (context) {
+		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
+		struct ehca_create_cq_resp resp;
+		struct vm_area_struct *vma;
+		memset(&resp, 0, sizeof(resp));
+		resp.cq_number = my_cq->cq_number;
+		resp.token = my_cq->token;
+		resp.ipz_queue.qe_size = ipz_queue->qe_size;
+		resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
+		resp.ipz_queue.queue_length = ipz_queue->queue_length;
+		resp.ipz_queue.pagesize = ipz_queue->pagesize;
+		resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
+				       ipz_queue->queue_length,
+				       (void**)&resp.ipz_queue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(device, "Could not mmap queue pages");
+			cq = ERR_PTR(ret);
+			goto create_cq_exit4;
+		}
+		my_cq->uspace_queue = resp.ipz_queue.queue;
+		resp.galpas = my_cq->galpas;
+		ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
+					 (void**)&resp.galpas.kernel.fw_handle,
+					 &vma);
+		if (ret) {
+			ehca_err(device, "Could not mmap fw_handle");
+			cq = ERR_PTR(ret);
+			goto create_cq_exit5;
+		}
+		my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+			ehca_err(device, "Copy to udata failed.");
+			goto create_cq_exit6;
+		}
+	}
+
+	return cq;
+
+create_cq_exit6:
+	ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+
+create_cq_exit5:
+	ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
+
+create_cq_exit4:
+	ipz_queue_dtor(&my_cq->ipz_queue);
+
+create_cq_exit3:
+	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+	if (h_ret != H_SUCCESS)
+		ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
+			 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
+
+create_cq_exit2:
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	idr_remove(&ehca_cq_idr, my_cq->token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+create_cq_exit1:
+	kmem_cache_free(cq_cache, my_cq);
+
+	return cq;
+}
+
+int ehca_destroy_cq(struct ib_cq *cq)
+{
+	u64 h_ret;
+	int ret;
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	int cq_num = my_cq->cq_number;
+	struct ib_device *device = cq->device;
+	struct ehca_shca *shca = container_of(device, struct ehca_shca,
+					      ib_device);
+	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	while (my_cq->nr_callbacks)
+		yield();
+
+	idr_remove(&ehca_cq_idr, my_cq->token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+		ehca_err(device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_cq->ownpid);
+		return -EINVAL;
+	}
+
+	/* un-mmap if vma alloc */
+	if (my_cq->uspace_queue ) {
+		ret = ehca_munmap(my_cq->uspace_queue,
+				  my_cq->ipz_queue.queue_length);
+		if (ret)
+			ehca_err(device, "Could not munmap queue ehca_cq=%p "
+				 "cq_num=%x", my_cq, cq_num);
+		ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+		if (ret)
+			ehca_err(device, "Could not munmap fwh ehca_cq=%p "
+				 "cq_num=%x", my_cq, cq_num);
+	}
+
+	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
+	if (h_ret == H_R_STATE) {
+		/* cq in err: read err data and destroy it forcibly */
+		ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err "
+			 "state. Try to delete it forcibly.",
+			 my_cq, cq_num, my_cq->ipz_cq_handle.handle);
+		ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
+		h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+		if (h_ret == H_SUCCESS)
+			ehca_dbg(device, "cq_num=%x deleted successfully.",
+				 cq_num);
+	}
+	if (h_ret != H_SUCCESS) {
+		ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
+			 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
+		return ehca2ib_return_code(h_ret);
+	}
+	ipz_queue_dtor(&my_cq->ipz_queue);
+	kmem_cache_free(cq_cache, my_cq);
+
+	return 0;
+}
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	u32 cur_pid = current->tgid;
+
+	if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+		ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_cq->ownpid);
+		return -EINVAL;
+	}
+
+	/* TODO: proper resize needs to be done */
+	ehca_err(cq->device, "not implemented yet");
+
+	return -EFAULT;
+}
+
+int ehca_init_cq_cache(void)
+{
+	cq_cache = kmem_cache_create("ehca_cache_cq",
+				     sizeof(struct ehca_cq), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!cq_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_cq_cache(void)
+{
+	if (cq_cache)
+		kmem_cache_destroy(cq_cache);
+}
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@ -0,0 +1,185 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Event queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_qes.h"
+#include "hcp_if.h"
+#include "ipz_pt_fn.h"
+
+int ehca_create_eq(struct ehca_shca *shca,
+		   struct ehca_eq *eq,
+		   const enum ehca_eq_type type, const u32 length)
+{
+	u64 ret;
+	u32 nr_pages;
+	u32 i;
+	void *vpage;
+	struct ib_device *ib_dev = &shca->ib_device;
+
+	spin_lock_init(&eq->spinlock);
+	eq->is_initialized = 0;
+
+	if (type != EHCA_EQ && type != EHCA_NEQ) {
+		ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
+		return -EINVAL;
+	}
+	if (!length) {
+		ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
+		return -EINVAL;
+	}
+
+	ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+				       &eq->pf,
+				       type,
+				       length,
+				       &eq->ipz_eq_handle,
+				       &eq->length,
+				       &nr_pages, &eq->ist);
+
+	if (ret != H_SUCCESS) {
+		ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
+		return -EINVAL;
+	}
+
+	ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
+			     EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
+	if (!ret) {
+		ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
+		goto create_eq_exit1;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		u64 rpage;
+
+		if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) {
+			ret = H_RESOURCE;
+			goto create_eq_exit2;
+		}
+
+		rpage = virt_to_abs(vpage);
+		ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+					       eq->ipz_eq_handle,
+					       &eq->pf,
+					       0, 0, rpage, 1);
+
+		if (i == (nr_pages - 1)) {
+			/* last page */
+			vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+			if (ret != H_SUCCESS || vpage)
+				goto create_eq_exit2;
+		} else {
+			if (ret != H_PAGE_REGISTERED || !vpage)
+				goto create_eq_exit2;
+		}
+	}
+
+	ipz_qeit_reset(&eq->ipz_queue);
+
+	/* register interrupt handlers and initialize work queues */
+	if (type == EHCA_EQ) {
+		ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
+					  SA_INTERRUPT, "ehca_eq",
+					  (void *)shca);
+		if (ret < 0)
+			ehca_err(ib_dev, "Can't map interrupt handler.");
+
+		tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
+	} else if (type == EHCA_NEQ) {
+		ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq,
+					  SA_INTERRUPT, "ehca_neq",
+					  (void *)shca);
+		if (ret < 0)
+			ehca_err(ib_dev, "Can't map interrupt handler.");
+
+		tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
+	}
+
+	eq->is_initialized = 1;
+
+	return 0;
+
+create_eq_exit2:
+	ipz_queue_dtor(&eq->ipz_queue);
+
+create_eq_exit1:
+	hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+	return -EINVAL;
+}
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+	unsigned long flags;
+	void *eqe;
+
+	spin_lock_irqsave(&eq->spinlock, flags);
+	eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
+	spin_unlock_irqrestore(&eq->spinlock, flags);
+
+	return eqe;
+}
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+	unsigned long flags;
+	u64 h_ret;
+
+	spin_lock_irqsave(&eq->spinlock, flags);
+	ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+
+	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+	spin_unlock_irqrestore(&eq->spinlock, flags);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't free EQ resources.");
+		return -EINVAL;
+	}
+	ipz_queue_dtor(&eq->ipz_queue);
+
+	return 0;
+}
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@ -0,0 +1,241 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HCA query functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query device properties");
+		ret = -EINVAL;
+		goto query_device1;
+	}
+
+	memset(props, 0, sizeof(struct ib_device_attr));
+	props->fw_ver          = rblock->hw_ver;
+	props->max_mr_size     = rblock->max_mr_size;
+	props->vendor_id       = rblock->vendor_id >> 8;
+	props->vendor_part_id  = rblock->vendor_part_id >> 16;
+	props->hw_ver          = rblock->hw_ver;
+	props->max_qp          = min_t(int, rblock->max_qp, INT_MAX);
+	props->max_qp_wr       = min_t(int, rblock->max_wqes_wq, INT_MAX);
+	props->max_sge         = min_t(int, rblock->max_sge, INT_MAX);
+	props->max_sge_rd      = min_t(int, rblock->max_sge_rd, INT_MAX);
+	props->max_cq          = min_t(int, rblock->max_cq, INT_MAX);
+	props->max_cqe         = min_t(int, rblock->max_cqe, INT_MAX);
+	props->max_mr          = min_t(int, rblock->max_mr, INT_MAX);
+	props->max_mw          = min_t(int, rblock->max_mw, INT_MAX);
+	props->max_pd          = min_t(int, rblock->max_pd, INT_MAX);
+	props->max_ah          = min_t(int, rblock->max_ah, INT_MAX);
+	props->max_fmr         = min_t(int, rblock->max_mr, INT_MAX);
+	props->max_srq         = 0;
+	props->max_srq_wr      = 0;
+	props->max_srq_sge     = 0;
+	props->max_pkeys       = 16;
+	props->local_ca_ack_delay
+		= rblock->local_ca_ack_delay;
+	props->max_raw_ipv6_qp
+		= min_t(int, rblock->max_raw_ipv6_qp, INT_MAX);
+	props->max_raw_ethy_qp
+		= min_t(int, rblock->max_raw_ethy_qp, INT_MAX);
+	props->max_mcast_grp
+		= min_t(int, rblock->max_mcast_grp, INT_MAX);
+	props->max_mcast_qp_attach
+		= min_t(int, rblock->max_mcast_qp_attach, INT_MAX);
+	props->max_total_mcast_qp_attach
+		= min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
+
+query_device1:
+	kfree(rblock);
+
+	return ret;
+}
+
+int ehca_query_port(struct ib_device *ibdev,
+		    u8 port, struct ib_port_attr *props)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_port *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_port1;
+	}
+
+	memset(props, 0, sizeof(struct ib_port_attr));
+	props->state = rblock->state;
+
+	switch (rblock->max_mtu) {
+	case 0x1:
+		props->active_mtu = props->max_mtu = IB_MTU_256;
+		break;
+	case 0x2:
+		props->active_mtu = props->max_mtu = IB_MTU_512;
+		break;
+	case 0x3:
+		props->active_mtu = props->max_mtu = IB_MTU_1024;
+		break;
+	case 0x4:
+		props->active_mtu = props->max_mtu = IB_MTU_2048;
+		break;
+	case 0x5:
+		props->active_mtu = props->max_mtu = IB_MTU_4096;
+		break;
+	default:
+		ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
+			 rblock->max_mtu);
+		break;
+	}
+
+	props->gid_tbl_len     = rblock->gid_tbl_len;
+	props->max_msg_sz      = rblock->max_msg_sz;
+	props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
+	props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
+	props->pkey_tbl_len    = rblock->pkey_tbl_len;
+	props->lid             = rblock->lid;
+	props->sm_lid          = rblock->sm_lid;
+	props->lmc             = rblock->lmc;
+	props->sm_sl           = rblock->sm_sl;
+	props->subnet_timeout  = rblock->subnet_timeout;
+	props->init_type_reply = rblock->init_type_reply;
+
+	props->active_width    = IB_WIDTH_12X;
+	props->active_speed    = 0x1;
+
+query_port1:
+	kfree(rblock);
+
+	return ret;
+}
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+	struct hipz_query_port *rblock;
+
+	if (index > 16) {
+		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+		return -EINVAL;
+	}
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_pkey1;
+	}
+
+	memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
+
+query_pkey1:
+	kfree(rblock);
+
+	return ret;
+}
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port,
+		   int index, union ib_gid *gid)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_port *rblock;
+
+	if (index > 255) {
+		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+		return -EINVAL;
+	}
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_gid1;
+	}
+
+	memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
+	memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
+
+query_gid1:
+	kfree(rblock);
+
+	return ret;
+}
+
+int ehca_modify_port(struct ib_device *ibdev,
+		     u8 port, int port_modify_mask,
+		     struct ib_port_modify *props)
+{
+	/* Not implemented yet */
+	return -EFAULT;
+}
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@ -0,0 +1,762 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Functions for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)
+#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)
+#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2,7)
+#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32,63)
+#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32,63)
+
+#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1,1)
+#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
+#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
+
+#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
+#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+static void queue_comp_task(struct ehca_cq *__cq);
+
+static struct ehca_comp_pool* pool;
+static struct notifier_block comp_pool_callback_nb;
+
+#endif
+
+static inline void comp_event_callback(struct ehca_cq *cq)
+{
+	if (!cq->ib_cq.comp_handler)
+		return;
+
+	spin_lock(&cq->cb_lock);
+	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
+	spin_unlock(&cq->cb_lock);
+
+	return;
+}
+
+static void print_error_data(struct ehca_shca * shca, void* data,
+			     u64* rblock, int length)
+{
+	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+	u64 resource = rblock[1];
+
+	switch (type) {
+	case 0x1: /* Queue Pair */
+	{
+		struct ehca_qp *qp = (struct ehca_qp*)data;
+
+		/* only print error data if AER is set */
+		if (rblock[6] == 0)
+			return;
+
+		ehca_err(&shca->ib_device,
+			 "QP 0x%x (resource=%lx) has errors.",
+			 qp->ib_qp.qp_num, resource);
+		break;
+	}
+	case 0x4: /* Completion Queue */
+	{
+		struct ehca_cq *cq = (struct ehca_cq*)data;
+
+		ehca_err(&shca->ib_device,
+			 "CQ 0x%x (resource=%lx) has errors.",
+			 cq->cq_number, resource);
+		break;
+	}
+	default:
+		ehca_err(&shca->ib_device,
+			 "Unknown errror type: %lx on %s.",
+			 type, shca->ib_device.name);
+		break;
+	}
+
+	ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
+		 "---------------------------------------------------");
+	ehca_dmp(rblock, length, "resource=%lx", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data end "
+		 "----------------------------------------------------");
+
+	return;
+}
+
+int ehca_error_data(struct ehca_shca *shca, void *data,
+		    u64 resource)
+{
+
+	unsigned long ret;
+	u64 *rblock;
+	unsigned long block_count;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
+		ret = -ENOMEM;
+		goto error_data1;
+	}
+
+	ret = hipz_h_error_data(shca->ipz_hca_handle,
+				resource,
+				rblock,
+				&block_count);
+
+	if (ret == H_R_STATE) {
+		ehca_err(&shca->ib_device,
+			 "No error data is available: %lx.", resource);
+	}
+	else if (ret == H_SUCCESS) {
+		int length;
+
+		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
+
+		if (length > PAGE_SIZE)
+			length = PAGE_SIZE;
+
+		print_error_data(shca, data, rblock, length);
+	}
+	else {
+		ehca_err(&shca->ib_device,
+			 "Error data could not be fetched: %lx", resource);
+	}
+
+	kfree(rblock);
+
+error_data1:
+	return ret;
+
+}
+
+static void qp_event_callback(struct ehca_shca *shca,
+			      u64 eqe,
+			      enum ib_event_type event_type)
+{
+	struct ib_event event;
+	struct ehca_qp *qp;
+	unsigned long flags;
+	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
+
+	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	qp = idr_find(&ehca_qp_idr, token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+
+	if (!qp)
+		return;
+
+	ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+
+	if (!qp->ib_qp.event_handler)
+		return;
+
+	event.device     = &shca->ib_device;
+	event.event      = event_type;
+	event.element.qp = &qp->ib_qp;
+
+	qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+
+	return;
+}
+
+static void cq_event_callback(struct ehca_shca *shca,
+					  u64 eqe)
+{
+	struct ehca_cq *cq;
+	unsigned long flags;
+	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
+
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	cq = idr_find(&ehca_cq_idr, token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	if (!cq)
+		return;
+
+	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+
+	return;
+}
+
+static void parse_identifier(struct ehca_shca *shca, u64 eqe)
+{
+	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
+
+	switch (identifier) {
+	case 0x02: /* path migrated */
+		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+		break;
+	case 0x03: /* communication established */
+		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+		break;
+	case 0x04: /* send queue drained */
+		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+		break;
+	case 0x05: /* QP error */
+	case 0x06: /* QP error */
+		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+		break;
+	case 0x07: /* CQ error */
+	case 0x08: /* CQ error */
+		cq_event_callback(shca, eqe);
+		break;
+	case 0x09: /* MRMWPTE error */
+		ehca_err(&shca->ib_device, "MRMWPTE error.");
+		break;
+	case 0x0A: /* port event */
+		ehca_err(&shca->ib_device, "Port event.");
+		break;
+	case 0x0B: /* MR access error */
+		ehca_err(&shca->ib_device, "MR access error.");
+		break;
+	case 0x0C: /* EQ error */
+		ehca_err(&shca->ib_device, "EQ error.");
+		break;
+	case 0x0D: /* P/Q_Key mismatch */
+		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
+		break;
+	case 0x10: /* sampling complete */
+		ehca_err(&shca->ib_device, "Sampling complete.");
+		break;
+	case 0x11: /* unaffiliated access error */
+		ehca_err(&shca->ib_device, "Unaffiliated access error.");
+		break;
+	case 0x12: /* path migrating error */
+		ehca_err(&shca->ib_device, "Path migration error.");
+		break;
+	case 0x13: /* interface trace stopped */
+		ehca_err(&shca->ib_device, "Interface trace stopped.");
+		break;
+	case 0x14: /* first error capture info available */
+	default:
+		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
+			 identifier, shca->ib_device.name);
+		break;
+	}
+
+	return;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
+	struct ib_event event;
+	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+
+	switch (ec) {
+	case 0x30: /* port availability change */
+		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+			ehca_info(&shca->ib_device,
+				  "port %x is active.", port);
+			event.device = &shca->ib_device;
+			event.event = IB_EVENT_PORT_ACTIVE;
+			event.element.port_num = port;
+			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+			ib_dispatch_event(&event);
+		} else {
+			ehca_info(&shca->ib_device,
+				  "port %x is inactive.", port);
+			event.device = &shca->ib_device;
+			event.event = IB_EVENT_PORT_ERR;
+			event.element.port_num = port;
+			shca->sport[port - 1].port_state = IB_PORT_DOWN;
+			ib_dispatch_event(&event);
+		}
+		break;
+	case 0x31:
+		/* port configuration change
+		 * disruptive change is caused by
+		 * LID, PKEY or SM change
+		 */
+		ehca_warn(&shca->ib_device,
+			  "disruptive port %x configuration change", port);
+
+		ehca_info(&shca->ib_device,
+			 "port %x is inactive.", port);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ERR;
+		event.element.port_num = port;
+		shca->sport[port - 1].port_state = IB_PORT_DOWN;
+		ib_dispatch_event(&event);
+
+		ehca_info(&shca->ib_device,
+			 "port %x is active.", port);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ACTIVE;
+		event.element.port_num = port;
+		shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+		ib_dispatch_event(&event);
+		break;
+	case 0x32: /* adapter malfunction */
+		ehca_err(&shca->ib_device, "Adapter malfunction.");
+		break;
+	case 0x33:  /* trace stopped */
+		ehca_err(&shca->ib_device, "Traced stopped.");
+		break;
+	default:
+		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+			 ec, shca->ib_device.name);
+		break;
+	}
+
+	return;
+}
+
+static inline void reset_eq_pending(struct ehca_cq *cq)
+{
+	u64 CQx_EP;
+	struct h_galpa gal = cq->galpas.kernel;
+
+	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
+	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
+
+	return;
+}
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+	tasklet_hi_schedule(&shca->neq.interrupt_task);
+
+	return IRQ_HANDLED;
+}
+
+void ehca_tasklet_neq(unsigned long data)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)data;
+	struct ehca_eqe *eqe;
+	u64 ret;
+
+	eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+
+	while (eqe) {
+		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
+			parse_ec(shca, eqe->entry);
+
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+	}
+
+	ret = hipz_h_reset_event(shca->ipz_hca_handle,
+				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
+
+	if (ret != H_SUCCESS)
+		ehca_err(&shca->ib_device, "Can't clear notification events.");
+
+	return;
+}
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+	tasklet_hi_schedule(&shca->eq.interrupt_task);
+
+	return IRQ_HANDLED;
+}
+
+void ehca_tasklet_eq(unsigned long data)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)data;
+	struct ehca_eqe *eqe;
+	int int_state;
+	int query_cnt = 0;
+
+	do {
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+		if ((shca->hw_level >= 2) && eqe)
+			int_state = 1;
+		else
+			int_state = 0;
+
+		while ((int_state == 1) || eqe) {
+			while (eqe) {
+				u64 eqe_value = eqe->entry;
+
+				ehca_dbg(&shca->ib_device,
+					 "eqe_value=%lx", eqe_value);
+
+				/* TODO: better structure */
+				if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
+						   eqe_value)) {
+					unsigned long flags;
+					u32 token;
+					struct ehca_cq *cq;
+
+					ehca_dbg(&shca->ib_device,
+						 "... completion event");
+					token =
+						EHCA_BMASK_GET(EQE_CQ_TOKEN,
+							       eqe_value);
+					spin_lock_irqsave(&ehca_cq_idr_lock,
+							  flags);
+					cq = idr_find(&ehca_cq_idr, token);
+
+					if (cq == NULL) {
+						spin_unlock(&ehca_cq_idr_lock);
+						break;
+					}
+
+					reset_eq_pending(cq);
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+					queue_comp_task(cq);
+					spin_unlock_irqrestore(&ehca_cq_idr_lock,
+							       flags);
+#else
+					spin_unlock_irqrestore(&ehca_cq_idr_lock,
+							       flags);
+					comp_event_callback(cq);
+#endif
+				} else {
+					ehca_dbg(&shca->ib_device,
+						 "... non completion event");
+					parse_identifier(shca, eqe_value);
+				}
+				eqe =
+					(struct ehca_eqe *)ehca_poll_eq(shca,
+								    &shca->eq);
+			}
+
+			if (shca->hw_level >= 2) {
+				int_state =
+				    hipz_h_query_int_state(shca->ipz_hca_handle,
+							   shca->eq.ist);
+				query_cnt++;
+				iosync();
+				if (query_cnt >= 100) {
+					query_cnt = 0;
+					int_state = 0;
+				}
+			}
+			eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+		}
+	} while (int_state != 0);
+
+	return;
+}
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
+{
+	unsigned long flags_last_cpu;
+
+	if (ehca_debug_level)
+		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+
+	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
+	pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (pool->last_cpu == NR_CPUS)
+		pool->last_cpu = first_cpu(cpu_online_map);
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+
+	return pool->last_cpu;
+}
+
+static void __queue_comp_task(struct ehca_cq *__cq,
+			      struct ehca_cpu_comp_task *cct)
+{
+	unsigned long flags_cct;
+	unsigned long flags_cq;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	spin_lock_irqsave(&__cq->task_lock, flags_cq);
+
+	if (__cq->nr_callbacks == 0) {
+		__cq->nr_callbacks++;
+		list_add_tail(&__cq->entry, &cct->cq_list);
+		cct->cq_jobs++;
+		wake_up(&cct->wait_queue);
+	}
+	else
+		__cq->nr_callbacks++;
+
+	spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+}
+
+static void queue_comp_task(struct ehca_cq *__cq)
+{
+	int cpu;
+	int cpu_id;
+	struct ehca_cpu_comp_task *cct;
+
+	cpu = get_cpu();
+	cpu_id = find_next_online_cpu(pool);
+
+	BUG_ON(!cpu_online(cpu_id));
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+
+	if (cct->cq_jobs > 0) {
+		cpu_id = find_next_online_cpu(pool);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+	}
+
+	__queue_comp_task(__cq, cct);
+
+	put_cpu();
+
+	return;
+}
+
+static void run_comp_task(struct ehca_cpu_comp_task* cct)
+{
+	struct ehca_cq *cq;
+	unsigned long flags_cct;
+	unsigned long flags_cq;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+	while (!list_empty(&cct->cq_list)) {
+		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		comp_event_callback(cq);
+		spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		cq->nr_callbacks--;
+		if (cq->nr_callbacks == 0) {
+			list_del_init(cct->cq_list.next);
+			cct->cq_jobs--;
+		}
+		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
+
+	}
+
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+	return;
+}
+
+static int comp_task(void *__cct)
+{
+	struct ehca_cpu_comp_task* cct = __cct;
+	DECLARE_WAITQUEUE(wait, current);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while(!kthread_should_stop()) {
+		add_wait_queue(&cct->wait_queue, &wait);
+
+		if (list_empty(&cct->cq_list))
+			schedule();
+		else
+			__set_current_state(TASK_RUNNING);
+
+		remove_wait_queue(&cct->wait_queue, &wait);
+
+		if (!list_empty(&cct->cq_list))
+			run_comp_task(__cct);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
+					    int cpu)
+{
+	struct ehca_cpu_comp_task *cct;
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	spin_lock_init(&cct->task_lock);
+	INIT_LIST_HEAD(&cct->cq_list);
+	init_waitqueue_head(&cct->wait_queue);
+	cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
+
+	return cct->task;
+}
+
+static void destroy_comp_task(struct ehca_comp_pool *pool,
+			      int cpu)
+{
+	struct ehca_cpu_comp_task *cct;
+	struct task_struct *task;
+	unsigned long flags_cct;
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+	task = cct->task;
+	cct->task = NULL;
+	cct->cq_jobs = 0;
+
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+	if (task)
+		kthread_stop(task);
+
+	return;
+}
+
+static void take_over_work(struct ehca_comp_pool *pool,
+			   int cpu)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	LIST_HEAD(list);
+	struct ehca_cq *cq;
+	unsigned long flags_cct;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+	list_splice_init(&cct->cq_list, &list);
+
+	while(!list_empty(&list)) {
+	       cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+
+	       list_del(&cq->entry);
+	       __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+						 smp_processor_id()));
+	}
+
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+}
+
+static int comp_pool_callback(struct notifier_block *nfb,
+			      unsigned long action,
+			      void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct ehca_cpu_comp_task *cct;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
+		if(!create_comp_task(pool, cpu)) {
+			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	case CPU_UP_CANCELED:
+		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		kthread_bind(cct->task, any_online_cpu(cpu_online_map));
+		destroy_comp_task(pool, cpu);
+		break;
+	case CPU_ONLINE:
+		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		kthread_bind(cct->task, cpu);
+		wake_up_process(cct->task);
+		break;
+	case CPU_DOWN_PREPARE:
+		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
+		break;
+	case CPU_DOWN_FAILED:
+		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
+		break;
+	case CPU_DEAD:
+		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
+		destroy_comp_task(pool, cpu);
+		take_over_work(pool, cpu);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+#endif
+
+int ehca_create_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+	int cpu;
+	struct task_struct *task;
+
+	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
+	if (pool == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&pool->last_cpu_lock);
+	pool->last_cpu = any_online_cpu(cpu_online_map);
+
+	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
+	if (pool->cpu_comp_tasks == NULL) {
+		kfree(pool);
+		return -EINVAL;
+	}
+
+	for_each_online_cpu(cpu) {
+		task = create_comp_task(pool, cpu);
+		if (task) {
+			kthread_bind(task, cpu);
+			wake_up_process(task);
+		}
+	}
+
+	comp_pool_callback_nb.notifier_call = comp_pool_callback;
+	comp_pool_callback_nb.priority =0;
+	register_cpu_notifier(&comp_pool_callback_nb);
+#endif
+
+	return 0;
+}
+
+void ehca_destroy_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+	int i;
+
+	unregister_cpu_notifier(&comp_pool_callback_nb);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_online(i))
+			destroy_comp_task(pool, i);
+	}
+#endif
+
+	return;
+}
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@ -0,0 +1,77 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions and structs for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IRQ_H
+#define __EHCA_IRQ_H
+
+
+struct ehca_shca;
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <asm/atomic.h>
+
+int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_neq(unsigned long data);
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_eq(unsigned long data);
+
+struct ehca_cpu_comp_task {
+	wait_queue_head_t wait_queue;
+	struct list_head cq_list;
+	struct task_struct *task;
+	spinlock_t task_lock;
+	int cq_jobs;
+};
+
+struct ehca_comp_pool {
+	struct ehca_cpu_comp_task *cpu_comp_tasks;
+	int last_cpu;
+	spinlock_t last_cpu_lock;
+};
+
+int ehca_create_comp_pool(void);
+void ehca_destroy_comp_pool(void);
+
+#endif
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@ -0,0 +1,182 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions for internal functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Dietmar Decker <ddecker@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IVERBS_H__
+#define __EHCA_IVERBS_H__
+
+#include "ehca_classes.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
+
+int ehca_query_port(struct ib_device *ibdev, u8 port,
+		    struct ib_port_attr *props);
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
+		   union ib_gid *gid);
+
+int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
+		     struct ib_port_modify *props);
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata);
+
+int ehca_dealloc_pd(struct ib_pd *pd);
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_destroy_ah(struct ib_ah *ah);
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+			       struct ib_phys_buf *phys_buf_array,
+			       int num_phys_buf,
+			       int mr_access_flags, u64 *iova_start);
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+			       struct ib_umem *region,
+			       int mr_access_flags, struct ib_udata *udata);
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+		       int mr_rereg_mask,
+		       struct ib_pd *pd,
+		       struct ib_phys_buf *phys_buf_array,
+		       int num_phys_buf, int mr_access_flags, u64 *iova_start);
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+
+int ehca_dereg_mr(struct ib_mr *mr);
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+
+int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+		 struct ib_mw_bind *mw_bind);
+
+int ehca_dealloc_mw(struct ib_mw *mw);
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+			      int mr_access_flags,
+			      struct ib_fmr_attr *fmr_attr);
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+		      u64 *page_list, int list_len, u64 iova);
+
+int ehca_unmap_fmr(struct list_head *fmr_list);
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr);
+
+enum ehca_eq_type {
+	EHCA_EQ = 0, /* Event Queue              */
+	EHCA_NEQ     /* Notification Event Queue */
+};
+
+int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
+		   enum ehca_eq_type type, const u32 length);
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+			     struct ib_ucontext *context,
+			     struct ib_udata *udata);
+
+int ehca_destroy_cq(struct ib_cq *cq);
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+
+int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *init_attr,
+			     struct ib_udata *udata);
+
+int ehca_destroy_qp(struct ib_qp *qp);
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata);
+
+int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+		   struct ib_send_wr **bad_send_wr);
+
+int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr);
+
+u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
+		    struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata);
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context);
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+void ehca_poll_eqs(unsigned long data);
+
+int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped,
+		     struct vm_area_struct **vma);
+
+int ehca_mmap_register(u64 physical,void **mapped,
+		       struct vm_area_struct **vma);
+
+int ehca_munmap(unsigned long addr, size_t len);
+
+#endif
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@ -0,0 +1,818 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  module start stop, hca detection
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
+MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
+MODULE_VERSION("SVNEHCA_0016");
+
+int ehca_open_aqp1     = 0;
+int ehca_debug_level   = 0;
+int ehca_hw_level      = 0;
+int ehca_nr_ports      = 2;
+int ehca_use_hp_mr     = 0;
+int ehca_port_act_time = 30;
+int ehca_poll_all_eqs  = 1;
+int ehca_static_rate   = -1;
+
+module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
+module_param_named(debug_level,   ehca_debug_level,   int, 0);
+module_param_named(hw_level,      ehca_hw_level,      int, 0);
+module_param_named(nr_ports,      ehca_nr_ports,      int, 0);
+module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, 0);
+module_param_named(port_act_time, ehca_port_act_time, int, 0);
+module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
+module_param_named(static_rate,   ehca_static_rate,   int, 0);
+
+MODULE_PARM_DESC(open_aqp1,
+		 "AQP1 on startup (0: no (default), 1: yes)");
+MODULE_PARM_DESC(debug_level,
+		 "debug level"
+		 " (0: no debug traces (default), 1: with debug traces)");
+MODULE_PARM_DESC(hw_level,
+		 "hardware level"
+		 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+MODULE_PARM_DESC(nr_ports,
+		 "number of connected ports (default: 2)");
+MODULE_PARM_DESC(use_hp_mr,
+		 "high performance MRs (0: no (default), 1: yes)");
+MODULE_PARM_DESC(port_act_time,
+		 "time to wait for port activation (default: 30 sec)");
+MODULE_PARM_DESC(poll_all_eqs,
+		 "polls all event queues periodically"
+		 " (0: no, 1: yes (default))");
+MODULE_PARM_DESC(static_rate,
+		 "set permanent static rate (default: disabled)");
+
+spinlock_t ehca_qp_idr_lock;
+spinlock_t ehca_cq_idr_lock;
+DEFINE_IDR(ehca_qp_idr);
+DEFINE_IDR(ehca_cq_idr);
+
+static struct list_head shca_list; /* list of all registered ehcas */
+static spinlock_t shca_list_lock;
+
+static struct timer_list poll_eqs_timer;
+
+static int ehca_create_slab_caches(void)
+{
+	int ret;
+
+	ret = ehca_init_pd_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create PD SLAB cache.");
+		return ret;
+	}
+
+	ret = ehca_init_cq_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create CQ SLAB cache.");
+		goto create_slab_caches2;
+	}
+
+	ret = ehca_init_qp_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create QP SLAB cache.");
+		goto create_slab_caches3;
+	}
+
+	ret = ehca_init_av_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create AV SLAB cache.");
+		goto create_slab_caches4;
+	}
+
+	ret = ehca_init_mrmw_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create MR&MW SLAB cache.");
+		goto create_slab_caches5;
+	}
+
+	return 0;
+
+create_slab_caches5:
+	ehca_cleanup_av_cache();
+
+create_slab_caches4:
+	ehca_cleanup_qp_cache();
+
+create_slab_caches3:
+	ehca_cleanup_cq_cache();
+
+create_slab_caches2:
+	ehca_cleanup_pd_cache();
+
+	return ret;
+}
+
+static void ehca_destroy_slab_caches(void)
+{
+	ehca_cleanup_mrmw_cache();
+	ehca_cleanup_av_cache();
+	ehca_cleanup_qp_cache();
+	ehca_cleanup_cq_cache();
+	ehca_cleanup_pd_cache();
+}
+
+#define EHCA_HCAAVER  EHCA_BMASK_IBM(32,39)
+#define EHCA_REVID    EHCA_BMASK_IBM(40,63)
+
+int ehca_sense_attributes(struct ehca_shca *shca)
+{
+	int ret = 0;
+	u64 h_ret;
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_gen_err("Cannot allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
+	if (h_ret != H_SUCCESS) {
+		ehca_gen_err("Cannot query device properties. h_ret=%lx",
+			     h_ret);
+		ret = -EPERM;
+		goto num_ports1;
+	}
+
+	if (ehca_nr_ports == 1)
+		shca->num_ports = 1;
+	else
+		shca->num_ports = (u8)rblock->num_ports;
+
+	ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
+
+	if (ehca_hw_level == 0) {
+		u32 hcaaver;
+		u32 revid;
+
+		hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
+		revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
+
+		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
+
+		if ((hcaaver == 1) && (revid == 0))
+			shca->hw_level = 0;
+		else if ((hcaaver == 1) && (revid == 1))
+			shca->hw_level = 1;
+		else if ((hcaaver == 1) && (revid == 2))
+			shca->hw_level = 2;
+	}
+	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
+
+	shca->sport[0].rate = IB_RATE_30_GBPS;
+	shca->sport[1].rate = IB_RATE_30_GBPS;
+
+num_ports1:
+	kfree(rblock);
+	return ret;
+}
+
+static int init_node_guid(struct ehca_shca *shca)
+{
+	int ret = 0;
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query device properties");
+		ret = -EINVAL;
+		goto init_node_guid1;
+	}
+
+	memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
+
+init_node_guid1:
+	kfree(rblock);
+	return ret;
+}
+
+int ehca_register_device(struct ehca_shca *shca)
+{
+	int ret;
+
+	ret = init_node_guid(shca);
+	if (ret)
+		return ret;
+
+	strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
+	shca->ib_device.owner               = THIS_MODULE;
+
+	shca->ib_device.uverbs_abi_ver	    = 5;
+	shca->ib_device.uverbs_cmd_mask	    =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+
+	shca->ib_device.node_type           = RDMA_NODE_IB_CA;
+	shca->ib_device.phys_port_cnt       = shca->num_ports;
+	shca->ib_device.dma_device          = &shca->ibmebus_dev->ofdev.dev;
+	shca->ib_device.query_device        = ehca_query_device;
+	shca->ib_device.query_port          = ehca_query_port;
+	shca->ib_device.query_gid           = ehca_query_gid;
+	shca->ib_device.query_pkey          = ehca_query_pkey;
+	/* shca->in_device.modify_device    = ehca_modify_device    */
+	shca->ib_device.modify_port         = ehca_modify_port;
+	shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
+	shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
+	shca->ib_device.alloc_pd            = ehca_alloc_pd;
+	shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
+	shca->ib_device.create_ah	    = ehca_create_ah;
+	/* shca->ib_device.modify_ah	    = ehca_modify_ah;	    */
+	shca->ib_device.query_ah	    = ehca_query_ah;
+	shca->ib_device.destroy_ah	    = ehca_destroy_ah;
+	shca->ib_device.create_qp	    = ehca_create_qp;
+	shca->ib_device.modify_qp	    = ehca_modify_qp;
+	shca->ib_device.query_qp	    = ehca_query_qp;
+	shca->ib_device.destroy_qp	    = ehca_destroy_qp;
+	shca->ib_device.post_send	    = ehca_post_send;
+	shca->ib_device.post_recv	    = ehca_post_recv;
+	shca->ib_device.create_cq	    = ehca_create_cq;
+	shca->ib_device.destroy_cq	    = ehca_destroy_cq;
+	shca->ib_device.resize_cq	    = ehca_resize_cq;
+	shca->ib_device.poll_cq		    = ehca_poll_cq;
+	/* shca->ib_device.peek_cq	    = ehca_peek_cq;	    */
+	shca->ib_device.req_notify_cq	    = ehca_req_notify_cq;
+	/* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
+	shca->ib_device.get_dma_mr	    = ehca_get_dma_mr;
+	shca->ib_device.reg_phys_mr	    = ehca_reg_phys_mr;
+	shca->ib_device.reg_user_mr	    = ehca_reg_user_mr;
+	shca->ib_device.query_mr	    = ehca_query_mr;
+	shca->ib_device.dereg_mr	    = ehca_dereg_mr;
+	shca->ib_device.rereg_phys_mr	    = ehca_rereg_phys_mr;
+	shca->ib_device.alloc_mw	    = ehca_alloc_mw;
+	shca->ib_device.bind_mw		    = ehca_bind_mw;
+	shca->ib_device.dealloc_mw	    = ehca_dealloc_mw;
+	shca->ib_device.alloc_fmr	    = ehca_alloc_fmr;
+	shca->ib_device.map_phys_fmr	    = ehca_map_phys_fmr;
+	shca->ib_device.unmap_fmr	    = ehca_unmap_fmr;
+	shca->ib_device.dealloc_fmr	    = ehca_dealloc_fmr;
+	shca->ib_device.attach_mcast	    = ehca_attach_mcast;
+	shca->ib_device.detach_mcast	    = ehca_detach_mcast;
+	/* shca->ib_device.process_mad	    = ehca_process_mad;	    */
+	shca->ib_device.mmap		    = ehca_mmap;
+
+	ret = ib_register_device(&shca->ib_device);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "ib_register_device() failed ret=%x", ret);
+
+	return ret;
+}
+
+static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+{
+	struct ehca_sport *sport = &shca->sport[port - 1];
+	struct ib_cq *ibcq;
+	struct ib_qp *ibqp;
+	struct ib_qp_init_attr qp_init_attr;
+	int ret;
+
+	if (sport->ibcq_aqp1) {
+		ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
+		return -EPERM;
+	}
+
+	ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
+	if (IS_ERR(ibcq)) {
+		ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
+		return PTR_ERR(ibcq);
+	}
+	sport->ibcq_aqp1 = ibcq;
+
+	if (sport->ibqp_aqp1) {
+		ehca_err(&shca->ib_device, "AQP1 QP is already created.");
+		ret = -EPERM;
+		goto create_aqp1;
+	}
+
+	memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
+	qp_init_attr.send_cq          = ibcq;
+	qp_init_attr.recv_cq          = ibcq;
+	qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
+	qp_init_attr.cap.max_send_wr  = 100;
+	qp_init_attr.cap.max_recv_wr  = 100;
+	qp_init_attr.cap.max_send_sge = 2;
+	qp_init_attr.cap.max_recv_sge = 1;
+	qp_init_attr.qp_type          = IB_QPT_GSI;
+	qp_init_attr.port_num         = port;
+	qp_init_attr.qp_context       = NULL;
+	qp_init_attr.event_handler    = NULL;
+	qp_init_attr.srq              = NULL;
+
+	ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
+	if (IS_ERR(ibqp)) {
+		ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
+		ret = PTR_ERR(ibqp);
+		goto create_aqp1;
+	}
+	sport->ibqp_aqp1 = ibqp;
+
+	return 0;
+
+create_aqp1:
+	ib_destroy_cq(sport->ibcq_aqp1);
+	return ret;
+}
+
+static int ehca_destroy_aqp1(struct ehca_sport *sport)
+{
+	int ret;
+
+	ret = ib_destroy_qp(sport->ibqp_aqp1);
+	if (ret) {
+		ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret);
+		return ret;
+	}
+
+	ret = ib_destroy_cq(sport->ibcq_aqp1);
+	if (ret)
+		ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret);
+
+	return ret;
+}
+
+static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
+{
+	return  snprintf(buf, PAGE_SIZE, "%d\n",
+			 ehca_debug_level);
+}
+
+static ssize_t ehca_store_debug_level(struct device_driver *ddp,
+				      const char *buf, size_t count)
+{
+	int value = (*buf) - '0';
+	if (value >= 0 && value <= 9)
+		ehca_debug_level = value;
+	return 1;
+}
+
+DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
+	    ehca_show_debug_level, ehca_store_debug_level);
+
+void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
+{
+	driver_create_file(&drv->driver, &driver_attr_debug_level);
+}
+
+void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
+{
+	driver_remove_file(&drv->driver, &driver_attr_debug_level);
+}
+
+#define EHCA_RESOURCE_ATTR(name)                                           \
+static ssize_t  ehca_show_##name(struct device *dev,                       \
+				 struct device_attribute *attr,            \
+				 char *buf)                                \
+{									   \
+	struct ehca_shca *shca;						   \
+	struct hipz_query_hca *rblock;				           \
+	int data;                                                          \
+									   \
+	shca = dev->driver_data;					   \
+									   \
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);			   \
+	if (!rblock) {						           \
+		dev_err(dev, "Can't allocate rblock memory.");		   \
+		return 0;						   \
+	}								   \
+									   \
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
+		dev_err(dev, "Can't query device properties");	   	   \
+		kfree(rblock);					   	   \
+		return 0;					   	   \
+	}								   \
+									   \
+	data = rblock->name;                                               \
+	kfree(rblock);                                                     \
+									   \
+	if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))	   \
+		return snprintf(buf, 256, "1\n");			   \
+	else								   \
+		return snprintf(buf, 256, "%d\n", data);		   \
+									   \
+}									   \
+static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
+
+EHCA_RESOURCE_ATTR(num_ports);
+EHCA_RESOURCE_ATTR(hw_ver);
+EHCA_RESOURCE_ATTR(max_eq);
+EHCA_RESOURCE_ATTR(cur_eq);
+EHCA_RESOURCE_ATTR(max_cq);
+EHCA_RESOURCE_ATTR(cur_cq);
+EHCA_RESOURCE_ATTR(max_qp);
+EHCA_RESOURCE_ATTR(cur_qp);
+EHCA_RESOURCE_ATTR(max_mr);
+EHCA_RESOURCE_ATTR(cur_mr);
+EHCA_RESOURCE_ATTR(max_mw);
+EHCA_RESOURCE_ATTR(cur_mw);
+EHCA_RESOURCE_ATTR(max_pd);
+EHCA_RESOURCE_ATTR(max_ah);
+
+static ssize_t ehca_show_adapter_handle(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ehca_shca *shca = dev->driver_data;
+
+	return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle);
+
+}
+static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+
+
+void ehca_create_device_sysfs(struct ibmebus_dev *dev)
+{
+	device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+	device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
+	device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
+{
+	device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+static int __devinit ehca_probe(struct ibmebus_dev *dev,
+				const struct of_device_id *id)
+{
+	struct ehca_shca *shca;
+	u64 *handle;
+	struct ib_pd *ibpd;
+	int ret;
+
+	handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+	if (!handle) {
+		ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
+			     dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	if (!(*handle)) {
+		ehca_gen_err("Wrong eHCA handle for adapter: %s.",
+			     dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
+	if (!shca) {
+		ehca_gen_err("Cannot allocate shca memory.");
+		return -ENOMEM;
+	}
+
+	shca->ibmebus_dev = dev;
+	shca->ipz_hca_handle.handle = *handle;
+	dev->ofdev.dev.driver_data = shca;
+
+	ret = ehca_sense_attributes(shca);
+	if (ret < 0) {
+		ehca_gen_err("Cannot sense eHCA attributes.");
+		goto probe1;
+	}
+
+	ret = ehca_register_device(shca);
+	if (ret) {
+		ehca_gen_err("Cannot register Infiniband device");
+		goto probe1;
+	}
+
+	/* create event queues */
+	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create EQ.");
+		goto probe2;
+	}
+
+	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create NEQ.");
+		goto probe3;
+	}
+
+	/* create internal protection domain */
+	ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
+	if (IS_ERR(ibpd)) {
+		ehca_err(&shca->ib_device, "Cannot create internal PD.");
+		ret = PTR_ERR(ibpd);
+		goto probe4;
+	}
+
+	shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
+	shca->pd->ib_pd.device = &shca->ib_device;
+
+	/* create internal max MR */
+	ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
+
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
+			 ret);
+		goto probe5;
+	}
+
+	/* create AQP1 for port 1 */
+	if (ehca_open_aqp1 == 1) {
+		shca->sport[0].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 1);
+		if (ret) {
+			ehca_err(&shca->ib_device,
+				 "Cannot create AQP1 for port 1.");
+			goto probe6;
+		}
+	}
+
+	/* create AQP1 for port 2 */
+	if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
+		shca->sport[1].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 2);
+		if (ret) {
+			ehca_err(&shca->ib_device,
+				 "Cannot create AQP1 for port 2.");
+			goto probe7;
+		}
+	}
+
+	ehca_create_device_sysfs(dev);
+
+	spin_lock(&shca_list_lock);
+	list_add(&shca->shca_list, &shca_list);
+	spin_unlock(&shca_list_lock);
+
+	return 0;
+
+probe7:
+	ret = ehca_destroy_aqp1(&shca->sport[0]);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy AQP1 for port 1. ret=%x", ret);
+
+probe6:
+	ret = ehca_dereg_internal_maxmr(shca);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal MR. ret=%x", ret);
+
+probe5:
+	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal PD. ret=%x", ret);
+
+probe4:
+	ret = ehca_destroy_eq(shca, &shca->neq);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy NEQ. ret=%x", ret);
+
+probe3:
+	ret = ehca_destroy_eq(shca, &shca->eq);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy EQ. ret=%x", ret);
+
+probe2:
+	ib_unregister_device(&shca->ib_device);
+
+probe1:
+	ib_dealloc_device(&shca->ib_device);
+
+	return -EINVAL;
+}
+
+static int __devexit ehca_remove(struct ibmebus_dev *dev)
+{
+	struct ehca_shca *shca = dev->ofdev.dev.driver_data;
+	int ret;
+
+	ehca_remove_device_sysfs(dev);
+
+	if (ehca_open_aqp1 == 1) {
+		int i;
+		for (i = 0; i < shca->num_ports; i++) {
+			ret = ehca_destroy_aqp1(&shca->sport[i]);
+			if (ret)
+				ehca_err(&shca->ib_device,
+					 "Cannot destroy AQP1 for port %x "
+					 "ret=%x", ret, i);
+		}
+	}
+
+	ib_unregister_device(&shca->ib_device);
+
+	ret = ehca_dereg_internal_maxmr(shca);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal MR. ret=%x", ret);
+
+	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal PD. ret=%x", ret);
+
+	ret = ehca_destroy_eq(shca, &shca->eq);
+	if (ret)
+		ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
+
+	ret = ehca_destroy_eq(shca, &shca->neq);
+	if (ret)
+		ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret);
+
+	ib_dealloc_device(&shca->ib_device);
+
+	spin_lock(&shca_list_lock);
+	list_del(&shca->shca_list);
+	spin_unlock(&shca_list_lock);
+
+	return ret;
+}
+
+static struct of_device_id ehca_device_table[] =
+{
+	{
+		.name       = "lhca",
+		.compatible = "IBM,lhca",
+	},
+	{},
+};
+
+static struct ibmebus_driver ehca_driver = {
+	.name     = "ehca",
+	.id_table = ehca_device_table,
+	.probe    = ehca_probe,
+	.remove   = ehca_remove,
+};
+
+void ehca_poll_eqs(unsigned long data)
+{
+	struct ehca_shca *shca;
+
+	spin_lock(&shca_list_lock);
+	list_for_each_entry(shca, &shca_list, shca_list) {
+		if (shca->eq.is_initialized)
+			ehca_tasklet_eq((unsigned long)(void*)shca);
+	}
+	mod_timer(&poll_eqs_timer, jiffies + HZ);
+	spin_unlock(&shca_list_lock);
+}
+
+int __init ehca_module_init(void)
+{
+	int ret;
+
+	printk(KERN_INFO "eHCA Infiniband Device Driver "
+	                 "(Rel.: SVNEHCA_0016)\n");
+	idr_init(&ehca_qp_idr);
+	idr_init(&ehca_cq_idr);
+	spin_lock_init(&ehca_qp_idr_lock);
+	spin_lock_init(&ehca_cq_idr_lock);
+
+	INIT_LIST_HEAD(&shca_list);
+	spin_lock_init(&shca_list_lock);
+
+	if ((ret = ehca_create_comp_pool())) {
+		ehca_gen_err("Cannot create comp pool.");
+		return ret;
+	}
+
+	if ((ret = ehca_create_slab_caches())) {
+		ehca_gen_err("Cannot create SLAB caches");
+		ret = -ENOMEM;
+		goto module_init1;
+	}
+
+	if ((ret = ibmebus_register_driver(&ehca_driver))) {
+		ehca_gen_err("Cannot register eHCA device driver");
+		ret = -EINVAL;
+		goto module_init2;
+	}
+
+	ehca_create_driver_sysfs(&ehca_driver);
+
+	if (ehca_poll_all_eqs != 1) {
+		ehca_gen_err("WARNING!!!");
+		ehca_gen_err("It is possible to lose interrupts.");
+	} else {
+		init_timer(&poll_eqs_timer);
+		poll_eqs_timer.function = ehca_poll_eqs;
+		poll_eqs_timer.expires = jiffies + HZ;
+		add_timer(&poll_eqs_timer);
+	}
+
+	return 0;
+
+module_init2:
+	ehca_destroy_slab_caches();
+
+module_init1:
+	ehca_destroy_comp_pool();
+	return ret;
+};
+
+void __exit ehca_module_exit(void)
+{
+	if (ehca_poll_all_eqs == 1)
+		del_timer_sync(&poll_eqs_timer);
+
+	ehca_remove_driver_sysfs(&ehca_driver);
+	ibmebus_unregister_driver(&ehca_driver);
+
+	ehca_destroy_slab_caches();
+
+	ehca_destroy_comp_pool();
+
+	idr_destroy(&ehca_cq_idr);
+	idr_destroy(&ehca_qp_idr);
+};
+
+module_init(ehca_module_init);
+module_exit(ehca_module_exit);
--- a/drivers/infiniband/hw/ehca/ehca_mcast.c
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@ -0,0 +1,131 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  mcast  functions
+ *
+ *  Authors: Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+#define MAX_MC_LID 0xFFFE
+#define MIN_MC_LID 0xC000	/* Multicast limits */
+#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
+#define EHCA_VALID_MULTICAST_LID(lid) \
+	(((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
+
+int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
+		return -EINVAL;
+	}
+
+	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+		ehca_err(ibqp->device, "invalid mulitcast gid");
+		return -EINVAL;
+	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
+				   my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
+			 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+
+	return ehca2ib_return_code(h_ret);
+}
+
+int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->pd->device,
+					      struct ehca_shca, ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
+		return -EINVAL;
+	}
+
+	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+		ehca_err(ibqp->device, "invalid mulitcast gid");
+		return -EINVAL;
+	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
+				   my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
+			 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+
+	return ehca2ib_return_code(h_ret);
+}
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.h
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@ -0,0 +1,140 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW declarations and inline functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EHCA_MRMW_H_
+#define _EHCA_MRMW_H_
+
+int ehca_reg_mr(struct ehca_shca *shca,
+		struct ehca_mr *e_mr,
+		u64 *iova_start,
+		u64 size,
+		int acl,
+		struct ehca_pd *e_pd,
+		struct ehca_mr_pginfo *pginfo,
+		u32 *lkey,
+		u32 *rkey);
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+		       struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo);
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+		  struct ehca_mr *e_mr,
+		  u64 *iova_start,
+		  u64 size,
+		  int mr_access_flags,
+		  struct ehca_pd *e_pd,
+		  struct ehca_mr_pginfo *pginfo,
+		  u32 *lkey,
+		  u32 *rkey);
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+		       struct ehca_mr *e_fmr);
+
+int ehca_reg_smr(struct ehca_shca *shca,
+		 struct ehca_mr *e_origmr,
+		 struct ehca_mr *e_newmr,
+		 u64 *iova_start,
+		 int acl,
+		 struct ehca_pd *e_pd,
+		 u32 *lkey,
+		 u32 *rkey);
+
+int ehca_reg_internal_maxmr(struct ehca_shca *shca,
+			    struct ehca_pd *e_pd,
+			    struct ehca_mr **maxmr);
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+		   struct ehca_mr *e_newmr,
+		   u64 *iova_start,
+		   int acl,
+		   struct ehca_pd *e_pd,
+		   u32 *lkey,
+		   u32 *rkey);
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
+
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+				  int num_phys_buf,
+				  u64 *iova_start,
+				  u64 *size);
+
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+			     u64 *page_list,
+			     int list_len);
+
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+		     struct ehca_mr_pginfo *pginfo,
+		     u32 number,
+		     u64 *kpage);
+
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo,
+		       u64 *rpage);
+
+int ehca_mr_is_maxmr(u64 size,
+		     u64 *iova_start);
+
+void ehca_mrmw_map_acl(int ib_acl,
+		       u32 *hipz_acl);
+
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
+
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+			       int *ib_acl);
+
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
+
+void ehca_mr_deletenew(struct ehca_mr *mr);
+
+#endif  /*_EHCA_MRMW_H_*/
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@ -0,0 +1,114 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  PD functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+
+static struct kmem_cache *pd_cache;
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+			    struct ib_ucontext *context, struct ib_udata *udata)
+{
+	struct ehca_pd *pd;
+
+	pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
+	if (!pd) {
+		ehca_err(device, "device=%p context=%p out of memory",
+			 device, context);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(pd, 0, sizeof(struct ehca_pd));
+	pd->ownpid = current->tgid;
+
+	/*
+	 * Kernel PD: when device = -1, 0
+	 * User   PD: when context != -1
+	 */
+	if (!context) {
+		/*
+		 * Kernel PDs after init reuses always
+		 * the one created in ehca_shca_reopen()
+		 */
+		struct ehca_shca *shca = container_of(device, struct ehca_shca,
+						      ib_device);
+		pd->fw_pd.value = shca->pd->fw_pd.value;
+	} else
+		pd->fw_pd.value = (u64)pd;
+
+	return &pd->ib_pd;
+}
+
+int ehca_dealloc_pd(struct ib_pd *pd)
+{
+	u32 cur_pid = current->tgid;
+	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	kmem_cache_free(pd_cache,
+			container_of(pd, struct ehca_pd, ib_pd));
+
+	return 0;
+}
+
+int ehca_init_pd_cache(void)
+{
+	pd_cache = kmem_cache_create("ehca_cache_pd",
+				     sizeof(struct ehca_pd), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!pd_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_pd_cache(void)
+{
+	if (pd_cache)
+		kmem_cache_destroy(pd_cache);
+}
--- a/drivers/infiniband/hw/ehca/ehca_qes.h
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@ -0,0 +1,259 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Hardware request structures
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _EHCA_QES_H_
+#define _EHCA_QES_H_
+
+#include "ehca_tools.h"
+
+/* virtual scatter gather entry to specify remote adresses with length */
+struct ehca_vsgentry {
+	u64 vaddr;
+	u32 lkey;
+	u32 length;
+};
+
+#define GRH_FLAG_MASK        EHCA_BMASK_IBM(7,7)
+#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM(0,3)
+#define GRH_TCLASS_MASK      EHCA_BMASK_IBM(4,12)
+#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13,31)
+#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32,47)
+#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48,55)
+#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56,63)
+
+/*
+ * Unreliable Datagram Address Vector Format
+ * see IBTA Vol1 chapter 8.3 Global Routing Header
+ */
+struct ehca_ud_av {
+	u8 sl;
+	u8 lnh;
+	u16 dlid;
+	u8 reserved1;
+	u8 reserved2;
+	u8 reserved3;
+	u8 slid_path_bits;
+	u8 reserved4;
+	u8 ipd;
+	u8 reserved5;
+	u8 pmtu;
+	u32 reserved6;
+	u64 reserved7;
+	union {
+		struct {
+			u64 word_0; /* always set to 6  */
+			/*should be 0x1B for IB transport */
+			u64 word_1;
+			u64 word_2;
+			u64 word_3;
+			u64 word_4;
+		} grh;
+		struct {
+			u32 wd_0;
+			u32 wd_1;
+			/* DWord_1 --> SGID */
+
+			u32 sgid_wd3;
+			u32 sgid_wd2;
+
+			u32 sgid_wd1;
+			u32 sgid_wd0;
+			/* DWord_3 --> DGID */
+
+			u32 dgid_wd3;
+			u32 dgid_wd2;
+
+			u32 dgid_wd1;
+			u32 dgid_wd0;
+		} grh_l;
+	};
+};
+
+/* maximum number of sg entries allowed in a WQE */
+#define MAX_WQE_SG_ENTRIES 252
+
+#define WQE_OPTYPE_SEND             0x80
+#define WQE_OPTYPE_RDMAREAD         0x40
+#define WQE_OPTYPE_RDMAWRITE        0x20
+#define WQE_OPTYPE_CMPSWAP          0x10
+#define WQE_OPTYPE_FETCHADD         0x08
+#define WQE_OPTYPE_BIND             0x04
+
+#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
+#define WQE_WRFLAG_FENCE            0x40
+#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
+#define WQE_WRFLAG_SOLIC_EVENT      0x10
+
+#define WQEF_CACHE_HINT             0x80
+#define WQEF_CACHE_HINT_RD_WR       0x40
+#define WQEF_TIMED_WQE              0x20
+#define WQEF_PURGE                  0x08
+#define WQEF_HIGH_NIBBLE            0xF0
+
+#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
+#define MW_BIND_ACCESSCTRL_R_READ    0x20
+#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
+
+struct ehca_wqe {
+	u64 work_request_id;
+	u8 optype;
+	u8 wr_flag;
+	u16 pkeyi;
+	u8 wqef;
+	u8 nr_of_data_seg;
+	u16 wqe_provided_slid;
+	u32 destination_qp_number;
+	u32 resync_psn_sqp;
+	u32 local_ee_context_qkey;
+	u32 immediate_data;
+	union {
+		struct {
+			u64 remote_virtual_adress;
+			u32 rkey;
+			u32 reserved;
+			u64 atomic_1st_op_dma_len;
+			u64 atomic_2nd_op;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+
+		} nud;
+		struct {
+			u64 ehca_ud_av_ptr;
+			u64 reserved1;
+			u64 reserved2;
+			u64 reserved3;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+		} ud_avp;
+		struct {
+			struct ehca_ud_av ud_av;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
+						     2];
+		} ud_av;
+		struct {
+			u64 reserved0;
+			u64 reserved1;
+			u64 reserved2;
+			u64 reserved3;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+		} all_rcv;
+
+		struct {
+			u64 reserved;
+			u32 rkey;
+			u32 old_rkey;
+			u64 reserved1;
+			u64 reserved2;
+			u64 virtual_address;
+			u32 reserved3;
+			u32 length;
+			u32 reserved4;
+			u16 reserved5;
+			u8 reserved6;
+			u8 lr_ctl;
+			u32 lkey;
+			u32 reserved7;
+			u64 reserved8;
+			u64 reserved9;
+			u64 reserved10;
+			u64 reserved11;
+		} bind;
+		struct {
+			u64 reserved12;
+			u64 reserved13;
+			u32 size;
+			u32 start;
+		} inline_data;
+	} u;
+
+};
+
+#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0)
+#define WC_IMM_DATA     EHCA_BMASK_IBM(1,1)
+#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2,2)
+#define WC_SE_BIT       EHCA_BMASK_IBM(3,3)
+#define WC_STATUS_ERROR_BIT 0x80000000
+#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
+#define WC_STATUS_PURGE_BIT 0x10
+
+struct ehca_cqe {
+	u64 work_request_id;
+	u8 optype;
+	u8 w_completion_flags;
+	u16 reserved1;
+	u32 nr_bytes_transferred;
+	u32 immediate_data;
+	u32 local_qp_number;
+	u8 freed_resource_count;
+	u8 service_level;
+	u16 wqe_count;
+	u32 qp_token;
+	u32 qkey_ee_token;
+	u32 remote_qp_number;
+	u16 dlid;
+	u16 rlid;
+	u16 reserved2;
+	u16 pkey_index;
+	u32 cqe_timestamp;
+	u32 wqe_timestamp;
+	u8 wqe_timestamp_valid;
+	u8 reserved3;
+	u8 reserved4;
+	u8 cqe_flags;
+	u32 status;
+};
+
+struct ehca_eqe {
+	u64 entry;
+};
+
+struct ehca_mrte {
+	u64 starting_va;
+	u64 length; /* length of memory region in bytes*/
+	u32 pd;
+	u8 key_instance;
+	u8 pagesize;
+	u8 mr_control;
+	u8 local_remote_access_ctrl;
+	u8 reserved[0x20 - 0x18];
+	u64 at_pointer[4];
+};
+#endif /*_EHCA_QES_H_*/
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@ -0,0 +1,653 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  post_send/recv, poll_cq, req_notify
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm-powerpc/system.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+				  struct ehca_wqe *wqe_p,
+				  struct ib_recv_wr *recv_wr)
+{
+	u8 cnt_ds;
+	if (unlikely((recv_wr->num_sge < 0) ||
+		     (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
+		ehca_gen_err("Invalid number of WQE SGE. "
+			 "num_sqe=%x max_nr_of_sg=%x",
+			 recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
+		return -EINVAL; /* invalid SG list length */
+	}
+
+	/* clear wqe header until sglist */
+	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+	wqe_p->work_request_id = recv_wr->wr_id;
+	wqe_p->nr_of_data_seg = recv_wr->num_sge;
+
+	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
+		wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
+			recv_wr->sg_list[cnt_ds].addr;
+		wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
+			recv_wr->sg_list[cnt_ds].lkey;
+		wqe_p->u.all_rcv.sg_list[cnt_ds].length =
+			recv_wr->sg_list[cnt_ds].length;
+	}
+
+	if (ehca_debug_level) {
+		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue);
+		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+	}
+
+	return 0;
+}
+
+#if defined(DEBUG_GSI_SEND_WR)
+
+/* need ib_mad struct */
+#include <rdma/ib_mad.h>
+
+static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+{
+	int idx;
+	int j;
+	while (send_wr) {
+		struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
+		struct ib_sge *sge = send_wr->sg_list;
+		ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
+			     "send_flags=%x opcode=%x",idx, send_wr->wr_id,
+			     send_wr->num_sge, send_wr->send_flags,
+			     send_wr->opcode);
+		if (mad_hdr) {
+			ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
+				     "mgmt_class=%x class_version=%x method=%x "
+				     "status=%x class_specific=%x tid=%lx "
+				     "attr_id=%x resv=%x attr_mod=%x",
+				     idx, mad_hdr->base_version,
+				     mad_hdr->mgmt_class,
+				     mad_hdr->class_version, mad_hdr->method,
+				     mad_hdr->status, mad_hdr->class_specific,
+				     mad_hdr->tid, mad_hdr->attr_id,
+				     mad_hdr->resv,
+				     mad_hdr->attr_mod);
+		}
+		for (j = 0; j < send_wr->num_sge; j++) {
+			u8 *data = (u8 *) abs_to_virt(sge->addr);
+			ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
+				     "lkey=%x",
+				     idx, j, data, sge->length, sge->lkey);
+			/* assume length is n*16 */
+			ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
+				 idx, j);
+			sge++;
+		} /* eof for j */
+		idx++;
+		send_wr = send_wr->next;
+	} /* eof while send_wr */
+}
+
+#endif /* DEBUG_GSI_SEND_WR */
+
+static inline int ehca_write_swqe(struct ehca_qp *qp,
+				  struct ehca_wqe *wqe_p,
+				  const struct ib_send_wr *send_wr)
+{
+	u32 idx;
+	u64 dma_length;
+	struct ehca_av *my_av;
+	u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+
+	if (unlikely((send_wr->num_sge < 0) ||
+		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
+		ehca_gen_err("Invalid number of WQE SGE. "
+			 "num_sqe=%x max_nr_of_sg=%x",
+			 send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
+		return -EINVAL; /* invalid SG list length */
+	}
+
+	/* clear wqe header until sglist */
+	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+	wqe_p->work_request_id = send_wr->wr_id;
+
+	switch (send_wr->opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+		wqe_p->optype = WQE_OPTYPE_SEND;
+		break;
+	case IB_WR_RDMA_WRITE:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
+		break;
+	case IB_WR_RDMA_READ:
+		wqe_p->optype = WQE_OPTYPE_RDMAREAD;
+		break;
+	default:
+		ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
+		return -EINVAL; /* invalid opcode */
+	}
+
+	wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
+
+	wqe_p->wr_flag = 0;
+
+	if (send_wr->send_flags & IB_SEND_SIGNALED)
+		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+
+	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
+	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+		/* this might not work as long as HW does not support it */
+		wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
+	}
+
+	wqe_p->nr_of_data_seg = send_wr->num_sge;
+
+	switch (qp->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		/* no break is intential here */
+	case IB_QPT_UD:
+		/* IB 1.2 spec C10-15 compliance */
+		if (send_wr->wr.ud.remote_qkey & 0x80000000)
+			remote_qkey = qp->qkey;
+
+		wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+		wqe_p->local_ee_context_qkey = remote_qkey;
+		if (!send_wr->wr.ud.ah) {
+			ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+			return -EINVAL;
+		}
+		my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+		wqe_p->u.ud_av.ud_av = my_av->av;
+
+		/*
+		 * omitted check of IB_SEND_INLINE
+		 * since HW does not support it
+		 */
+		for (idx = 0; idx < send_wr->num_sge; idx++) {
+			wqe_p->u.ud_av.sg_list[idx].vaddr =
+				send_wr->sg_list[idx].addr;
+			wqe_p->u.ud_av.sg_list[idx].lkey =
+				send_wr->sg_list[idx].lkey;
+			wqe_p->u.ud_av.sg_list[idx].length =
+				send_wr->sg_list[idx].length;
+		} /* eof for idx */
+		if (qp->qp_type == IB_QPT_SMI ||
+		    qp->qp_type == IB_QPT_GSI)
+			wqe_p->u.ud_av.ud_av.pmtu = 1;
+		if (qp->qp_type == IB_QPT_GSI) {
+			wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
+#ifdef DEBUG_GSI_SEND_WR
+			trace_send_wr_ud(send_wr);
+#endif /* DEBUG_GSI_SEND_WR */
+		}
+		break;
+
+	case IB_QPT_UC:
+		if (send_wr->send_flags & IB_SEND_FENCE)
+			wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
+		/* no break is intentional here */
+	case IB_QPT_RC:
+		/* TODO: atomic not implemented */
+		wqe_p->u.nud.remote_virtual_adress =
+			send_wr->wr.rdma.remote_addr;
+		wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
+
+		/*
+		 * omitted checking of IB_SEND_INLINE
+		 * since HW does not support it
+		 */
+		dma_length = 0;
+		for (idx = 0; idx < send_wr->num_sge; idx++) {
+			wqe_p->u.nud.sg_list[idx].vaddr =
+				send_wr->sg_list[idx].addr;
+			wqe_p->u.nud.sg_list[idx].lkey =
+				send_wr->sg_list[idx].lkey;
+			wqe_p->u.nud.sg_list[idx].length =
+				send_wr->sg_list[idx].length;
+			dma_length += send_wr->sg_list[idx].length;
+		} /* eof idx */
+		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+
+		break;
+
+	default:
+		ehca_gen_err("Invalid qptype=%x", qp->qp_type);
+		return -EINVAL;
+	}
+
+	if (ehca_debug_level) {
+		ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
+		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
+	}
+	return 0;
+}
+
+/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
+static inline void map_ib_wc_status(u32 cqe_status,
+				    enum ib_wc_status *wc_status)
+{
+	if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
+		switch (cqe_status & 0x3F) {
+		case 0x01:
+		case 0x21:
+			*wc_status = IB_WC_LOC_LEN_ERR;
+			break;
+		case 0x02:
+		case 0x22:
+			*wc_status = IB_WC_LOC_QP_OP_ERR;
+			break;
+		case 0x03:
+		case 0x23:
+			*wc_status = IB_WC_LOC_EEC_OP_ERR;
+			break;
+		case 0x04:
+		case 0x24:
+			*wc_status = IB_WC_LOC_PROT_ERR;
+			break;
+		case 0x05:
+		case 0x25:
+			*wc_status = IB_WC_WR_FLUSH_ERR;
+			break;
+		case 0x06:
+			*wc_status = IB_WC_MW_BIND_ERR;
+			break;
+		case 0x07: /* remote error - look into bits 20:24 */
+			switch ((cqe_status
+				 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
+			case 0x0:
+				/*
+				 * PSN Sequence Error!
+				 * couldn't find a matching status!
+				 */
+				*wc_status = IB_WC_GENERAL_ERR;
+				break;
+			case 0x1:
+				*wc_status = IB_WC_REM_INV_REQ_ERR;
+				break;
+			case 0x2:
+				*wc_status = IB_WC_REM_ACCESS_ERR;
+				break;
+			case 0x3:
+				*wc_status = IB_WC_REM_OP_ERR;
+				break;
+			case 0x4:
+				*wc_status = IB_WC_REM_INV_RD_REQ_ERR;
+				break;
+			}
+			break;
+		case 0x08:
+			*wc_status = IB_WC_RETRY_EXC_ERR;
+			break;
+		case 0x09:
+			*wc_status = IB_WC_RNR_RETRY_EXC_ERR;
+			break;
+		case 0x0A:
+		case 0x2D:
+			*wc_status = IB_WC_REM_ABORT_ERR;
+			break;
+		case 0x0B:
+		case 0x2E:
+			*wc_status = IB_WC_INV_EECN_ERR;
+			break;
+		case 0x0C:
+		case 0x2F:
+			*wc_status = IB_WC_INV_EEC_STATE_ERR;
+			break;
+		case 0x0D:
+			*wc_status = IB_WC_BAD_RESP_ERR;
+			break;
+		case 0x10:
+			/* WQE purged */
+			*wc_status = IB_WC_WR_FLUSH_ERR;
+			break;
+		default:
+			*wc_status = IB_WC_FATAL_ERR;
+
+		}
+	} else
+		*wc_status = IB_WC_SUCCESS;
+}
+
+int ehca_post_send(struct ib_qp *qp,
+		   struct ib_send_wr *send_wr,
+		   struct ib_send_wr **bad_send_wr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ib_send_wr *cur_send_wr;
+	struct ehca_wqe *wqe_p;
+	int wqe_cnt = 0;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	/* LOCK the QUEUE */
+	spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+
+	/* loop processes list of send reqs */
+	for (cur_send_wr = send_wr; cur_send_wr != NULL;
+	     cur_send_wr = cur_send_wr->next) {
+		u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+		/* get pointer next to free WQE */
+		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+		if (unlikely(!wqe_p)) {
+			/* too many posted work requests: queue overflow */
+			if (bad_send_wr)
+				*bad_send_wr = cur_send_wr;
+			if (wqe_cnt == 0) {
+				ret = -ENOMEM;
+				ehca_err(qp->device, "Too many posted WQEs "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_send_exit0;
+		}
+		/* write a SEND WQE into the QUEUE */
+		ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
+		/*
+		 * if something failed,
+		 * reset the free entry pointer to the start value
+		 */
+		if (unlikely(ret)) {
+			my_qp->ipz_squeue.current_q_offset = start_offset;
+			*bad_send_wr = cur_send_wr;
+			if (wqe_cnt == 0) {
+				ret = -EINVAL;
+				ehca_err(qp->device, "Could not write WQE "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_send_exit0;
+		}
+		wqe_cnt++;
+		ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
+			 my_qp, qp->qp_num, wqe_cnt);
+	} /* eof for cur_send_wr */
+
+post_send_exit0:
+	/* UNLOCK the QUEUE */
+	spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+	iosync(); /* serialize GAL register access */
+	hipz_update_sqa(my_qp, wqe_cnt);
+	return ret;
+}
+
+int ehca_post_recv(struct ib_qp *qp,
+		   struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ib_recv_wr *cur_recv_wr;
+	struct ehca_wqe *wqe_p;
+	int wqe_cnt = 0;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	/* LOCK the QUEUE */
+	spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+
+	/* loop processes list of send reqs */
+	for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
+	     cur_recv_wr = cur_recv_wr->next) {
+		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
+		/* get pointer next to free WQE */
+		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
+		if (unlikely(!wqe_p)) {
+			/* too many posted work requests: queue overflow */
+			if (bad_recv_wr)
+				*bad_recv_wr = cur_recv_wr;
+			if (wqe_cnt == 0) {
+				ret = -ENOMEM;
+				ehca_err(qp->device, "Too many posted WQEs "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_recv_exit0;
+		}
+		/* write a RECV WQE into the QUEUE */
+		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+		/*
+		 * if something failed,
+		 * reset the free entry pointer to the start value
+		 */
+		if (unlikely(ret)) {
+			my_qp->ipz_rqueue.current_q_offset = start_offset;
+			*bad_recv_wr = cur_recv_wr;
+			if (wqe_cnt == 0) {
+				ret = -EINVAL;
+				ehca_err(qp->device, "Could not write WQE "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_recv_exit0;
+		}
+		wqe_cnt++;
+		ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
+		     my_qp, qp->qp_num, wqe_cnt);
+	} /* eof for cur_recv_wr */
+
+post_recv_exit0:
+	spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
+	iosync(); /* serialize GAL register access */
+	hipz_update_rqa(my_qp, wqe_cnt);
+	return ret;
+}
+
+/*
+ * ib_wc_opcode table converts ehca wc opcode to ib
+ * Since we use zero to indicate invalid opcode, the actual ib opcode must
+ * be decremented!!!
+ */
+static const u8 ib_wc_opcode[255] = {
+	[0x01] = IB_WC_RECV+1,
+	[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
+	[0x04] = IB_WC_BIND_MW+1,
+	[0x08] = IB_WC_FETCH_ADD+1,
+	[0x10] = IB_WC_COMP_SWAP+1,
+	[0x20] = IB_WC_RDMA_WRITE+1,
+	[0x40] = IB_WC_RDMA_READ+1,
+	[0x80] = IB_WC_SEND+1
+};
+
+/* internal function to poll one entry of cq */
+static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
+{
+	int ret = 0;
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	struct ehca_cqe *cqe;
+	int cqe_count = 0;
+
+poll_cq_one_read_cqe:
+	cqe = (struct ehca_cqe *)
+		ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
+	if (!cqe) {
+		ret = -EAGAIN;
+		ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
+			 "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
+		goto  poll_cq_one_exit0;
+	}
+
+	/* prevents loads being reordered across this point */
+	rmb();
+
+	cqe_count++;
+	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
+		struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
+		int purgeflag;
+		unsigned long spl_flags;
+		if (!qp) {
+			ehca_err(cq->device, "cq_num=%x qp_num=%x "
+				 "could not find qp -> ignore cqe",
+				 my_cq->cq_number, cqe->local_qp_number);
+			ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
+				 my_cq->cq_number, cqe->local_qp_number);
+			/* ignore this purged cqe */
+			goto poll_cq_one_read_cqe;
+		}
+		spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+		purgeflag = qp->sqerr_purgeflag;
+		spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+
+		if (purgeflag) {
+			ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
+				 "src_qp=%x",
+				 cqe->local_qp_number, cqe->remote_qp_number);
+			if (ehca_debug_level)
+				ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
+					 cqe->local_qp_number,
+					 cqe->remote_qp_number);
+			/*
+			 * ignore this to avoid double cqes of bad wqe
+			 * that caused sqe and turn off purge flag
+			 */
+			qp->sqerr_purgeflag = 0;
+			goto poll_cq_one_read_cqe;
+		}
+	}
+
+	/* tracing cqe */
+	if (ehca_debug_level) {
+		ehca_dbg(cq->device,
+			 "Received COMPLETION ehca_cq=%p cq_num=%x -----",
+			 my_cq, my_cq->cq_number);
+		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+			 my_cq, my_cq->cq_number);
+		ehca_dbg(cq->device,
+			 "ehca_cq=%p cq_num=%x -------------------------",
+			 my_cq, my_cq->cq_number);
+	}
+
+	/* we got a completion! */
+	wc->wr_id = cqe->work_request_id;
+
+	/* eval ib_wc_opcode */
+	wc->opcode = ib_wc_opcode[cqe->optype]-1;
+	if (unlikely(wc->opcode == -1)) {
+		ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
+			 "ehca_cq=%p cq_num=%x",
+			 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
+		/* dump cqe for other infos */
+		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+			 my_cq, my_cq->cq_number);
+		/* update also queue adder to throw away this entry!!! */
+		goto poll_cq_one_exit0;
+	}
+	/* eval ib_wc_status */
+	if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) {
+		/* complete with errors */
+		map_ib_wc_status(cqe->status, &wc->status);
+		wc->vendor_err = wc->status;
+	} else
+		wc->status = IB_WC_SUCCESS;
+
+	wc->qp_num = cqe->local_qp_number;
+	wc->byte_len = cqe->nr_bytes_transferred;
+	wc->pkey_index = cqe->pkey_index;
+	wc->slid = cqe->rlid;
+	wc->dlid_path_bits = cqe->dlid;
+	wc->src_qp = cqe->remote_qp_number;
+	wc->wc_flags = cqe->w_completion_flags;
+	wc->imm_data = cpu_to_be32(cqe->immediate_data);
+	wc->sl = cqe->service_level;
+
+	if (wc->status != IB_WC_SUCCESS)
+		ehca_dbg(cq->device,
+			 "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
+			 "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
+			 "cqe=%p", my_cq, my_cq->cq_number, cqe->optype,
+			 cqe->status, cqe->local_qp_number,
+			 cqe->remote_qp_number, cqe->work_request_id, cqe);
+
+poll_cq_one_exit0:
+	if (cqe_count > 0)
+		hipz_update_feca(my_cq, cqe_count);
+
+	return ret;
+}
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	int nr;
+	struct ib_wc *current_wc = wc;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	if (num_entries < 1) {
+		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
+			 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
+		ret = -EINVAL;
+		goto poll_cq_exit0;
+	}
+
+	spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+	for (nr = 0; nr < num_entries; nr++) {
+		ret = ehca_poll_cq_one(cq, current_wc);
+		if (ret)
+			break;
+		current_wc++;
+	} /* eof for nr */
+	spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+	if (ret == -EAGAIN  || !ret)
+		ret = nr;
+
+poll_cq_exit0:
+	return ret;
+}
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+
+	switch (cq_notify) {
+	case IB_CQ_SOLICITED:
+		hipz_set_cqx_n0(my_cq, 1);
+		break;
+	case IB_CQ_NEXT_COMP:
+		hipz_set_cqx_n1(my_cq, 1);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@ -0,0 +1,111 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  SQP functions
+ *
+ *  Authors: Khadija Souissi <souissi@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+
+/**
+ * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
+ * pair is created successfully, the corresponding port gets active.
+ *
+ * Define Special Queue pair 0 (SMI QP) is still not supported.
+ *
+ * @qp_init_attr: Queue pair init attributes with port and queue pair type
+ */
+
+u64 ehca_define_sqp(struct ehca_shca *shca,
+		    struct ehca_qp *ehca_qp,
+		    struct ib_qp_init_attr *qp_init_attr)
+{
+	u32 pma_qp_nr, bma_qp_nr;
+	u64 ret;
+	u8 port = qp_init_attr->port_num;
+	int counter;
+
+	shca->sport[port - 1].port_state = IB_PORT_DOWN;
+
+	switch (qp_init_attr->qp_type) {
+	case IB_QPT_SMI:
+		/* function not supported yet */
+		break;
+	case IB_QPT_GSI:
+		ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
+					 ehca_qp->ipz_qp_handle,
+					 ehca_qp->galpas.kernel,
+					 (u32) qp_init_attr->port_num,
+					 &pma_qp_nr, &bma_qp_nr);
+
+		if (ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device,
+				 "Can't define AQP1 for port %x. rc=%lx",
+				 port, ret);
+			return ret;
+		}
+		break;
+	default:
+		ehca_err(&shca->ib_device, "invalid qp_type=%x",
+			 qp_init_attr->qp_type);
+		return H_PARAMETER;
+	}
+
+	for (counter = 0;
+	     shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
+		     counter < ehca_port_act_time;
+	     counter++) {
+		ehca_dbg(&shca->ib_device, "... wait until port %x is active",
+			 port);
+		msleep_interruptible(1000);
+	}
+
+	if (counter == ehca_port_act_time) {
+		ehca_err(&shca->ib_device, "Port %x is not active.", port);
+		return H_HARDWARE;
+	}
+
+	return H_SUCCESS;
+}
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@ -0,0 +1,172 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  auxiliary functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef EHCA_TOOLS_H
+#define EHCA_TOOLS_H
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+
+#include <asm/abs_addr.h>
+#include <asm/ibmebus.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+extern int ehca_debug_level;
+
+#define ehca_dbg(ib_dev, format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
+				   "PU%04x EHCA_DBG:%s " format "\n", \
+				   get_paca()->paca_index, __FUNCTION__, \
+				   ## arg); \
+	} while (0)
+
+#define ehca_info(ib_dev, format, arg...) \
+	dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
+		 get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_warn(ib_dev, format, arg...) \
+	dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
+		 get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_err(ib_dev, format, arg...) \
+	dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
+		get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/* use this one only if no ib_dev available */
+#define ehca_gen_dbg(format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\
+			       get_paca()->paca_index, __FUNCTION__, ## arg); \
+	} while (0)
+
+#define ehca_gen_warn(format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\
+			       get_paca()->paca_index, __FUNCTION__, ## arg); \
+	} while (0)
+
+#define ehca_gen_err(format, arg...) \
+	printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
+		get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/**
+ * ehca_dmp - printk a memory block, whose length is n*8 bytes.
+ * Each line has the following layout:
+ * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
+ */
+#define ehca_dmp(adr, len, format, args...) \
+	do {				       \
+		unsigned int x;			      \
+		unsigned int l = (unsigned int)(len); \
+		unsigned char *deb = (unsigned char*)(adr);	\
+		for (x = 0; x < l; x += 16) { \
+			printk("EHCA_DMP:%s" format \
+			       " adr=%p ofs=%04x %016lx %016lx\n", \
+			       __FUNCTION__, ##args, deb, x, \
+			       *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
+			deb += 16; \
+		} \
+	} while (0)
+
+/* define a bitmask, little endian version */
+#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
+
+/* define a bitmask, the ibm way... */
+#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1))
+
+/* internal function, don't use */
+#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
+
+/* internal function, don't use */
+#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
+
+/**
+ * EHCA_BMASK_SET - return value shifted and masked by mask
+ * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
+ * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
+ * in variable
+ */
+#define EHCA_BMASK_SET(mask,value) \
+	((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
+
+/**
+ * EHCA_BMASK_GET - extract a parameter from value by mask
+ */
+#define EHCA_BMASK_GET(mask,value) \
+	(EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))
+
+
+/* Converts ehca to ib return code */
+static inline int ehca2ib_return_code(u64 ehca_rc)
+{
+	switch (ehca_rc) {
+	case H_SUCCESS:
+		return 0;
+	case H_BUSY:
+		return -EBUSY;
+	case H_NO_MEM:
+		return -ENOMEM;
+	default:
+		return -EINVAL;
+	}
+}
+
+
+#endif /* EHCA_TOOLS_H */
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@ -0,0 +1,392 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  userspace support verbs
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata)
+{
+	struct ehca_ucontext *my_context;
+
+	my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
+	if (!my_context) {
+		ehca_err(device, "Out of memory device=%p", device);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return &my_context->ib_ucontext;
+}
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context)
+{
+	kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
+	return 0;
+}
+
+struct page *ehca_nopage(struct vm_area_struct *vma,
+			 unsigned long address, int *type)
+{
+	struct page *mypage = NULL;
+	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
+	u32 idr_handle = fileoffset >> 32;
+	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ehca_pd *pd;
+	u64 offset;
+	void *vaddr;
+
+	switch (q_type) {
+	case 1: /* CQ */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq) {
+			ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		if (cq->ownpid != cur_pid) {
+			ehca_err(cq->ib_cq.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
+			ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+		break;
+
+	case 2: /* QP */
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp) {
+			ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {
+			ehca_err(qp->ib_qp.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {	/* rqueue */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
+			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		} else if (rsrc_type == 3) {	/* squeue */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
+			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return NOPAGE_SIGBUS;
+	}
+
+	if (!mypage) {
+		ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
+		return NOPAGE_SIGBUS;
+	}
+	get_page(mypage);
+
+	return mypage;
+}
+
+static struct vm_operations_struct ehcau_vm_ops = {
+	.nopage = ehca_nopage,
+};
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
+	u32 idr_handle = fileoffset >> 32;
+	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	u32 ret;
+	u64 vsize, physical;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ehca_pd *pd;
+
+	switch (q_type) {
+	case  1: /* CQ */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq)
+			return -EINVAL;
+
+		if (cq->ownpid != cur_pid) {
+			ehca_err(cq->ib_cq.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return -ENOMEM;
+		}
+
+		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
+			return -EINVAL;
+
+		switch (rsrc_type) {
+		case 1: /* galpa fw handle */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				return -EINVAL;
+			}
+
+			physical = cq->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			ehca_dbg(cq->ib_cq.device,
+				 "vsize=%lx physical=%lx", vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				ehca_err(cq->ib_cq.device,
+					 "remap_pfn_range() failed ret=%x",
+					 ret);
+				return -ENOMEM;
+			}
+			break;
+
+		case 2: /* cq queue_addr */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		default:
+			ehca_err(cq->ib_cq.device, "bad resource type %x",
+				 rsrc_type);
+			return -EINVAL;
+		}
+		break;
+
+	case 2: /* QP */
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp)
+			return -EINVAL;
+
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {
+			ehca_err(qp->ib_qp.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return -ENOMEM;
+		}
+
+		if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+			return -EINVAL;
+
+		switch (rsrc_type) {
+		case 1: /* galpa fw handle */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				return -EINVAL;
+			}
+
+			physical = qp->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
+				 vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				ehca_err(qp->ib_qp.device,
+					 "remap_pfn_range() failed ret=%x",
+					 ret);
+				return -ENOMEM;
+			}
+			break;
+
+		case 2: /* qp rqueue_addr */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		case 3: /* qp squeue_addr */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		default:
+			ehca_err(qp->ib_qp.device, "bad resource type %x",
+				 rsrc_type);
+			return -EINVAL;
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
+		     struct vm_area_struct **vma)
+{
+	down_write(&current->mm->mmap_sem);
+	*mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE,
+				 MAP_SHARED | MAP_ANONYMOUS,
+				 foffset);
+	up_write(&current->mm->mmap_sem);
+	if (!(*mapped)) {
+		ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
+			     foffset, length);
+		return -EINVAL;
+	}
+
+	*vma = find_vma(current->mm, (u64)*mapped);
+	if (!(*vma)) {
+		down_write(&current->mm->mmap_sem);
+		do_munmap(current->mm, 0, length);
+		up_write(&current->mm->mmap_sem);
+		ehca_gen_err("couldn't find vma queue=%p", *mapped);
+		return -EINVAL;
+	}
+	(*vma)->vm_flags |= VM_RESERVED;
+	(*vma)->vm_ops = &ehcau_vm_ops;
+
+	return 0;
+}
+
+int ehca_mmap_register(u64 physical, void **mapped,
+		       struct vm_area_struct **vma)
+{
+	int ret;
+	unsigned long vsize;
+	/* ehca hw supports only 4k page */
+	ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
+	if (ret) {
+		ehca_gen_err("could'nt mmap physical=%lx", physical);
+		return ret;
+	}
+
+	(*vma)->vm_flags |= VM_RESERVED;
+	vsize = (*vma)->vm_end - (*vma)->vm_start;
+	if (vsize != EHCA_PAGESIZE) {
+		ehca_gen_err("invalid vsize=%lx",
+			     (*vma)->vm_end - (*vma)->vm_start);
+		return -EINVAL;
+	}
+
+	(*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
+	(*vma)->vm_flags |= VM_IO | VM_RESERVED;
+
+	ret = remap_pfn_range((*vma), (*vma)->vm_start,
+			      physical >> PAGE_SHIFT, vsize,
+			      (*vma)->vm_page_prot);
+	if (ret) {
+		ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+		return -ENOMEM;
+	}
+
+	return 0;
+
+}
+
+int ehca_munmap(unsigned long addr, size_t len) {
+	int ret = 0;
+	struct mm_struct *mm = current->mm;
+	if (mm) {
+		down_write(&mm->mmap_sem);
+		ret = do_munmap(mm, addr, len);
+		up_write(&mm->mmap_sem);
+	}
+	return ret;
+}
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@ -0,0 +1,874 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/hvcall.h>
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hcp_phyp.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
+#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
+#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
+#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
+#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
+#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
+
+#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
+#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
+#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
+
+#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
+#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
+
+#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
+
+/* direct access qp controls */
+#define DAQP_CTRL_ENABLE    0x01
+#define DAQP_CTRL_SEND_COMP 0x20
+#define DAQP_CTRL_RECV_COMP 0x40
+
+static u32 get_longbusy_msecs(int longbusy_rc)
+{
+	switch (longbusy_rc) {
+	case H_LONG_BUSY_ORDER_1_MSEC:
+		return 1;
+	case H_LONG_BUSY_ORDER_10_MSEC:
+		return 10;
+	case H_LONG_BUSY_ORDER_100_MSEC:
+		return 100;
+	case H_LONG_BUSY_ORDER_1_SEC:
+		return 1000;
+	case H_LONG_BUSY_ORDER_10_SEC:
+		return 10000;
+	case H_LONG_BUSY_ORDER_100_SEC:
+		return 100000;
+	default:
+		return 1;
+	}
+}
+
+static long ehca_plpar_hcall_norets(unsigned long opcode,
+				    unsigned long arg1,
+				    unsigned long arg2,
+				    unsigned long arg3,
+				    unsigned long arg4,
+				    unsigned long arg5,
+				    unsigned long arg6,
+				    unsigned long arg7)
+{
+	long ret;
+	int i, sleep_msecs;
+
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+
+	for (i = 0; i < 5; i++) {
+		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
+					 arg5, arg6, arg7);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS)
+			ehca_gen_err("opcode=%lx ret=%lx"
+				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+				     " arg5=%lx arg6=%lx arg7=%lx ",
+				     opcode, ret,
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7);
+
+		ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
+		return ret;
+
+	}
+
+	return H_BUSY;
+}
+
+static long ehca_plpar_hcall9(unsigned long opcode,
+			      unsigned long *outs, /* array of 9 outputs */
+			      unsigned long arg1,
+			      unsigned long arg2,
+			      unsigned long arg3,
+			      unsigned long arg4,
+			      unsigned long arg5,
+			      unsigned long arg6,
+			      unsigned long arg7,
+			      unsigned long arg8,
+			      unsigned long arg9)
+{
+	long ret;
+	int i, sleep_msecs;
+
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
+		     arg8, arg9);
+
+	for (i = 0; i < 5; i++) {
+		ret = plpar_hcall9(opcode, outs,
+				   arg1, arg2, arg3, arg4, arg5,
+				   arg6, arg7, arg8, arg9);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS)
+			ehca_gen_err("opcode=%lx ret=%lx"
+				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+				     " arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
+				     " arg9=%lx"
+				     " out1=%lx out2=%lx out3=%lx out4=%lx"
+				     " out5=%lx out6=%lx out7=%lx out8=%lx"
+				     " out9=%lx",
+				     opcode, ret,
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7, arg8, arg9,
+				     outs[0], outs[1], outs[2], outs[3],
+				     outs[4], outs[5], outs[6], outs[7],
+				     outs[8]);
+
+		ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
+			     "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
+			     "out9=%lx",
+			     opcode, ret, outs[0], outs[1], outs[2], outs[3],
+			     outs[4], outs[5], outs[6], outs[7], outs[8]);
+		return ret;
+
+	}
+
+	return H_BUSY;
+}
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 *act_nr_of_entries,
+			     u32 *act_pages,
+			     u32 *eq_ist)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 allocate_controls;
+
+	/* resource type */
+	allocate_controls = 3ULL;
+
+	/* ISN is associated */
+	if (neq_control != 1)
+		allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
+	else /* notification event queue */
+		allocate_controls = (1ULL << 63) | allocate_controls;
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,  /* r4 */
+				allocate_controls,      /* r5 */
+				number_of_entries,      /* r6 */
+				0, 0, 0, 0, 0, 0);
+	eq_handle->handle = outs[0];
+	*act_nr_of_entries = (u32)outs[3];
+	*act_pages = (u32)outs[4];
+	*eq_ist = (u32)outs[5];
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resource - ret=%lx ", ret);
+
+	return ret;
+}
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask)
+{
+	return ehca_plpar_hcall_norets(H_RESET_EVENTS,
+				       adapter_handle.handle, /* r4 */
+				       eq_handle.handle,      /* r5 */
+				       event_mask,	      /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,   /* r4  */
+				2,	                 /* r5  */
+				param->eq_handle.handle, /* r6  */
+				cq->token,	         /* r7  */
+				param->nr_cqe,           /* r8  */
+				0, 0, 0, 0);
+	cq->ipz_cq_handle.handle = outs[0];
+	param->act_nr_of_entries = (u32)outs[3];
+	param->act_pages = (u32)outs[4];
+
+	if (ret == H_SUCCESS)
+		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_qp *qp,
+			     struct ehca_alloc_qp_parms *parms)
+{
+	u64 ret;
+	u64 allocate_controls;
+	u64 max_r10_reg;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
+	u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
+	int daqp_ctrl = parms->daqp_ctrl;
+
+	allocate_controls =
+		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
+			       (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
+				 (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
+				 (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
+				 parms->ud_av_l_key_ctl)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
+
+	max_r10_reg =
+		EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
+			       max_nr_send_wqes)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
+				 max_nr_receive_wqes)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
+				 parms->max_send_sge)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
+				 parms->max_recv_sge);
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,	           /* r4  */
+				allocate_controls,	           /* r5  */
+				qp->send_cq->ipz_cq_handle.handle,
+				qp->recv_cq->ipz_cq_handle.handle,
+				parms->ipz_eq_handle.handle,
+				((u64)qp->token << 32) | parms->pd.value,
+				max_r10_reg,	                   /* r10 */
+				parms->ud_av_l_key_ctl,            /* r11 */
+				0);
+	qp->ipz_qp_handle.handle = outs[0];
+	qp->real_qp_num = (u32)outs[1];
+	parms->act_nr_send_sges =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
+	parms->act_nr_recv_wqes =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
+	parms->act_nr_send_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
+	parms->act_nr_recv_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
+	parms->nr_sq_pages =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
+	parms->nr_rq_pages =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
+
+	if (ret == H_SUCCESS)
+		hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block)
+{
+	u64 ret;
+	u64 r_cb = virt_to_abs(query_port_response_block);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response block not page aligned");
+		return H_PARAMETER;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
+				      adapter_handle.handle, /* r4 */
+				      port_id,	             /* r5 */
+				      r_cb,	             /* r6 */
+				      0, 0, 0, 0);
+
+	if (ehca_debug_level)
+		ehca_dmp(query_port_response_block, 64, "response_block");
+
+	return ret;
+}
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock)
+{
+	u64 r_cb = virt_to_abs(query_hca_rblock);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response_block=%p not page aligned",
+			     query_hca_rblock);
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_QUERY_HCA,
+				       adapter_handle.handle, /* r4 */
+				       r_cb,                  /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count)
+{
+	return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
+				       adapter_handle.handle,      /* r4  */
+				       queue_type | pagesize << 8, /* r5  */
+				       resource_handle,	           /* r6  */
+				       logical_address_of_page,    /* r7  */
+				       count,	                   /* r8  */
+				       0, 0);
+}
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	if (count != 1) {
+		ehca_gen_err("Ppage counter=%lx", count);
+		return H_PARAMETER;
+	}
+	return hipz_h_register_rpage(adapter_handle,
+				     pagesize,
+				     queue_type,
+				     eq_handle.handle,
+				     logical_address_of_page, count);
+}
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+			   u32 ist)
+{
+	u64 ret;
+	ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
+				      adapter_handle.handle, /* r4 */
+				      ist,                   /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret != H_SUCCESS && ret != H_BUSY)
+		ehca_gen_err("Could not query interrupt state.");
+
+	return ret;
+}
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+				     cq_handle.handle, logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle,pagesize,queue_type,
+				     qp_handle.handle,logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe2processed,
+			       void **log_addr_next_rq_wqe2processed,
+			       int dis_and_get_function_code)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				dis_and_get_function_code, /* r5 */
+				qp_handle.handle,	   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (log_addr_next_sq_wqe2processed)
+		*log_addr_next_sq_wqe2processed = (void*)outs[0];
+	if (log_addr_next_rq_wqe2processed)
+		*log_addr_next_rq_wqe2processed = (void*)outs[1];
+
+	return ret;
+}
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				update_mask,	       /* r6 */
+				virt_to_abs(mqpcb),    /* r7 */
+				0, 0, 0, 0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Insufficient resources ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal)
+{
+	return ehca_plpar_hcall_norets(H_QUERY_QP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       virt_to_abs(qqpcb),    /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = hcp_galpas_dtor(&qp->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct qp->galpas");
+		return H_RESOURCE;
+	}
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				/* function code */
+				1,	                   /* r5 */
+				qp->ipz_qp_handle.handle,  /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (ret == H_HARDWARE)
+		ehca_gen_err("HCA not operational. ret=%lx", ret);
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      qp->ipz_qp_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("Resource still in use. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port)
+{
+	return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       port,                  /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				port,	               /* r6 */
+				0, 0, 0, 0, 0, 0);
+	*pma_qp_nr = (u32)outs[0];
+	*bma_qp_nr = (u32)outs[1];
+
+	if (ret == H_ALIAS_EXIST)
+		ehca_gen_err("AQP1 already exists. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	u64 ret;
+
+	ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
+				      adapter_handle.handle,  /* r4 */
+				      qp_handle.handle,       /* r5 */
+				      mcg_dlid,               /* r6 */
+				      interface_id,           /* r7 */
+				      subnet_prefix,          /* r8 */
+				      0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	return ehca_plpar_hcall_norets(H_DETACH_MCQP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       mcg_dlid,              /* r6 */
+				       interface_id,          /* r7 */
+				       subnet_prefix,         /* r8 */
+				       0, 0);
+}
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag)
+{
+	u64 ret;
+
+	ret = hcp_galpas_dtor(&cq->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct cp->galpas");
+		return H_RESOURCE;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      cq->ipz_cq_handle.handle,  /* r5 */
+				      force_flag != 0 ? 1L : 0L, /* r6 */
+				      0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
+
+	return ret;
+}
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq)
+{
+	u64 ret;
+
+	ret = hcp_galpas_dtor(&eq->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct eq->galpas");
+		return H_RESOURCE;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      eq->ipz_eq_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("Resource in use. ret=%lx ", ret);
+
+	return ret;
+}
+
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,            /* r4 */
+				5,                                /* r5 */
+				vaddr,                            /* r6 */
+				length,                           /* r7 */
+				(((u64)access_ctrl) << 32ULL),    /* r8 */
+				pd.value,                         /* r9 */
+				0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	u64 ret;
+
+	if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
+		ehca_gen_err("logical_address_of_page not on a 4k boundary "
+			     "adapter_handle=%lx mr=%p mr_handle=%lx "
+			     "pagesize=%x queue_type=%x "
+			     "logical_address_of_page=%lx count=%lx",
+			     adapter_handle.handle, mr,
+			     mr->ipz_mr_handle.handle, pagesize, queue_type,
+			     logical_address_of_page, count);
+		ret = H_PARAMETER;
+	} else
+		ret = hipz_h_register_rpage(adapter_handle, pagesize,
+					    queue_type,
+					    mr->ipz_mr_handle.handle,
+					    logical_address_of_page, count);
+	return ret;
+}
+
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
+				adapter_handle.handle,     /* r4 */
+				mr->ipz_mr_handle.handle,  /* r5 */
+				0, 0, 0, 0, 0, 0, 0);
+	outparms->len = outs[0];
+	outparms->vaddr = outs[1];
+	outparms->acl  = outs[4] >> 32;
+	outparms->lkey = (u32)(outs[5] >> 32);
+	outparms->rkey = (u32)(outs[5] & (0xffffffff));
+
+	return ret;
+}
+
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				       adapter_handle.handle,    /* r4 */
+				       mr->ipz_mr_handle.handle, /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
+				adapter_handle.handle,    /* r4 */
+				mr->ipz_mr_handle.handle, /* r5 */
+				vaddr_in,	          /* r6 */
+				length,                   /* r7 */
+				/* r8 */
+				((((u64)access_ctrl) << 32ULL) | pd.value),
+				mr_addr_cb,               /* r9 */
+				0, 0, 0);
+	outparms->vaddr = outs[1];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
+				adapter_handle.handle,            /* r4 */
+				orig_mr->ipz_mr_handle.handle,    /* r5 */
+				vaddr_in,                         /* r6 */
+				(((u64)access_ctrl) << 32ULL),    /* r7 */
+				pd.value,                         /* r8 */
+				0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,      /* r4 */
+				6,                          /* r5 */
+				pd.value,                   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
+				adapter_handle.handle,    /* r4 */
+				mw->ipz_mw_handle.handle, /* r5 */
+				0, 0, 0, 0, 0, 0, 0);
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				       adapter_handle.handle,    /* r4 */
+				       mw->ipz_mw_handle.handle, /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count)
+{
+	u64 r_cb = virt_to_abs(rblock);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("rblock not page aligned.");
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_ERROR_DATA,
+				       adapter_handle.handle,
+				       ressource_handle,
+				       r_cb,
+				       0, 0, 0, 0);
+}
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@ -0,0 +1,261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_IF_H__
+#define __HCP_IF_H__
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "hipz_hw.h"
+
+/*
+ * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize
+ * resources, create the empty EQPT (ring).
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 * act_nr_of_entries,
+			     u32 * act_pages,
+			     u32 * eq_ist);
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask);
+/*
+ * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize
+ * resources, create the empty CQPT (ring).
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param);
+
+
+/*
+ * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
+ * initialize resources, create empty QPPTs (2 rings).
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_qp *qp,
+			     struct ehca_alloc_qp_parms *parms);
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block);
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock);
+
+/*
+ * hipz_h_register_rpage internal function in hcp_if.h for all
+ * hcp_H_REGISTER_RPAGE calls.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count);
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle
+			   hcp_adapter_handle,
+			   u32 ist);
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal);
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa);
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe_tb_processed,
+			       void **log_addr_next_rq_wqe_tb_processed,
+			       int dis_and_get_function_code);
+enum hcall_sigt {
+	HCALL_SIGT_NO_CQE = 0,
+	HCALL_SIGT_BY_WQE = 1,
+	HCALL_SIGT_EVERY = 2
+};
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal);
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal);
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp);
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port);
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr);
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag);
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq);
+
+/*
+ * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize
+ * resources.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+/* hipz_h_query_mr queries MR in HW and FW */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mr frees MR resources in HW and FW */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr);
+
+/* hipz_h_reregister_pmr reregisters MR in HW and FW */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_smr register shared MR in HW and FW */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms);
+
+/*
+ * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize
+ * resources.
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_query_mw queries MW in HW and FW */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mw frees MW resources in HW and FW */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw);
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count);
+
+#endif /* __HCP_IF_H__ */
--- a/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@ -0,0 +1,80 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *   load store abstraction for ehca register access with tracing
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+int hcall_map_page(u64 physaddr, u64 *mapaddr)
+{
+	*mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
+	return 0;
+}
+
+int hcall_unmap_page(u64 mapaddr)
+{
+	iounmap((volatile void __iomem*)mapaddr);
+	return 0;
+}
+
+int hcp_galpas_ctor(struct h_galpas *galpas,
+		    u64 paddr_kernel, u64 paddr_user)
+{
+	int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+	if (ret)
+		return ret;
+
+	galpas->user.fw_handle = paddr_user;
+
+	return 0;
+}
+
+int hcp_galpas_dtor(struct h_galpas *galpas)
+{
+	if (galpas->kernel.fw_handle) {
+		int ret = hcall_unmap_page(galpas->kernel.fw_handle);
+		if (ret)
+			return ret;
+	}
+
+	galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
+
+	return 0;
+}
--- a/drivers/infiniband/hw/ehca/hcp_phyp.h
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@ -0,0 +1,90 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware calls
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_PHYP_H__
+#define __HCP_PHYP_H__
+
+
+/*
+ * eHCA page (mapped into memory)
+ * resource to access eHCA register pages in CPU address space
+*/
+struct h_galpa {
+	u64 fw_handle;
+	/* for pSeries this is a 64bit memory address where
+	   I/O memory is mapped into CPU address space (kv) */
+};
+
+/*
+ * resource to access eHCA address space registers, all types
+ */
+struct h_galpas {
+	u32 pid;		/*PID of userspace galpa checking */
+	struct h_galpa user;	/* user space accessible resource,
+				   set to 0 if unused */
+	struct h_galpa kernel;	/* kernel space accessible resource,
+				   set to 0 if unused */
+};
+
+static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
+{
+	u64 addr = galpa.fw_handle + offset;
+	return *(volatile u64 __force *)addr;
+}
+
+static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
+{
+	u64 addr = galpa.fw_handle + offset;
+	*(volatile u64 __force *)addr = value;
+}
+
+int hcp_galpas_ctor(struct h_galpas *galpas,
+		    u64 paddr_kernel, u64 paddr_user);
+
+int hcp_galpas_dtor(struct h_galpas *galpas);
+
+int hcall_map_page(u64 physaddr, u64 * mapaddr);
+
+int hcall_unmap_page(u64 mapaddr);
+
+#endif
--- a/drivers/infiniband/hw/ehca/hipz_fns.h
+++ b/drivers/infiniband/hw/ehca/hipz_fns.h
@ -0,0 +1,68 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_H__
+#define __HIPZ_FNS_H__
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+#include "hipz_fns_core.h"
+
+#define hipz_galpa_store_eq(gal, offset, value) \
+	hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_eq(gal, offset) \
+	hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qped(gal, offset, value) \
+	hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_qped(gal, offset) \
+	hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
+
+#define hipz_galpa_store_mrmw(gal, offset, value) \
+	hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_mrmw(gal, offset) \
+	hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
+
+#endif
--- a/drivers/infiniband/hw/ehca/hipz_fns_core.h
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@ -0,0 +1,100 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_CORE_H__
+#define __HIPZ_FNS_CORE_H__
+
+#include "hcp_phyp.h"
+#include "hipz_hw.h"
+
+#define hipz_galpa_store_cq(gal, offset, value) \
+	hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_cq(gal, offset) \
+	hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qp(gal,offset, value) \
+	hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
+#define hipz_galpa_load_qp(gal, offset) \
+	hipz_galpa_load(gal,QPTEMM_OFFSET(offset))
+
+static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+	/*  ringing doorbell :-) */
+	hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
+			    EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
+}
+
+static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+	/*  ringing doorbell :-) */
+	hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
+			    EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
+}
+
+static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
+{
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
+			    EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
+}
+
+static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
+{
+	u64 cqx_n0_reg;
+
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
+			    EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
+					   value));
+	cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
+}
+
+static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
+{
+	u64 cqx_n1_reg;
+
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
+			    EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
+	cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
+}
+
+#endif /* __HIPZ_FNC_CORE_H__ */
--- a/drivers/infiniband/hw/ehca/hipz_hw.h
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@ -0,0 +1,388 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  eHCA register definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_HW_H__
+#define __HIPZ_HW_H__
+
+#include "ehca_tools.h"
+
+/* QP Table Entry Memory Map */
+struct hipz_qptemm {
+	u64 qpx_hcr;
+	u64 qpx_c;
+	u64 qpx_herr;
+	u64 qpx_aer;
+/* 0x20*/
+	u64 qpx_sqa;
+	u64 qpx_sqc;
+	u64 qpx_rqa;
+	u64 qpx_rqc;
+/* 0x40*/
+	u64 qpx_st;
+	u64 qpx_pmstate;
+	u64 qpx_pmfa;
+	u64 qpx_pkey;
+/* 0x60*/
+	u64 qpx_pkeya;
+	u64 qpx_pkeyb;
+	u64 qpx_pkeyc;
+	u64 qpx_pkeyd;
+/* 0x80*/
+	u64 qpx_qkey;
+	u64 qpx_dqp;
+	u64 qpx_dlidp;
+	u64 qpx_portp;
+/* 0xa0*/
+	u64 qpx_slidp;
+	u64 qpx_slidpp;
+	u64 qpx_dlida;
+	u64 qpx_porta;
+/* 0xc0*/
+	u64 qpx_slida;
+	u64 qpx_slidpa;
+	u64 qpx_slvl;
+	u64 qpx_ipd;
+/* 0xe0*/
+	u64 qpx_mtu;
+	u64 qpx_lato;
+	u64 qpx_rlimit;
+	u64 qpx_rnrlimit;
+/* 0x100*/
+	u64 qpx_t;
+	u64 qpx_sqhp;
+	u64 qpx_sqptp;
+	u64 qpx_nspsn;
+/* 0x120*/
+	u64 qpx_nspsnhwm;
+	u64 reserved1;
+	u64 qpx_sdsi;
+	u64 qpx_sdsbc;
+/* 0x140*/
+	u64 qpx_sqwsize;
+	u64 qpx_sqwts;
+	u64 qpx_lsn;
+	u64 qpx_nssn;
+/* 0x160 */
+	u64 qpx_mor;
+	u64 qpx_cor;
+	u64 qpx_sqsize;
+	u64 qpx_erc;
+/* 0x180*/
+	u64 qpx_rnrrc;
+	u64 qpx_ernrwt;
+	u64 qpx_rnrresp;
+	u64 qpx_lmsna;
+/* 0x1a0 */
+	u64 qpx_sqhpc;
+	u64 qpx_sqcptp;
+	u64 qpx_sigt;
+	u64 qpx_wqecnt;
+/* 0x1c0*/
+	u64 qpx_rqhp;
+	u64 qpx_rqptp;
+	u64 qpx_rqsize;
+	u64 qpx_nrr;
+/* 0x1e0*/
+	u64 qpx_rdmac;
+	u64 qpx_nrpsn;
+	u64 qpx_lapsn;
+	u64 qpx_lcr;
+/* 0x200*/
+	u64 qpx_rwc;
+	u64 qpx_rwva;
+	u64 qpx_rdsi;
+	u64 qpx_rdsbc;
+/* 0x220*/
+	u64 qpx_rqwsize;
+	u64 qpx_crmsn;
+	u64 qpx_rdd;
+	u64 qpx_larpsn;
+/* 0x240*/
+	u64 qpx_pd;
+	u64 qpx_scqn;
+	u64 qpx_rcqn;
+	u64 qpx_aeqn;
+/* 0x260*/
+	u64 qpx_aaelog;
+	u64 qpx_ram;
+	u64 qpx_rdmaqe0;
+	u64 qpx_rdmaqe1;
+/* 0x280*/
+	u64 qpx_rdmaqe2;
+	u64 qpx_rdmaqe3;
+	u64 qpx_nrpsnhwm;
+/* 0x298*/
+	u64 reserved[(0x400 - 0x298) / 8];
+/* 0x400 extended data */
+	u64 reserved_ext[(0x500 - 0x400) / 8];
+/* 0x500 */
+	u64 reserved2[(0x1000 - 0x500) / 8];
+/* 0x1000      */
+};
+
+#define QPX_SQADDER EHCA_BMASK_IBM(48,63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48,63)
+
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
+
+/* MRMWPT Entry Memory Map */
+struct hipz_mrmwmm {
+	/* 0x00 */
+	u64 mrx_hcr;
+
+	u64 mrx_c;
+	u64 mrx_herr;
+	u64 mrx_aer;
+	/* 0x20 */
+	u64 mrx_pp;
+	u64 reserved1;
+	u64 reserved2;
+	u64 reserved3;
+	/* 0x40 */
+	u64 reserved4[(0x200 - 0x40) / 8];
+	/* 0x200 */
+	u64 mrx_ctl[64];
+
+};
+
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x)
+
+struct hipz_qpedmm {
+	/* 0x00 */
+	u64 reserved0[(0x400) / 8];
+	/* 0x400 */
+	u64 qpedx_phh;
+	u64 qpedx_ppsgp;
+	/* 0x410 */
+	u64 qpedx_ppsgu;
+	u64 qpedx_ppdgp;
+	/* 0x420 */
+	u64 qpedx_ppdgu;
+	u64 qpedx_aph;
+	/* 0x430 */
+	u64 qpedx_apsgp;
+	u64 qpedx_apsgu;
+	/* 0x440 */
+	u64 qpedx_apdgp;
+	u64 qpedx_apdgu;
+	/* 0x450 */
+	u64 qpedx_apav;
+	u64 qpedx_apsav;
+	/* 0x460  */
+	u64 qpedx_hcr;
+	u64 reserved1[4];
+	/* 0x488 */
+	u64 qpedx_rrl0;
+	/* 0x490 */
+	u64 qpedx_rrrkey0;
+	u64 qpedx_rrva0;
+	/* 0x4a0 */
+	u64 reserved2;
+	u64 qpedx_rrl1;
+	/* 0x4b0 */
+	u64 qpedx_rrrkey1;
+	u64 qpedx_rrva1;
+	/* 0x4c0 */
+	u64 reserved3;
+	u64 qpedx_rrl2;
+	/* 0x4d0 */
+	u64 qpedx_rrrkey2;
+	u64 qpedx_rrva2;
+	/* 0x4e0 */
+	u64 reserved4;
+	u64 qpedx_rrl3;
+	/* 0x4f0 */
+	u64 qpedx_rrrkey3;
+	u64 qpedx_rrva3;
+};
+
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x)
+
+/* CQ Table Entry Memory Map */
+struct hipz_cqtemm {
+	u64 cqx_hcr;
+	u64 cqx_c;
+	u64 cqx_herr;
+	u64 cqx_aer;
+/* 0x20  */
+	u64 cqx_ptp;
+	u64 cqx_tp;
+	u64 cqx_fec;
+	u64 cqx_feca;
+/* 0x40  */
+	u64 cqx_ep;
+	u64 cqx_eq;
+/* 0x50  */
+	u64 reserved1;
+	u64 cqx_n0;
+/* 0x60  */
+	u64 cqx_n1;
+	u64 reserved2[(0x1000 - 0x60) / 8];
+/* 0x1000 */
+};
+
+#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32,63)
+#define CQX_FECADDER              EHCA_BMASK_IBM(32,63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0)
+
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x)
+
+/* EQ Table Entry Memory Map */
+struct hipz_eqtemm {
+	u64 eqx_hcr;
+	u64 eqx_c;
+
+	u64 eqx_herr;
+	u64 eqx_aer;
+/* 0x20 */
+	u64 eqx_ptp;
+	u64 eqx_tp;
+	u64 eqx_ssba;
+	u64 eqx_psba;
+
+/* 0x40 */
+	u64 eqx_cec;
+	u64 eqx_meql;
+	u64 eqx_xisbi;
+	u64 eqx_xisc;
+/* 0x60 */
+	u64 eqx_it;
+
+};
+
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x)
+
+/* access control defines for MR/MW */
+#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
+#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
+#define HIPZ_ACCESSCTRL_R_READ   0x00200000
+#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
+#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
+
+/* query hca response block */
+struct hipz_query_hca {
+	u32 cur_reliable_dg;
+	u32 cur_qp;
+	u32 cur_cq;
+	u32 cur_eq;
+	u32 cur_mr;
+	u32 cur_mw;
+	u32 cur_ee_context;
+	u32 cur_mcast_grp;
+	u32 cur_qp_attached_mcast_grp;
+	u32 reserved1;
+	u32 cur_ipv6_qp;
+	u32 cur_eth_qp;
+	u32 cur_hp_mr;
+	u32 reserved2[3];
+	u32 max_rd_domain;
+	u32 max_qp;
+	u32 max_cq;
+	u32 max_eq;
+	u32 max_mr;
+	u32 max_hp_mr;
+	u32 max_mw;
+	u32 max_mrwpte;
+	u32 max_special_mrwpte;
+	u32 max_rd_ee_context;
+	u32 max_mcast_grp;
+	u32 max_total_mcast_qp_attach;
+	u32 max_mcast_qp_attach;
+	u32 max_raw_ipv6_qp;
+	u32 max_raw_ethy_qp;
+	u32 internal_clock_frequency;
+	u32 max_pd;
+	u32 max_ah;
+	u32 max_cqe;
+	u32 max_wqes_wq;
+	u32 max_partitions;
+	u32 max_rr_ee_context;
+	u32 max_rr_qp;
+	u32 max_rr_hca;
+	u32 max_act_wqs_ee_context;
+	u32 max_act_wqs_qp;
+	u32 max_sge;
+	u32 max_sge_rd;
+	u32 memory_page_size_supported;
+	u64 max_mr_size;
+	u32 local_ca_ack_delay;
+	u32 num_ports;
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 hw_ver;
+	u64 node_guid;
+	u64 hca_cap_indicators;
+	u32 data_counter_register_size;
+	u32 max_shared_rq;
+	u32 max_isns_eq;
+	u32 max_neq;
+} __attribute__ ((packed));
+
+/* query port response block */
+struct hipz_query_port {
+	u32 state;
+	u32 bad_pkey_cntr;
+	u32 lmc;
+	u32 lid;
+	u32 subnet_timeout;
+	u32 qkey_viol_cntr;
+	u32 sm_sl;
+	u32 sm_lid;
+	u32 capability_mask;
+	u32 init_type_reply;
+	u32 pkey_tbl_len;
+	u32 gid_tbl_len;
+	u64 gid_prefix;
+	u32 port_nr;
+	u16 pkey_entries[16];
+	u8  reserved1[32];
+	u32 trent_size;
+	u32 trbuf_size;
+	u64 max_msg_sz;
+	u32 max_mtu;
+	u32 vl_cap;
+	u8  reserved2[1900];
+	u64 guid_entries[255];
+} __attribute__ ((packed));
+
+#endif
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@ -0,0 +1,149 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ipz_pt_fn.h"
+
+void *ipz_qpageit_get_inc(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	queue->current_q_offset += queue->pagesize;
+	if (queue->current_q_offset > queue->queue_length) {
+		queue->current_q_offset -= queue->pagesize;
+		ret = NULL;
+	}
+	if (((u64)ret) % EHCA_PAGESIZE) {
+		ehca_gen_err("ERROR!! not at PAGE-Boundary");
+		return NULL;
+	}
+	return ret;
+}
+
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	u64 last_entry_in_q = queue->queue_length - queue->qe_size;
+
+	queue->current_q_offset += queue->qe_size;
+	if (queue->current_q_offset > last_entry_in_q) {
+		queue->current_q_offset = 0;
+		queue->toggle_state = (~queue->toggle_state) & 1;
+	}
+
+	return ret;
+}
+
+int ipz_queue_ctor(struct ipz_queue *queue,
+		   const u32 nr_of_pages,
+		   const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
+{
+	int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+	int f;
+
+	if (pagesize > PAGE_SIZE) {
+		ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
+			     "than kernel page size", pagesize);
+		return 0;
+	}
+	if (!pages_per_kpage) {
+		ehca_gen_err("FATAL ERROR: invalid kernel page size. "
+			     "pages_per_kpage=%x", pages_per_kpage);
+		return 0;
+	}
+	queue->queue_length = nr_of_pages * pagesize;
+	queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+	if (!queue->queue_pages) {
+		ehca_gen_err("ERROR!! didn't get the memory");
+		return 0;
+	}
+	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+	/*
+	 * allocate pages for queue:
+	 * outer loop allocates whole kernel pages (page aligned) and
+	 * inner loop divides a kernel page into smaller hca queue pages
+	 */
+	f = 0;
+	while (f < nr_of_pages) {
+		u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL);
+		int k;
+		if (!kpage)
+			goto ipz_queue_ctor_exit0; /*NOMEM*/
+		for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
+			(queue->queue_pages)[f] = (struct ipz_page *)kpage;
+			kpage += EHCA_PAGESIZE;
+			f++;
+		}
+	}
+
+	queue->current_q_offset = 0;
+	queue->qe_size = qe_size;
+	queue->act_nr_of_sg = nr_of_sg;
+	queue->pagesize = pagesize;
+	queue->toggle_state = 1;
+	return 1;
+
+ ipz_queue_ctor_exit0:
+	ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x",
+		     queue, f, nr_of_pages);
+	for (f = 0; f < nr_of_pages; f += pages_per_kpage) {
+		if (!(queue->queue_pages)[f])
+			break;
+		free_page((unsigned long)(queue->queue_pages)[f]);
+	}
+	return 0;
+}
+
+int ipz_queue_dtor(struct ipz_queue *queue)
+{
+	int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+	int g;
+	int nr_pages;
+
+	if (!queue || !queue->queue_pages) {
+		ehca_gen_dbg("queue or queue_pages is NULL");
+		return 0;
+	}
+	nr_pages = queue->queue_length / queue->pagesize;
+	for (g = 0; g < nr_pages; g += pages_per_kpage)
+		free_page((unsigned long)(queue->queue_pages)[g]);
+	vfree(queue->queue_pages);
+
+	return 1;
+}
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@ -0,0 +1,247 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IPZ_PT_FN_H__
+#define __IPZ_PT_FN_H__
+
+#define EHCA_PAGESHIFT   12
+#define EHCA_PAGESIZE   4096UL
+#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
+#define EHCA_PT_ENTRIES 512UL
+
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+
+/* struct generic ehca page */
+struct ipz_page {
+	u8 entries[EHCA_PAGESIZE];
+};
+
+/* struct generic queue in linux kernel virtual memory (kv) */
+struct ipz_queue {
+	u64 current_q_offset;	/* current queue entry */
+
+	struct ipz_page **queue_pages;	/* array of pages belonging to queue */
+	u32 qe_size;		/* queue entry size */
+	u32 act_nr_of_sg;
+	u32 queue_length;	/* queue length allocated in bytes */
+	u32 pagesize;
+	u32 toggle_state;	/* toggle flag - per page */
+	u32 dummy3;		/* 64 bit alignment */
+};
+
+/*
+ * return current Queue Entry for a certain q_offset
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
+{
+	struct ipz_page *current_page;
+	if (q_offset >= queue->queue_length)
+		return NULL;
+	current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
+	return  &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+}
+
+/*
+ * return current Queue Entry
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_get(struct ipz_queue *queue)
+{
+	return ipz_qeit_calc(queue, queue->current_q_offset);
+}
+
+/*
+ * return current Queue Page , increment Queue Page iterator from
+ * page to page in struct ipz_queue, last increment will return 0! and
+ * NOT wrap
+ * returns address (kv) of Queue Page
+ * warning don't use in parallel with ipz_QE_get_inc()
+ */
+void *ipz_qpageit_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	queue->current_q_offset += queue->qe_size;
+	if (queue->current_q_offset >= queue->queue_length) {
+		queue->current_q_offset = 0;
+		/* toggle the valid flag */
+		queue->toggle_state = (~queue->toggle_state) & 1;
+	}
+
+	return ret;
+}
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
+{
+	struct ehca_cqe *cqe = ipz_qeit_get(queue);
+	u32 cqe_flags = cqe->cqe_flags;
+
+	if ((cqe_flags >> 7) != (queue->toggle_state & 1))
+		return NULL;
+
+	ipz_qeit_get_inc(queue);
+	return cqe;
+}
+
+/*
+ * returns and resets Queue Entry iterator
+ * returns address (kv) of first Queue Entry
+ */
+static inline void *ipz_qeit_reset(struct ipz_queue *queue)
+{
+	queue->current_q_offset = 0;
+	return ipz_qeit_get(queue);
+}
+
+/* struct generic page table */
+struct ipz_pt {
+	u64 entries[EHCA_PT_ENTRIES];
+};
+
+/* struct page table for a queue, only to be used in pf */
+struct ipz_qpt {
+	/* queue page tables (kv), use u64 because we know the element length */
+	u64 *qpts;
+	u32 n_qpts;
+	u32 n_ptes;       /*  number of page table entries */
+	u64 *current_pte_addr;
+};
+
+/*
+ * constructor for a ipz_queue_t, placement new for ipz_queue_t,
+ * new for all dependent datastructors
+ * all QP Tables are the same
+ * flow:
+ *    allocate+pin queue
+ * see ipz_qpt_ctor()
+ * returns true if ok, false if out of memory
+ */
+int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
+		   const u32 pagesize, const u32 qe_size,
+		   const u32 nr_of_sg);
+
+/*
+ * destructor for a ipz_queue_t
+ *  -# free queue
+ *  see ipz_queue_ctor()
+ *  returns true if ok, false if queue was NULL-ptr of free failed
+ */
+int ipz_queue_dtor(struct ipz_queue *queue);
+
+/*
+ * constructor for a ipz_qpt_t,
+ * placement new for struct ipz_queue, new for all dependent datastructors
+ * all QP Tables are the same,
+ * flow:
+ * -# allocate+pin queue
+ * -# initialise ptcb
+ * -# allocate+pin PTs
+ * -# link PTs to a ring, according to HCA Arch, set bit62 id needed
+ * -# the ring must have room for exactly nr_of_PTEs
+ * see ipz_qpt_ctor()
+ */
+void ipz_qpt_ctor(struct ipz_qpt *qpt,
+		  const u32 nr_of_qes,
+		  const u32 pagesize,
+		  const u32 qe_size,
+		  const u8 lowbyte, const u8 toggle,
+		  u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ * fix EQ page problems
+ */
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Event Queue Entry, increment Queue Entry iterator
+ * by one step in struct ipz_queue if valid, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_queue_QPageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	u32 qe = *(u8 *) ret;
+	if ((qe >> 7) != (queue->toggle_state & 1))
+		return NULL;
+	ipz_qeit_eq_get_inc(queue); /* this is a good one */
+	return ret;
+}
+
+/* returns address (GX) of first queue entry */
+static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
+{
+	return be64_to_cpu(qpt->qpts[0]);
+}
+
+/* returns address (kv) of first page of queue page table */
+static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
+{
+	return qpt->qpts;
+}
+
+#endif				/* __IPZ_PT_FN_H__ */
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@ -1,16 +1,9 @@
-config IPATH_CORE
-	tristate "QLogic InfiniPath Driver"
-	depends on 64BIT && PCI_MSI && NET
-	---help---
-	This is a low-level driver for QLogic InfiniPath host channel
-	adapters (HCAs) based on the HT-400 and PE-800 chips.
-
 config INFINIBAND_IPATH
-	tristate "QLogic InfiniPath Verbs Driver"
-	depends on IPATH_CORE && INFINIBAND
+	tristate "QLogic InfiniPath Driver"
+	depends on PCI_MSI && 64BIT && INFINIBAND
 	---help---
-	This is a driver that provides InfiniBand verbs support for
-	QLogic InfiniPath host channel adapters (HCAs).  This
-	allows these devices to be used with both kernel upper level
-	protocols such as IP-over-InfiniBand as well as with userspace
-	applications (in conjunction with InfiniBand userspace access).
+	This is a driver for QLogic InfiniPath host channel adapters,
+	including InfiniBand verbs support.  This driver allows these
+	devices to be used with both kernel upper level protocols such
+	as IP-over-InfiniBand as well as with userspace applications
+	(in conjunction with InfiniBand userspace access).
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@ -1,36 +1,35 @@
 EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
 	-DIPATH_KERN_TYPE=0

-obj-$(CONFIG_IPATH_CORE) += ipath_core.o
 obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o

-ipath_core-y := \
+ib_ipath-y := \
+	ipath_cq.o \
 	ipath_diag.o \
 	ipath_driver.o \
 	ipath_eeprom.o \
 	ipath_file_ops.o \
 	ipath_fs.o \
-	ipath_ht400.o \
+	ipath_iba6110.o \
+	ipath_iba6120.o \
 	ipath_init_chip.o \
 	ipath_intr.o \
-	ipath_layer.o \
-	ipath_pe800.o \
-	ipath_stats.o \
-	ipath_sysfs.o \
-	ipath_user_pages.o
-
-ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-
-ib_ipath-y := \
-	ipath_cq.o \
 	ipath_keys.o \
+	ipath_layer.o \
 	ipath_mad.o \
+	ipath_mmap.o \
 	ipath_mr.o \
 	ipath_qp.o \
 	ipath_rc.o \
 	ipath_ruc.o \
 	ipath_srq.o \
+	ipath_stats.o \
+	ipath_sysfs.o \
 	ipath_uc.o \
 	ipath_ud.o \
-	ipath_verbs.o \
-	ipath_verbs_mcast.o
+	ipath_user_pages.o \
+	ipath_verbs_mcast.o \
+	ipath_verbs.o
+
+ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@ -106,9 +106,9 @@ struct infinipath_stats {
 	__u64 sps_ether_spkts;
 	/* number of "ethernet" packets received by driver */
 	__u64 sps_ether_rpkts;
-	/* number of SMA packets sent by driver */
+	/* number of SMA packets sent by driver. Obsolete. */
 	__u64 sps_sma_spkts;
-	/* number of SMA packets received by driver */
+	/* number of SMA packets received by driver. Obsolete. */
 	__u64 sps_sma_rpkts;
 	/* number of times all ports rcvhdrq was full and packet dropped */
 	__u64 sps_hdrqfull;
@ -138,7 +138,7 @@ struct infinipath_stats {
 	__u64 sps_pageunlocks;
 	/*
 	 * Number of packets dropped in kernel other than errors (ether
-	 * packets if ipath not configured, sma/mad, etc.)
+	 * packets if ipath not configured, etc.)
 	 */
 	__u64 sps_krdrops;
 	/* pad for future growth */
@ -153,8 +153,6 @@ struct infinipath_stats {
 #define IPATH_STATUS_DISABLED      0x2	/* hardware disabled */
 /* Device has been disabled via admin request */
 #define IPATH_STATUS_ADMIN_DISABLED    0x4
-#define IPATH_STATUS_OIB_SMA       0x8	/* ipath_mad kernel SMA running */
-#define IPATH_STATUS_SMA          0x10	/* user SMA running */
 /* Chip has been found and initted */
 #define IPATH_STATUS_CHIP_PRESENT 0x20
 /* IB link is at ACTIVE, usable for data traffic */
@ -465,12 +463,11 @@ struct __ipath_sendpkt {
 	struct ipath_iovec sps_iov[4];
 };

-/* Passed into SMA special file's ->read and ->write methods. */
-struct ipath_sma_pkt
-{
-	__u32 unit;	/* unit on which to send packet */
-	__u64 data;	/* address of payload in userspace */
-	__u32 len;	/* length of payload */
+/* Passed into diag data special file's ->write method. */
+struct ipath_diag_pkt {
+	__u32 unit;
+	__u64 data;
+	__u32 len;
 };

 /*
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@ -42,20 +42,28 @@
 * @entry: work completion entry to add
 * @sig: true if @entry is a solicitated entry
 *
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
 */
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 {
+	struct ipath_cq_wc *wc = cq->queue;
 	unsigned long flags;
+	u32 head;
 	u32 next;

 	spin_lock_irqsave(&cq->lock, flags);

-	if (cq->head == cq->ibcq.cqe)
+	/*
+	 * Note that the head pointer might be writable by user processes.
+	 * Take care to verify it is a sane value.
+	 */
+	head = wc->head;
+	if (head >= (unsigned) cq->ibcq.cqe) {
+		head = cq->ibcq.cqe;
 		next = 0;
-	else
-		next = cq->head + 1;
-	if (unlikely(next == cq->tail)) {
+	} else
+		next = head + 1;
+	if (unlikely(next == wc->tail)) {
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
 			struct ib_event ev;
@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 		}
 		return;
 	}
-	cq->queue[cq->head] = *entry;
-	cq->head = next;
+	wc->queue[head] = *entry;
+	wc->head = next;

 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
+	struct ipath_cq_wc *wc = cq->queue;
 	unsigned long flags;
 	int npolled;

 	spin_lock_irqsave(&cq->lock, flags);

 	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (cq->tail == cq->head)
+		if (wc->tail == wc->head)
 			break;
-		*entry = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+		*entry = wc->queue[wc->tail];
+		if (wc->tail >= cq->ibcq.cqe)
+			wc->tail = 0;
 		else
-			cq->tail++;
+			wc->tail++;
 	}

 	spin_unlock_irqrestore(&cq->lock, flags);
@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	struct ipath_cq *cq;
-	struct ib_wc *wc;
+	struct ipath_cq_wc *wc;
 	struct ib_cq *ret;

-	if (entries > ib_ipath_max_cqes) {
+	if (entries < 1 || entries > ib_ipath_max_cqes) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}

 	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}

-	/*
-	 * Need to use vmalloc() if we want to support large #s of
-	 * entries.
-	 */
+	/* Allocate the completion queue structure. */
 	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}

 	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
+	 * Allocate the completion queue entries and head/tail pointers.
+	 * This is allocated separately so that it can be resized and
+	 * also mapped into user space.
+	 * We need to use vmalloc() in order to support mmap and large
+	 * numbers of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (entries + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
 	if (!wc) {
-		kfree(cq);
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto bail_cq;
 	}
+
+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) wc;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wc;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wc;
+		}
+		cq->ip = ip;
+		ip->context = context;
+		ip->obj = wc;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * entries);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		cq->ip = NULL;
+
 	/*
 	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
 	 * The number of entries should be >= the number requested or return
@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 	cq->triggered = 0;
 	spin_lock_init(&cq->lock);
 	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-	cq->head = 0;
-	cq->tail = 0;
+	wc->head = 0;
+	wc->tail = 0;
 	cq->queue = wc;

 	ret = &cq->ibcq;

 	dev->n_cqs_allocated++;
+	goto done;

-bail:
+bail_wc:
+	vfree(wc);
+
+bail_cq:
+	kfree(cq);
+
+done:
 	return ret;
 }

@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq)

 	tasklet_kill(&cq->comptask);
 	dev->n_cqs_allocated--;
-	vfree(cq->queue);
+	if (cq->ip)
+		kref_put(&cq->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(cq->queue);
 	kfree(cq);

 	return 0;
@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 	spin_lock_irqsave(&cq->lock, flags);
 	/*
 	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions.
+	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
 	 */
 	if (cq->notify != IB_CQ_NEXT_COMP)
 		cq->notify = notify;
@ -264,46 +319,86 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
-	struct ib_wc *wc, *old_wc;
-	u32 n;
+	struct ipath_cq_wc *old_wc = cq->queue;
+	struct ipath_cq_wc *wc;
+	u32 head, tail, n;
 	int ret;

+	if (cqe < 1 || cqe > ib_ipath_max_cqes) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (cqe + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
 	if (!wc) {
 		ret = -ENOMEM;
 		goto bail;
 	}

+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		__u64 offset = (__u64) wc;
+
+		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (ret)
+			goto bail;
+	}
+
 	spin_lock_irq(&cq->lock);
-	if (cq->head < cq->tail)
-		n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+	/*
+	 * Make sure head and tail are sane since they
+	 * might be user writable.
+	 */
+	head = old_wc->head;
+	if (head > (u32) cq->ibcq.cqe)
+		head = (u32) cq->ibcq.cqe;
+	tail = old_wc->tail;
+	if (tail > (u32) cq->ibcq.cqe)
+		tail = (u32) cq->ibcq.cqe;
+	if (head < tail)
+		n = cq->ibcq.cqe + 1 + head - tail;
 	else
-		n = cq->head - cq->tail;
+		n = head - tail;
 	if (unlikely((u32)cqe < n)) {
 		spin_unlock_irq(&cq->lock);
 		vfree(wc);
 		ret = -EOVERFLOW;
 		goto bail;
 	}
-	for (n = 0; cq->tail != cq->head; n++) {
-		wc[n] = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+	for (n = 0; tail != head; n++) {
+		wc->queue[n] = old_wc->queue[tail];
+		if (tail == (u32) cq->ibcq.cqe)
+			tail = 0;
 		else
-			cq->tail++;
+			tail++;
 	}
 	cq->ibcq.cqe = cqe;
-	cq->head = n;
-	cq->tail = 0;
-	old_wc = cq->queue;
+	wc->head = n;
+	wc->tail = 0;
 	cq->queue = wc;
 	spin_unlock_irq(&cq->lock);

 	vfree(old_wc);

+	if (cq->ip) {
+		struct ipath_ibdev *dev = to_idev(ibcq->device);
+		struct ipath_mmap_info *ip = cq->ip;
+
+		ip->obj = wc;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * cqe);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	}
+
 	ret = 0;

 bail:
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@ -60,7 +60,6 @@
 #define __IPATH_USER_SEND   0x1000	/* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
-#define __IPATH_SMADBG      0x8000	/* sma packet debug */
 #define __IPATH_IPATHDBG    0x10000	/* Ethernet (IPATH) gen debug */
 #define __IPATH_IPATHWARN   0x20000	/* Ethernet (IPATH) warnings */
 #define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
@ -84,7 +83,6 @@
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG     0x0
 #define __IPATH_EPKTDBG   0x0	/* print ethernet packet data */
-#define __IPATH_SMADBG    0x0   /* process startup (init)/exit messages */
 #define __IPATH_IPATHDBG  0x0	/* Ethernet (IPATH) table dump on */
 #define __IPATH_IPATHWARN 0x0	/* Ethernet (IPATH) warnings on   */
 #define __IPATH_IPATHERR  0x0	/* Ethernet (IPATH) errors on   */
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@ -41,11 +41,11 @@
 * through the /sys/bus/pci resource mmap interface.
 */

+#include <linux/io.h>
 #include <linux/pci.h>
 #include <asm/uaccess.h>

 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"

 int ipath_diag_inuse;
@ -274,6 +274,158 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
 	return ret;
 }

+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off);
+
+static struct file_operations diagpkt_file_ops = {
+	.owner = THIS_MODULE,
+	.write = ipath_diagpkt_write,
+};
+
+static struct cdev *diagpkt_cdev;
+static struct class_device *diagpkt_class_dev;
+
+int __init ipath_diagpkt_add(void)
+{
+	return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+			       "ipath_diagpkt", &diagpkt_file_ops,
+			       &diagpkt_cdev, &diagpkt_class_dev);
+}
+
+void __exit ipath_diagpkt_remove(void)
+{
+	ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
+}
+
+/**
+ * ipath_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: ipath_diag_pkt structure saying where to get the packet
+ * @count: size of data to write
+ * @off: unused by this code
+ */
+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off)
+{
+	u32 __iomem *piobuf;
+	u32 plen, clen, pbufn;
+	struct ipath_diag_pkt dp;
+	u32 *tmpbuf = NULL;
+	struct ipath_devdata *dd;
+	ssize_t ret = 0;
+	u64 val;
+
+	if (count < sizeof(dp)) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	if (copy_from_user(&dp, data, sizeof(dp))) {
+		ret = -EFAULT;
+		goto bail;
+	}
+
+	/* send count must be an exact number of dwords */
+	if (dp.len & 3) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	clen = dp.len >> 2;
+
+	dd = ipath_lookup(dp.unit);
+	if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
+	    !dd->ipath_kregbase) {
+		ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
+			   dp.unit);
+		ret = -ENODEV;
+		goto bail;
+	}
+
+	if (ipath_diag_inuse && !diag_set_link &&
+	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+		diag_set_link = 1;
+		ipath_cdbg(VERBOSE, "Trying to set to set link active for "
+			   "diag pkt\n");
+		ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+		ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+	}
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
+		ret = -ENODEV;
+		goto bail;
+	}
+	val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
+	if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
+	    val != IPATH_IBSTATE_ACTIVE) {
+		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
+			   dd->ipath_unit, (unsigned long long) val);
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/* need total length before first word written */
+	/* +1 word is for the qword padding */
+	plen = sizeof(u32) + dp.len;
+
+	if ((plen + 4) > dd->ipath_ibmaxlen) {
+		ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
+			  plen - 4, dd->ipath_ibmaxlen);
+		ret = -EINVAL;
+		goto bail;	/* before writing pbc */
+	}
+	tmpbuf = vmalloc(plen);
+	if (!tmpbuf) {
+		dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
+			 "failing\n");
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	if (copy_from_user(tmpbuf,
+			   (const void __user *) (unsigned long) dp.data,
+			   dp.len)) {
+		ret = -EFAULT;
+		goto bail;
+	}
+
+	piobuf = ipath_getpiobuf(dd, &pbufn);
+	if (!piobuf) {
+		ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
+			   dd->ipath_unit);
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	plen >>= 2;		/* in dwords */
+
+	if (ipath_debug & __IPATH_PKTDBG)
+		ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
+			   dd->ipath_unit, plen - 1, pbufn);
+
+	/* we have to flush after the PBC for correctness on some cpus
+	 * or WC buffer can be written out of order */
+	writeq(plen, piobuf);
+	ipath_flush_wc();
+	/* copy all by the trigger word, then flush, so it's written
+	 * to chip before trigger word, then write trigger word, then
+	 * flush again, so packet is sent. */
+	__iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+	ipath_flush_wc();
+	__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+	ipath_flush_wc();
+
+	ret = sizeof(dp);
+
+bail:
+	vfree(tmpbuf);
+	return ret;
+}
+
 static int ipath_diag_release(struct inode *in, struct file *fp)
 {
 	mutex_lock(&ipath_mutex);
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@ -39,7 +39,7 @@
 #include <linux/vmalloc.h>

 #include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"

 static void ipath_update_pio_bufs(struct ipath_devdata *);
@ -51,8 +51,6 @@ const char *ipath_get_unit_name(int unit)
 	return iname;
 }

-EXPORT_SYMBOL_GPL(ipath_get_unit_name);
-
 #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
 #define PFX IPATH_DRV_NAME ": "

@ -60,13 +58,13 @@ EXPORT_SYMBOL_GPL(ipath_get_unit_name);
 * The size has to be longer than this string, so we can append
 * board/chip information to it in the init code.
 */
-const char ipath_core_version[] = IPATH_IDSTR "\n";
+const char ib_ipath_version[] = IPATH_IDSTR "\n";

 static struct idr unit_table;
 DEFINE_SPINLOCK(ipath_devs_lock);
 LIST_HEAD(ipath_dev_list);

-wait_queue_head_t ipath_sma_state_wait;
+wait_queue_head_t ipath_state_wait;

 unsigned ipath_debug = __IPATH_INFO;

@ -403,10 +401,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	/* setup the chip-specific functions, as early as possible. */
 	switch (ent->device) {
 	case PCI_DEVICE_ID_INFINIPATH_HT:
-		ipath_init_ht400_funcs(dd);
+		ipath_init_iba6110_funcs(dd);
 		break;
 	case PCI_DEVICE_ID_INFINIPATH_PE800:
-		ipath_init_pe800_funcs(dd);
+		ipath_init_iba6120_funcs(dd);
 		break;
 	default:
 		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@ -440,7 +438,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	}
 	dd->ipath_pcirev = rev;

+#if defined(__powerpc__)
+	/* There isn't a generic way to specify writethrough mappings */
+	dd->ipath_kregbase = __ioremap(addr, len,
+		(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
+#else
 	dd->ipath_kregbase = ioremap_nocache(addr, len);
+#endif

 	if (!dd->ipath_kregbase) {
 		ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
@ -503,7 +507,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	ipathfs_add_device(dd);
 	ipath_user_add(dd);
 	ipath_diag_add(dd);
-	ipath_layer_add(dd);
+	ipath_register_ib_device(dd);

 	goto bail;

@ -532,7 +536,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
 		return;

 	dd = pci_get_drvdata(pdev);
-	ipath_layer_remove(dd);
+	ipath_unregister_ib_device(dd->verbs_dev);
 	ipath_diag_remove(dd);
 	ipath_user_remove(dd);
 	ipathfs_remove_device(dd);
@ -607,21 +611,23 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
 *
 * wait up to msecs milliseconds for IB link state change to occur for
 * now, take the easy polling route.  Currently used only by
- * ipath_layer_set_linkstate.  Returns 0 if state reached, otherwise
+ * ipath_set_linkstate.  Returns 0 if state reached, otherwise
 * -ETIMEDOUT state can have multiple states set, for any of several
 * transitions.
 */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
+				int msecs)
 {
-	dd->ipath_sma_state_wanted = state;
-	wait_event_interruptible_timeout(ipath_sma_state_wait,
+	dd->ipath_state_wanted = state;
+	wait_event_interruptible_timeout(ipath_state_wait,
 					 (dd->ipath_flags & state),
 					 msecs_to_jiffies(msecs));
-	dd->ipath_sma_state_wanted = 0;
+	dd->ipath_state_wanted = 0;

 	if (!(dd->ipath_flags & state)) {
 		u64 val;
-		ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
+		ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
+			   " ms\n",
 			   /* test INIT ahead of DOWN, both can be set */
 			   (state & IPATH_LINKINIT) ? "INIT" :
 			   ((state & IPATH_LINKDOWN) ? "DOWN" :
@ -807,58 +813,6 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
 	return skb;
 }

-/**
- * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
- * @dd: the infinipath device
- * @etail: the sk_buff number
- * @tlen: the total packet length
- * @hdr: the ethernet header
- *
- * Separate routine for better overall optimization
- */
-static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
-			    u32 tlen, struct ether_header *hdr)
-{
-	u32 elen;
-	u8 pad, *bthbytes;
-	struct sk_buff *skb, *nskb;
-
-	if (dd->ipath_port0_skbs &&
-			hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) {
-		/*
-		 * Allocate a new sk_buff to replace the one we give
-		 * to the network stack.
-		 */
-		nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
-		if (!nskb) {
-			/* count OK packets that we drop */
-			ipath_stats.sps_krdrops++;
-			return;
-		}
-
-		bthbytes = (u8 *) hdr->bth;
-		pad = (bthbytes[1] >> 4) & 3;
-		/* +CRC32 */
-		elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
-
-		skb = dd->ipath_port0_skbs[etail];
-		dd->ipath_port0_skbs[etail] = nskb;
-		skb_put(skb, elen);
-
-		dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
-				    ((char __iomem *) dd->ipath_kregbase
-				     + dd->ipath_rcvegrbase), 0,
-				    virt_to_phys(nskb->data));
-
-		__ipath_layer_rcv(dd, hdr, skb);
-
-		/* another ether packet received */
-		ipath_stats.sps_ether_rpkts++;
-	}
-	else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP)
-		__ipath_layer_rcv_lid(dd, hdr);
-}
-
 static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
 			     u32 eflags,
 			     u32 l,
@ -972,26 +926,17 @@ void ipath_kreceive(struct ipath_devdata *dd)
 		if (unlikely(eflags))
 			ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
 		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-				int ret = __ipath_verbs_rcv(dd, rc + 1,
-							    ebuf, tlen);
-				if (ret == -ENODEV)
-					ipath_cdbg(VERBOSE,
-						   "received IB packet, "
-						   "not SMA (QP=%x)\n", qp);
-				if (dd->ipath_lli_counter)
-					dd->ipath_lli_counter--;
-
-		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-			if (qp == IPATH_KD_QP &&
-			    bthbytes[0] == ipath_layer_rcv_opcode &&
-			    ebuf)
-				ipath_rcv_layer(dd, etail, tlen,
-						(struct ether_header *)hdr);
-			else
-				ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-					   "qp=%x), len %x; ignored\n",
-					   etype, bthbytes[0], qp, tlen);
+			ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+			if (dd->ipath_lli_counter)
+				dd->ipath_lli_counter--;
+			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+				   "qp=%x), len %x; ignored\n",
+				   etype, bthbytes[0], qp, tlen);
 		}
+		else if (etype == RCVHQ_RCV_TYPE_EAGER)
+			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+				   "qp=%x), len %x; ignored\n",
+				   etype, bthbytes[0], qp, tlen);
 		else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
 			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
 				  be32_to_cpu(hdr->bth[0]) & 0xff);
@ -1024,7 +969,8 @@ void ipath_kreceive(struct ipath_devdata *dd)
 		 */
 		if (l == hdrqtail || (i && !(i&0xf))) {
 			u64 lval;
-			if (l == hdrqtail) /* PE-800 interrupt only on last */
+			if (l == hdrqtail)
+				/* request IBA6120 interrupt only on last */
 				lval = dd->ipath_rhdrhead_intr_off | l;
 			else
 				lval = l;
@ -1038,7 +984,7 @@ void ipath_kreceive(struct ipath_devdata *dd)
 	}

 	if (!dd->ipath_rhdrhead_intr_off && !reloop) {
-		/* HT-400 workaround; we can have a race clearing chip
+		/* IBA6110 workaround; we can have a race clearing chip
 		 * interrupt with another interrupt about to be delivered,
 		 * and can clear it before it is delivered on the GPIO
 		 * workaround.  By doing the extra check here for the
@ -1211,7 +1157,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
 *
 * do appropriate marking as busy, etc.
 * returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_sma_send_pkt and ipath_layer_send
+ * Used by ipath_layer_send
 */
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
 {
@ -1317,13 +1263,6 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
 		goto bail;
 	}

-	if (updated)
-		/*
-		 * ran out of bufs, now some (at least this one we just
-		 * got) are now available, so tell the layered driver.
-		 */
-		__ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-
 	/*
 	 * set next starting place.  Since it's just an optimization,
 	 * it doesn't matter who wins on this, so no locking
@ -1500,7 +1439,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
 	return ret;
 }

-void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 {
 	static const char *what[4] = {
 		[0] = "DOWN",
@ -1511,7 +1450,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 	int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
 			INFINIPATH_IBCC_LINKCMD_MASK;

-	ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
+	ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
 		   "is %s\n", dd->ipath_unit,
 		   what[linkcmd],
 		   ipath_ibcstatus_str[
@ -1520,7 +1459,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 			    INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
 			   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
 	/* flush all queued sends when going to DOWN or INIT, to be sure that
-	 * they don't block SMA and other MAD packets */
+	 * they don't block MAD packets */
 	if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 				 INFINIPATH_S_ABORT);
@ -1534,6 +1473,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 			 dd->ipath_ibcctrl | which);
 }

+int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+{
+	u32 lstate;
+	int ret;
+
+	switch (newstate) {
+	case IPATH_IB_LINKDOWN:
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKDOWN_SLEEP:
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKDOWN_DISABLE:
+		ipath_set_ib_lstate(dd,
+				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKINIT:
+		if (dd->ipath_flags & IPATH_LINKINIT) {
+			ret = 0;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		lstate = IPATH_LINKINIT;
+		break;
+
+	case IPATH_IB_LINKARM:
+		if (dd->ipath_flags & IPATH_LINKARMED) {
+			ret = 0;
+			goto bail;
+		}
+		if (!(dd->ipath_flags &
+		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
+			ret = -EINVAL;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		/*
+		 * Since the port can transition to ACTIVE by receiving
+		 * a non VL 15 packet, wait for either state.
+		 */
+		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
+		break;
+
+	case IPATH_IB_LINKACTIVE:
+		if (dd->ipath_flags & IPATH_LINKACTIVE) {
+			ret = 0;
+			goto bail;
+		}
+		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
+			ret = -EINVAL;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		lstate = IPATH_LINKACTIVE;
+		break;
+
+	default:
+		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
+		ret = -EINVAL;
+		goto bail;
+	}
+	ret = ipath_wait_linkstate(dd, lstate, 2000);
+
+bail:
+	return ret;
+}
+
+/**
+ * ipath_set_mtu - set the MTU
+ * @dd: the infinipath device
+ * @arg: the new MTU
+ *
+ * we can handle "any" incoming size, the issue here is whether we
+ * need to restrict our outgoing size.   For now, we don't do any
+ * sanity checking on this, and we don't deal with what happens to
+ * programs that are already running when the size changes.
+ * NOTE: changing the MTU will usually cause the IBC to go back to
+ * link initialize (IPATH_IBSTATE_INIT) state...
+ */
+int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
+{
+	u32 piosize;
+	int changed = 0;
+	int ret;
+
+	/*
+	 * mtu is IB data payload max.  It's the largest power of 2 less
+	 * than piosize (or even larger, since it only really controls the
+	 * largest we can receive; we can send the max of the mtu and
+	 * piosize).  We check that it's one of the valid IB sizes.
+	 */
+	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+	    arg != 4096) {
+		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
+		ret = -EINVAL;
+		goto bail;
+	}
+	if (dd->ipath_ibmtu == arg) {
+		ret = 0;        /* same as current */
+		goto bail;
+	}
+
+	piosize = dd->ipath_ibmaxlen;
+	dd->ipath_ibmtu = arg;
+
+	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
+		/* Only if it's not the initial value (or reset to it) */
+		if (piosize != dd->ipath_init_ibmaxlen) {
+			dd->ipath_ibmaxlen = piosize;
+			changed = 1;
+		}
+	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
+		piosize = arg + IPATH_PIO_MAXIBHDR;
+		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
+			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
+			   arg);
+		dd->ipath_ibmaxlen = piosize;
+		changed = 1;
+	}
+
+	if (changed) {
+		/*
+		 * set the IBC maxpktlength to the size of our pio
+		 * buffers in words
+		 */
+		u64 ibc = dd->ipath_ibcctrl;
+		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
+			 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
+
+		piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
+		dd->ipath_ibmaxlen = piosize;
+		piosize /= sizeof(u32); /* in words */
+		/*
+		 * for ICRC, which we only send in diag test pkt mode, and
+		 * we don't need to worry about that for mtu
+		 */
+		piosize += 1;
+
+		ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+		dd->ipath_ibcctrl = ibc;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+		dd->ipath_f_tidtemplate(dd);
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+{
+	dd->ipath_lid = arg;
+	dd->ipath_lmc = lmc;
+
+	return 0;
+}
+
 /**
 * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
 * @dd: the infinipath device
@ -1637,13 +1750,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 	ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
 			    INFINIPATH_IBCC_LINKINITCMD_SHIFT);

-	/*
-	 * we are shutting down, so tell the layered driver.  We don't do
-	 * this on just a link state change, much like ethernet, a cable
-	 * unplug, etc. doesn't change driver state
-	 */
-	ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);
-
 	/* disable IBC */
 	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
@ -1743,7 +1849,7 @@ static int __init infinipath_init(void)
 {
 	int ret;

-	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);

 	/*
 	 * These must be called before the driver is registered with
@ -1776,8 +1882,18 @@ static int __init infinipath_init(void)
 		goto bail_group;
 	}

+	ret = ipath_diagpkt_add();
+	if (ret < 0) {
+		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
+		       "diag data device: error %d\n", -ret);
+		goto bail_ipathfs;
+	}
+
 	goto bail;

+bail_ipathfs:
+	ipath_exit_ipathfs();
+
 bail_group:
 	ipath_driver_remove_group(&ipath_driver.driver);

@ -1888,6 +2004,8 @@ static void __exit infinipath_cleanup(void)
 	struct ipath_devdata *dd, *tmp;
 	unsigned long flags;

+	ipath_diagpkt_remove();
+
 	ipath_exit_ipathfs();

 	ipath_driver_remove_group(&ipath_driver.driver);
@ -1998,5 +2116,22 @@ int ipath_reset_device(int unit)
 	return ret;
 }

+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
+{
+	u64 val;
+	if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
+		return -1;
+	}
+	if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
+		dd->ipath_rx_pol_inv = new_pol_inv;
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+			 INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= ((u64)dd->ipath_rx_pol_inv) <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+	}
+	return 0;
+}
 module_init(infinipath_init);
 module_exit(infinipath_cleanup);
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@ -39,7 +39,6 @@
 #include <asm/pgtable.h>

 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"

 static int ipath_open(struct inode *, struct file *);
@ -985,15 +984,17 @@ static int mmap_piobufs(struct vm_area_struct *vma,
 	 * write combining behavior we want on the PIO buffers!
 	 */

-	if (vma->vm_flags & VM_READ) {
-		dev_info(&dd->pcidev->dev,
-			 "Can't map piobufs as readable (flags=%lx)\n",
-			 vma->vm_flags);
-		ret = -EPERM;
-		goto bail;
-	}
+#if defined(__powerpc__)
+	/* There isn't a generic way to specify writethrough mappings */
+	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif

-	/* don't allow them to later change to readable with mprotect */
+	/*
+	 * don't allow them to later change to readable with mprotect (for when
+	 * not initially mapped readable, as is normally the case)
+	 */
 	vma->vm_flags &= ~VM_MAYREAD;
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;

@ -1109,7 +1110,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 		ret = mmap_rcvegrbufs(vma, pd);
 	else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
 		/*
-		 * The rcvhdrq itself; readonly except on HT-400 (so have
+		 * The rcvhdrq itself; readonly except on HT (so have
 		 * to allow writable mapping), multiple pages, contiguous
 		 * from an i/o perspective.
 		 */
@ -1149,6 +1150,7 @@ static unsigned int ipath_poll(struct file *fp,
 	struct ipath_portdata *pd;
 	u32 head, tail;
 	int bit;
+	unsigned pollflag = 0;
 	struct ipath_devdata *dd;

 	pd = port_fp(fp);
@ -1185,9 +1187,12 @@ static unsigned int ipath_poll(struct file *fp,
 			clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
 			pd->port_rcvwait_to++;
 		}
+		else
+			pollflag = POLLIN | POLLRDNORM;
 	}
 	else {
 		/* it's already happened; don't do wait_event overhead */
+		pollflag = POLLIN | POLLRDNORM;
 		pd->port_rcvnowait++;
 	}

@ -1195,7 +1200,7 @@ static unsigned int ipath_poll(struct file *fp,
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);

-	return 0;
+	return pollflag;
 }

 static int try_alloc_port(struct ipath_devdata *dd, int port,
@ -1297,14 +1302,14 @@ static int find_best_unit(struct file *fp)
 	 * This code is present to allow a knowledgeable person to
 	 * specify the layout of processes to processors before opening
 	 * this driver, and then we'll assign the process to the "closest"
-	 * HT-400 to that processor (we assume reasonable connectivity,
+	 * InfiniPath chip to that processor (we assume reasonable connectivity,
 	 * for now).  This code assumes that if affinity has been set
 	 * before this point, that at most one cpu is set; for now this
 	 * is reasonable.  I check for both cpus_empty() and cpus_full(),
 	 * in case some kernel variant sets none of the bits when no
 	 * affinity is set.  2.6.11 and 12 kernels have all present
 	 * cpus set.  Some day we'll have to fix it up further to handle
-	 * a cpu subset.  This algorithm fails for two HT-400's connected
+	 * a cpu subset.  This algorithm fails for two HT chips connected
 	 * in tunnel fashion.  Eventually this needs real topology
 	 * information.  There may be some issues with dual core numbering
 	 * as well.  This needs more work prior to release.
@ -1815,7 +1820,7 @@ int ipath_user_add(struct ipath_devdata *dd)
 		if (ret < 0) {
 			ipath_dev_err(dd, "Could not create wildcard "
 				      "minor: error %d\n", -ret);
-			goto bail_sma;
+			goto bail_user;
 		}

 		atomic_set(&user_setup, 1);
@ -1831,7 +1836,7 @@ int ipath_user_add(struct ipath_devdata *dd)

 	goto bail;

-bail_sma:
+bail_user:
 	user_cleanup();
 bail:
 	return ret;
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@ -191,8 +191,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
 	portinfo[4] = (dd->ipath_lid << 16);

 	/*
-	 * Notimpl yet SMLID (should we store this in the driver, in case
-	 * SMA dies?)  CapabilityMask is 0, we don't support any of these
+	 * Notimpl yet SMLID.
+	 * CapabilityMask is 0, we don't support any of these
 	 * DiagCode is 0; we don't store any diag info for now Notimpl yet
 	 * M_KeyLeasePeriod (we don't support M_Key)
 	 */
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@ -33,7 +33,7 @@

 /*
 * This file contains all of the code that is specific to the InfiniPath
- * HT-400 chip.
+ * HT chip.
 */

 #include <linux/pci.h>
@ -43,7 +43,7 @@
 #include "ipath_registers.h"

 /*
- * This lists the InfiniPath HT400 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
 * This structure should never be directly accessed.
 *
 * The names are in InterCap form because they're taken straight from
@ -461,8 +461,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			 * times.
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Error (freeze "
-					      "mode), no longer usable\n");
+				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					      "mode), no longer usable, SN %.16s\n",
+						  dd->ipath_serial);
 				isfatal = 1;
 			}
 			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
@ -537,7 +538,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
 		strlcat(msg, "[HT core Misc7]", msgl);
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, HT-400 unusable]",
+		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
 		/* ignore from now on, so disable until driver reloaded */
 		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
@ -553,7 +554,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,

 	if (hwerrs & _IPATH_PLL_FAIL) {
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), HT-400 unusable]",
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
 			 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
 		strlcat(msg, bitsmsg, msgl);
 		/* ignore from now on, so disable until driver reloaded */
@ -610,18 +611,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 		break;
 	case 5:
 		/*
-		 * HT-460 original production board; two production levels, with
+		 * original production board; two production levels, with
 		 * different serial number ranges.   See ipath_ht_early_init() for
 		 * case where we enable IPATH_GPIO_INTR for later serial # range.
 		 */
-		n = "InfiniPath_HT-460";
+		n = "InfiniPath_QHT7040";
 		break;
 	case 6:
 		n = "OEM_Board_3";
 		break;
 	case 7:
-		/* HT-460 small form factor production board */
-		n = "InfiniPath_HT-465";
+		/* small form factor production board */
+		n = "InfiniPath_QHT7140";
 		break;
 	case 8:
 		n = "LS/X-1";
@ -633,7 +634,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 		n = "OEM_Board_2";
 		break;
 	case 11:
-		n = "InfiniPath_HT-470";
+		n = "InfiniPath_HT-470"; /* obsoleted */
 		break;
 	case 12:
 		n = "OEM_Board_4";
@ -641,7 +642,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 	default:		/* don't know, just print the number */
 		ipath_dev_err(dd, "Don't yet know about board "
 			      "with ID %u\n", boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u",
+		snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
 			 boardrev);
 		break;
 	}
@ -650,11 +651,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,

 	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
 		/*
-		 * This version of the driver only supports the HT-400
-		 * Rev 3.2
+		 * This version of the driver only supports Rev 3.2 and 3.3
 		 */
 		ipath_dev_err(dd,
-			      "Unsupported HT-400 revision %u.%u!\n",
+			      "Unsupported InfiniPath hardware revision %u.%u!\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
 		goto bail;
@ -738,7 +738,7 @@ static void ipath_check_htlink(struct ipath_devdata *dd)

 static int ipath_setup_ht_reset(struct ipath_devdata *dd)
 {
-	ipath_dbg("No reset possible for HT-400\n");
+	ipath_dbg("No reset possible for this InfiniPath hardware\n");
 	return 0;
 }

@ -925,7 +925,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,

 	/*
 	 * kernels with CONFIG_PCI_MSI set the vector in the irq field of
-	 * struct pci_device, so we use that to program the HT-400 internal
+	 * struct pci_device, so we use that to program the internal
 	 * interrupt register (not config space) with that value. The BIOS
 	 * must still have done the basic MSI setup.
 	 */
@ -1013,7 +1013,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
 * @dd: the infinipath device
 *
 * Called during driver unload.
- * This is currently a nop for the HT-400, not for all chips
+ * This is currently a nop for the HT chip, not for all chips
 */
 static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
 {
@ -1290,6 +1290,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
 		val &= ~INFINIPATH_XGXS_RESET;
 		change = 1;
 	}
+	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+		/* need to compensate for Tx inversion in partner */
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+		         INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= dd->ipath_rx_pol_inv <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		change = 1;
+	}
 	if (change)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);

@ -1470,7 +1479,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;

 	/*
-	 * For HT-400, we allocate a somewhat overly large eager buffer,
+	 * For HT, we allocate a somewhat overly large eager buffer,
 	 * such that we can guarantee that we can receive the largest
 	 * packet that we can send out.  To truly support a 4KB MTU,
 	 * we need to bump this to a large value.  To date, other than
@ -1531,7 +1540,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 	if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
 		dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
 		/*
-		 * Later production HT-460 has same changes as HT-465, so
+		 * Later production QHT7040 has same changes as QHT7140, so
 		 * can use GPIO interrupts.  They have serial #'s starting
 		 * with 128, rather than 112.
 		 */
@ -1560,13 +1569,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
 }

 /**
- * ipath_init_ht400_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6110_funcs - set up the chip-specific function pointers
 * @dd: the infinipath device
 *
 * This is global, and is called directly at init to set up the
 * chip-specific function pointers for later use.
 */
-void ipath_init_ht400_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
 {
 	dd->ipath_f_intrsetup = ipath_ht_intconfig;
 	dd->ipath_f_bus = ipath_setup_ht_config;
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@ -32,7 +32,7 @@
 */
 /*
 * This file contains all of the code that is specific to the
- * InfiniPath PE-800 chip.
+ * InfiniPath PCIe chip.
 */

 #include <linux/interrupt.h>
@ -45,9 +45,9 @@

 /*
 * This file contains all the chip-specific register information and
- * access functions for the QLogic InfiniPath PE800, the PCI-Express chip.
+ * access functions for the QLogic InfiniPath PCI-Express chip.
 *
- * This lists the InfiniPath PE800 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
 * This structure should never be directly accessed.
 */
 struct _infinipath_do_not_use_kernel_regs {
@ -213,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = {
 	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
 	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),

-	/* This group is pe-800-specific; and used only in this file */
 	/* The rcvpktled register controls one of the debug port signals, so
 	 * a packet activity LED can be connected to it. */
 	.kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
@ -364,8 +363,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			 * and we get here multiple times
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Error (freeze "
-					      "mode), no longer usable\n");
+				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					      "mode), no longer usable, SN %.16s\n",
+						  dd->ipath_serial);
 				isfatal = 1;
 			}
 			/*
@ -388,7 +388,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	*msg = '\0';

 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, PE-800 unusable]",
+		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
 		/* ignore from now on, so disable until driver reloaded */
 		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
@ -433,7 +433,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,

 	if (hwerrs & _IPATH_PLL_FAIL) {
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), PE-800 unusable]",
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
 			 (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
 		strlcat(msg, bitsmsg, msgl);
 		/* ignore from now on, so disable until driver reloaded */
@ -511,22 +511,25 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
 		n = "InfiniPath_Emulation";
 		break;
 	case 1:
-		n = "InfiniPath_PE-800-Bringup";
+		n = "InfiniPath_QLE7140-Bringup";
 		break;
 	case 2:
-		n = "InfiniPath_PE-880";
+		n = "InfiniPath_QLE7140";
 		break;
 	case 3:
-		n = "InfiniPath_PE-850";
+		n = "InfiniPath_QMI7140";
 		break;
 	case 4:
-		n = "InfiniPath_PE-860";
+		n = "InfiniPath_QEM7140";
+		break;
+	case 5:
+		n = "InfiniPath_QMH7140";
 		break;
 	default:
 		ipath_dev_err(dd,
 			      "Don't yet know about board with ID %u\n",
 			      boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u",
+		snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
 			 boardrev);
 		break;
 	}
@ -534,7 +537,7 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
 		snprintf(name, namelen, "%s", n);

 	if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
-		ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n",
+		ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
 	} else
@ -651,6 +654,15 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
 		val &= ~INFINIPATH_XGXS_RESET;
 		change = 1;
 	}
+	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+		/* need to compensate for Tx inversion in partner */
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+		         INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= dd->ipath_rx_pol_inv <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		change = 1;
+	}
 	if (change)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);

@ -705,7 +717,7 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
 }

-/* this is not yet needed on the PE800, so just return 0. */
+/* this is not yet needed on this chip, so just return 0. */
 static int ipath_pe_intconfig(struct ipath_devdata *dd)
 {
 	return 0;
@ -759,8 +771,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
 *
 * This is called during driver unload.
 * We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT-400. If we do end up needing pci_enable_msi
- * at some point in the future for HT-400, we'll move the call back
+ * isn't used for the HT chips. If we do end up needing pci_enable_msi
+ * at some point in the future for HT, we'll move the call back
 * into the main init_one code.
 */
 static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
@ -780,10 +792,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
 * late in 2.6.16).
 * All that can be done is to edit the kernel source to remove the quirk
 * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip (HT-400),
- * even those it uses MSI, and we want to avoid the quirk warning, so
- * So we call enable_msi only for the PE-800.  If we do end up needing
- * pci_enable_msi at some point in the future for HT-400, we'll move the
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning, so
+ * So we call enable_msi only for PCIe.  If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
 * call back into the main init_one code.
 * We save the msi lo and hi values, so we can restore them after
 * chip reset (the kernel PCI infrastructure doesn't yet handle that
@ -971,8 +983,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd)
 	int ret;

 	/* Use ERROR so it shows up in logs, etc. */
-	ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
-		      dd->ipath_unit);
+	ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
 	/* keep chip from being accessed in a few places */
 	dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
 	val = dd->ipath_control | INFINIPATH_C_RESET;
@ -1078,7 +1089,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
 * @port: the port
 *
 * clear all TID entries for a port, expected and eager.
- * Used from ipath_close().  On PE800, TIDs are only 32 bits,
+ * Used from ipath_close().  On this chip, TIDs are only 32 bits,
 * not 64, but they are still on 64 bit boundaries, so tidbase
 * is declared as u64 * for the pointer math, even though we write 32 bits
 */
@ -1148,9 +1159,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_flags |= IPATH_4BYTE_TID;

 	/*
-	 * For openib, we need to be able to handle an IB header of 96 bytes
-	 * or 24 dwords.  HT-400 has arbitrary sized receive buffers, so we
-	 * made them the same size as the PIO buffers.  The PE-800 does not
+	 * For openfabrics, we need to be able to handle an IB header of
+	 * 24 dwords.  HT chip has arbitrary sized receive buffers, so we
+	 * made them the same size as the PIO buffers.  This chip does not
 	 * handle arbitrary size buffers, so we need the header large enough
 	 * to handle largest IB header, but still have room for a 2KB MTU
 	 * standard IB packet.
@ -1158,11 +1169,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_rcvhdrentsize = 24;
 	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;

-	/* For HT-400, we allocate a somewhat overly large eager buffer,
-	 * such that we can guarantee that we can receive the largest packet
-	 * that we can send out.  To truly support a 4KB MTU, we need to
-	 * bump this to a larger value.  We'll do this when I get around to
-	 * testing 4KB sends on the PE-800, which I have not yet done.
+	/*
+	 * To truly support a 4KB MTU (for usermode), we need to
+	 * bump this to a larger value.  For now, we use them for
+	 * the kernel only.
 	 */
 	dd->ipath_rcvegrbufsize = 2048;
 	/*
@ -1175,9 +1185,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;

 	/*
-	 * For PE-800, we can request a receive interrupt for 1 or
+	 * We can request a receive interrupt for 1 or
 	 * more packets from current offset.  For now, we set this
-	 * up for a single packet, to match the HT-400 behavior.
+	 * up for a single packet.
 	 */
 	dd->ipath_rhdrhead_intr_off = 1ULL<<32;

@ -1216,13 +1226,13 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
 }

 /**
- * ipath_init_pe800_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6120_funcs - set up the chip-specific function pointers
 * @dd: the infinipath device
 *
 * This is global, and is called directly at init to set up the
 * chip-specific function pointers for later use.
 */
-void ipath_init_pe800_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
 {
 	dd->ipath_f_intrsetup = ipath_pe_intconfig;
 	dd->ipath_f_bus = ipath_setup_pe_config;
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
 MODULE_PARM_DESC(cfgports, "Set max number of ports to use");

 /*
- * Number of buffers reserved for driver (layered drivers and SMA
- * send).  Reserved at end of buffer list.   Initialized based on
+ * Number of buffers reserved for driver (verbs and layered drivers.)
+ * Reserved at end of buffer list.   Initialized based on
 * number of PIO buffers if not set via module interface.
 * The problem with this is that it's global, but we'll use different
 * numbers for different chip types.  So the default value is not
@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
 *
 * Allocate the eager TID buffers and program them into infinipath.
 * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like SMA
+ * memory, and either use the buffers as is for things like verbs
 * packets, or pass the buffers up to the ipath layered driver and
 * thence the network layer, replacing them as we do so (see
 * ipath_rcv_layer()).
@ -240,7 +240,11 @@ static int init_chip_first(struct ipath_devdata *dd,
 			  "only supports %u\n", ipath_cfgports,
 			  dd->ipath_portcnt);
 	}
-	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
+	/*
+	 * Allocate full portcnt array, rather than just cfgports, because
+	 * cleanup iterates across all possible ports.
+	 */
+	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
 			       GFP_KERNEL);

 	if (!dd->ipath_pd) {
@ -446,9 +450,9 @@ static void enable_chip(struct ipath_devdata *dd,
 	u32 val;
 	int i;

-	if (!reinit) {
-		init_waitqueue_head(&ipath_sma_state_wait);
-	}
+	if (!reinit)
+		init_waitqueue_head(&ipath_state_wait);
+
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);

@ -687,7 +691,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
 	if (ipath_kpiobufs == 0) {
-		/* not set by user, or set explictly to default  */
+		/* not set by user (this is default) */
 		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
 			kpiobufs = 32;
 		else
@ -946,6 +950,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
 			dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
 	}

+	ipath_kpiobufs = val;
 	ret = 0;
 bail:
 	spin_unlock_irqrestore(&ipath_devs_lock, flags);
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@ -34,7 +34,7 @@
 #include <linux/pci.h>

 #include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"

 /* These are all rcv-related errors which we want to count for stats */
@ -201,7 +201,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 				  ib_linkstate(lstate));
 		}
 		else
-			ipath_cdbg(SMA, "Unit %u link state %s, last "
+			ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
 				   "was %s\n", dd->ipath_unit,
 				   ib_linkstate(lstate),
 				   ib_linkstate((unsigned)
@ -213,7 +213,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 		if (lstate == IPATH_IBSTATE_INIT ||
 		    lstate == IPATH_IBSTATE_ARM ||
 		    lstate == IPATH_IBSTATE_ACTIVE)
-			ipath_cdbg(SMA, "Unit %u link state down"
+			ipath_cdbg(VERBOSE, "Unit %u link state down"
 				   " (state 0x%x), from %s\n",
 				   dd->ipath_unit,
 				   (u32)val & IPATH_IBSTATE_MASK,
@ -269,7 +269,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 			     INFINIPATH_IBCS_LINKSTATE_MASK)
 			    == INFINIPATH_IBCS_L_STATE_ACTIVE)
 				/* if from up to down be more vocal */
-				ipath_cdbg(SMA,
+				ipath_cdbg(VERBOSE,
 					   "Unit %u link now down (%s)\n",
 					   dd->ipath_unit,
 					   ipath_ibcstatus_str[ltstate]);
@ -289,8 +289,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 		*dd->ipath_statusp |=
 			IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
 		dd->ipath_f_setextled(dd, lstate, ltstate);
-
-		__ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
 	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
 		/*
 		 * set INIT and DOWN.  Down is checked by most of the other
@ -598,11 +596,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)

 	if (!noprint && *msg)
 		ipath_dev_err(dd, "%s error\n", msg);
-	if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
-		ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
-			   "waking\n", dd->ipath_sma_state_wanted,
+	if (dd->ipath_state_wanted & dd->ipath_flags) {
+		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
+			   "waking\n", dd->ipath_state_wanted,
 			   dd->ipath_flags);
-		wake_up_interruptible(&ipath_sma_state_wait);
+		wake_up_interruptible(&ipath_state_wait);
 	}

 	return chkerrpkts;
@ -708,11 +706,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
 {
 	int ret;

-	ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-	if (ret > 0)
-		goto set;
-
-	ret = __ipath_verbs_piobufavail(dd);
+	ret = ipath_ib_piobufavail(dd->verbs_dev);
 	if (ret > 0)
 		goto set;

--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@ -132,12 +132,6 @@ struct _ipath_layer {
 	void *l_arg;
 };

-/* Verbs layer interface */
-struct _verbs_layer {
-	void *l_arg;
-	struct timer_list l_timer;
-};
-
 struct ipath_devdata {
 	struct list_head ipath_list;

@ -198,7 +192,8 @@ struct ipath_devdata {
 	void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
 	/* fill out chip-specific fields */
 	int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-	struct _verbs_layer verbs_layer;
+	struct ipath_ibdev *verbs_dev;
+	struct timer_list verbs_timer;
 	/* total dwords sent (summed from counter) */
 	u64 ipath_sword;
 	/* total dwords rcvd (summed from counter) */
@ -241,7 +236,7 @@ struct ipath_devdata {
 	u64 ipath_tidtemplate;
 	/* value to write to free TIDs */
 	u64 ipath_tidinvalid;
-	/* PE-800 rcv interrupt setup */
+	/* IBA6120 rcv interrupt setup */
 	u64 ipath_rhdrhead_intr_off;

 	/* size of memory at ipath_kregbase */
@ -250,8 +245,8 @@ struct ipath_devdata {
 	u32 ipath_pioavregs;
 	/* IPATH_POLL, etc. */
 	u32 ipath_flags;
-	/* ipath_flags sma is waiting for */
-	u32 ipath_sma_state_wanted;
+	/* ipath_flags driver is waiting for */
+	u32 ipath_state_wanted;
 	/* last buffer for user use, first buf for kernel use is this
 	 * index. */
 	u32 ipath_lastport_piobuf;
@ -311,10 +306,6 @@ struct ipath_devdata {
 	u32 ipath_pcibar0;
 	/* so we can rewrite it after a chip reset */
 	u32 ipath_pcibar1;
-	/* sequential tries for SMA send and no bufs */
-	u32 ipath_nosma_bufs;
-	/* duration (seconds) ipath_nosma_bufs set */
-	u32 ipath_nosma_secs;

 	/* HT/PCI Vendor ID (here for NodeInfo) */
 	u16 ipath_vendorid;
@ -512,6 +503,8 @@ struct ipath_devdata {
 	u8 ipath_pci_cacheline;
 	/* LID mask control */
 	u8 ipath_lmc;
+	/* Rx Polarity inversion (compensate for ~tx on partner) */
+	u8 ipath_rx_pol_inv;

 	/* local link integrity counter */
 	u32 ipath_lli_counter;
@ -523,18 +516,6 @@ extern struct list_head ipath_dev_list;
 extern spinlock_t ipath_devs_lock;
 extern struct ipath_devdata *ipath_lookup(int unit);

-extern u16 ipath_layer_rcv_opcode;
-extern int __ipath_layer_intr(struct ipath_devdata *, u32);
-extern int ipath_layer_intr(struct ipath_devdata *, u32);
-extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
-			     struct sk_buff *);
-extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
-extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
-extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
-
-void ipath_layer_add(struct ipath_devdata *);
-void ipath_layer_remove(struct ipath_devdata *);
-
 int ipath_init_chip(struct ipath_devdata *, int);
 int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
@ -549,9 +530,8 @@ void ipath_cdev_cleanup(struct cdev **cdevp,

 int ipath_diag_add(struct ipath_devdata *);
 void ipath_diag_remove(struct ipath_devdata *);
-void ipath_diag_bringup_link(struct ipath_devdata *);

-extern wait_queue_head_t ipath_sma_state_wait;
+extern wait_queue_head_t ipath_state_wait;

 int ipath_user_add(struct ipath_devdata *dd);
 void ipath_user_remove(struct ipath_devdata *dd);
@ -582,12 +562,14 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);

 int ipath_parse_ushort(const char *str, unsigned short *valp);

-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-void ipath_set_ib_lstate(struct ipath_devdata *, int);
 void ipath_kreceive(struct ipath_devdata *);
 int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
 int ipath_reset_device(int);
 void ipath_get_faststats(unsigned long);
+int ipath_set_linkstate(struct ipath_devdata *, u8);
+int ipath_set_mtu(struct ipath_devdata *, u16);
+int ipath_set_lid(struct ipath_devdata *, u32, u8);
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);

 /* for use in system calls, where we want to know device type, etc. */
 #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
@ -642,10 +624,8 @@ void ipath_free_data(struct ipath_portdata *dd);
 int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
 int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
-/* init PE-800-specific func */
-void ipath_init_pe800_funcs(struct ipath_devdata *);
-/* init HT-400-specific func */
-void ipath_init_ht400_funcs(struct ipath_devdata *);
+void ipath_init_iba6120_funcs(struct ipath_devdata *);
+void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);

@ -801,7 +781,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,

 struct device_driver;

-extern const char ipath_core_version[];
+extern const char ib_ipath_version[];

 int ipath_driver_create_group(struct device_driver *);
 void ipath_driver_remove_group(struct device_driver *);
@ -810,6 +790,9 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *);
 void ipath_device_remove_group(struct device *, struct ipath_devdata *);
 int ipath_expose_reset(struct device *);

+int ipath_diagpkt_add(void);
+void ipath_diagpkt_remove(void);
+
 int ipath_init_ipathfs(void);
 void ipath_exit_ipathfs(void);
 int ipathfs_add_device(struct ipath_devdata *);
@ -831,10 +814,10 @@ const char *ipath_get_unit_name(int unit);

 extern struct mutex ipath_mutex;

-#define IPATH_DRV_NAME		"ipath_core"
+#define IPATH_DRV_NAME		"ib_ipath"
 #define IPATH_MAJOR		233
 #define IPATH_USER_MINOR_BASE	0
-#define IPATH_SMA_MINOR		128
+#define IPATH_DIAGPKT_MINOR	127
 #define IPATH_DIAG_MINOR_BASE	129
 #define IPATH_NMINORS		255

--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@ -34,6 +34,7 @@
 #include <asm/io.h>

 #include "ipath_verbs.h"
+#include "ipath_kernel.h"

 /**
 * ipath_alloc_lkey - allocate an lkey
@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
 		r = (r + 1) & (rkt->max - 1);
 		if (r == n) {
 			spin_unlock_irqrestore(&rkt->lock, flags);
-			_VERBS_INFO("LKEY table full\n");
+			ipath_dbg(KERN_INFO "LKEY table full\n");
 			ret = 0;
 			goto bail;
 		}
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@ -40,73 +40,9 @@
 */

 struct sk_buff;
-struct ipath_sge_state;
 struct ipath_devdata;
 struct ether_header;

-struct ipath_layer_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-	struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-	u64 user_base;		/* User's address for this region */
-	u64 iova;		/* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;		/* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;		/* number of ipath_segs in all the arrays */
-	u32 mapsz;		/* size of the map array */
-	struct ipath_segarray *map[0];	/* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-	struct ipath_mregion *mr;
-	void *vaddr;		/* current pointer into the segment */
-	u32 sge_length;		/* length of the SGE */
-	u32 length;		/* remaining length of the segment */
-	u16 m;			/* current index: mr->map[m] */
-	u16 n;			/* current index: mr->map[m]->segs[n] */
-};
-
-struct ipath_sge_state {
-	struct ipath_sge *sg_list;	/* next SGE to be used if any */
-	struct ipath_sge sge;	/* progress state for the current SGE */
-	u8 num_sge;
-};
-
 int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 			 void (*l_remove)(void *),
 			 int (*l_intr)(void *, u32),
@ -114,62 +50,14 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 				      struct sk_buff *),
 			 u16 rcv_opcode,
 			 int (*l_rcv_lid)(void *, void *));
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-			 void (*l_remove)(void *arg),
-			 int (*l_piobufavail)(void *arg),
-			 void (*l_rcv)(void *arg, void *rhdr,
-				       void *data, u32 tlen),
-			 void (*l_timer_cb)(void *arg));
 void ipath_layer_unregister(void);
-void ipath_verbs_unregister(void);
 int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
 u16 ipath_layer_get_lid(struct ipath_devdata *dd);
 int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
 u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
-int ipath_layer_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
 int ipath_layer_send_hdr(struct ipath_devdata *dd,
 			 struct ether_header *hdr);
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-		     u32 * hdr, u32 len, struct ipath_sge_state *ss);
 int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen);
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-				  u64 *rwords, u64 *spkts, u64 *rpkts,
-				  u64 *xmit_wait);
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-			     struct ipath_layer_counters *cntrs);
-int ipath_layer_want_buffer(struct ipath_devdata *dd);
-int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
-__be64 ipath_layer_get_guid(struct ipath_devdata *);
-u32 ipath_layer_get_nguid(struct ipath_devdata *);
-u32 ipath_layer_get_majrev(struct ipath_devdata *);
-u32 ipath_layer_get_minrev(struct ipath_devdata *);
-u32 ipath_layer_get_pcirev(struct ipath_devdata *);
-u32 ipath_layer_get_flags(struct ipath_devdata *dd);
-struct device *ipath_layer_get_device(struct ipath_devdata *dd);
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
-u32 ipath_layer_get_vendorid(struct ipath_devdata *);
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
-int ipath_layer_enable_timer(struct ipath_devdata *dd);
-int ipath_layer_disable_timer(struct ipath_devdata *dd);
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-					 int sleep);
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);

 /* ipath_ether interrupt values */
 #define IPATH_LAYER_INT_IF_UP 0x2
@ -178,9 +66,6 @@ u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
 #define IPATH_LAYER_INT_SEND_CONTINUE 0x10
 #define IPATH_LAYER_INT_BCAST 0x40

-/* _verbs_layer.l_flags */
-#define IPATH_VERBS_KERNEL_SMA 0x1
-
 extern unsigned ipath_debug; /* debugging bit mask */

 #endif				/* _IPATH_LAYER_H */
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@ -101,15 +101,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
 	nip->num_ports = ibdev->phys_port_cnt;
 	/* This is already in network order */
 	nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-	nip->node_guid = ipath_layer_get_guid(dd);
+	nip->node_guid = dd->ipath_guid;
 	nip->port_guid = nip->sys_guid;
-	nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
-	nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
-	majrev = ipath_layer_get_majrev(dd);
-	minrev = ipath_layer_get_minrev(dd);
+	nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
+	nip->device_id = cpu_to_be16(dd->ipath_deviceid);
+	majrev = dd->ipath_majrev;
+	minrev = dd->ipath_minrev;
 	nip->revision = cpu_to_be32((majrev << 16) | minrev);
 	nip->local_port_num = port;
-	vendor = ipath_layer_get_vendorid(dd);
+	vendor = dd->ipath_vendorid;
 	nip->vendor_id[0] = 0;
 	nip->vendor_id[1] = vendor >> 8;
 	nip->vendor_id[2] = vendor;
@ -133,13 +133,89 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
 	 */
 	if (startgx == 0)
 		/* The first is a copy of the read-only HW GUID. */
-		*p = ipath_layer_get_guid(to_idev(ibdev)->dd);
+		*p = to_idev(ibdev)->dd->ipath_guid;
 	else
 		smp->status |= IB_SMP_INVALID_FIELD;

 	return reply(smp);
 }

+
+static int get_overrunthreshold(struct ipath_devdata *dd)
+{
+	return (dd->ipath_ibcctrl >>
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
+{
+	unsigned v;
+
+	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+	if (v != n) {
+		dd->ipath_ibcctrl &=
+			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
+			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+		dd->ipath_ibcctrl |=
+			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+	}
+	return 0;
+}
+
+static int get_phyerrthreshold(struct ipath_devdata *dd)
+{
+	return (dd->ipath_ibcctrl >>
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
+{
+	unsigned v;
+
+	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+	if (v != n) {
+		dd->ipath_ibcctrl &=
+			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
+			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
+		dd->ipath_ibcctrl |=
+			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+	}
+	return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @dd: the infinipath device
+ *
+ * Returns zero if the default is POLL, 1 if the default is SLEEP.
+ */
+static int get_linkdowndefaultstate(struct ipath_devdata *dd)
+{
+	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
+}
+
 static int recv_subn_get_portinfo(struct ib_smp *smp,
 				  struct ib_device *ibdev, u8 port)
 {
@ -166,7 +242,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	    (dev->mkeyprot_resv_lmc >> 6) == 0)
 		pip->mkey = dev->mkey;
 	pip->gid_prefix = dev->gid_prefix;
-	lid = ipath_layer_get_lid(dev->dd);
+	lid = dev->dd->ipath_lid;
 	pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
 	pip->sm_lid = cpu_to_be16(dev->sm_lid);
 	pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
@ -177,14 +253,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	pip->link_width_supported = 3;	/* 1x or 4x */
 	pip->link_width_active = 2;	/* 4x */
 	pip->linkspeed_portstate = 0x10;	/* 2.5Gbps */
-	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+	ibcstat = dev->dd->ipath_lastibcstat;
 	pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
 	pip->portphysstate_linkdown =
 		(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
-		(ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
+		(get_linkdowndefaultstate(dev->dd) ? 1 : 2);
 	pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
 	pip->linkspeedactive_enabled = 0x11;	/* 2.5Gbps, 2.5Gbps */
-	switch (ipath_layer_get_ibmtu(dev->dd)) {
+	switch (dev->dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
 		break;
@ -217,7 +293,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
 	pip->pkey_violations =
-		cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
+		cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
 			     dev->z_pkey_violations) & 0xFFFF);
 	pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
 	/* Only the hardware GUID is supported for now */
@ -226,8 +302,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	/* 32.768 usec. response time (guessing) */
 	pip->resv_resptimevalue = 3;
 	pip->localphyerrors_overrunerrors =
-		(ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
-		ipath_layer_get_overrunthreshold(dev->dd);
+		(get_phyerrthreshold(dev->dd) << 4) |
+		get_overrunthreshold(dev->dd);
 	/* pip->max_credit_hint; */
 	/* pip->link_roundtrip_latency[3]; */

@ -237,6 +313,20 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	return ret;
 }

+/**
+ * get_pkeys - return the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the pkey table is placed here
+ */
+static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
+{
+	struct ipath_portdata *pd = dd->ipath_pd[0];
+
+	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
+
+	return 0;
+}
+
 static int recv_subn_get_pkeytable(struct ib_smp *smp,
 				   struct ib_device *ibdev)
 {
@ -249,9 +339,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp,
 	memset(smp->data, 0, sizeof(smp->data));
 	if (startpx == 0) {
 		struct ipath_ibdev *dev = to_idev(ibdev);
-		unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+		unsigned i, n = ipath_get_npkeys(dev->dd);

-		ipath_layer_get_pkeys(dev->dd, p);
+		get_pkeys(dev->dd, p);

 		for (i = 0; i < n; i++)
 			q[i] = cpu_to_be16(p[i]);
@ -268,6 +358,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
 	return recv_subn_get_guidinfo(smp, ibdev);
 }

+/**
+ * set_linkdowndefaultstate - set the default linkdown state
+ * @dd: the infinipath device
+ * @sleep: the new state
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
+{
+	if (sleep)
+		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+	else
+		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+			 dd->ipath_ibcctrl);
+	return 0;
+}
+
 /**
 * recv_subn_set_portinfo - set port information
 * @smp: the incoming SM packet
@ -290,7 +398,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	u8 state;
 	u16 lstate;
 	u32 mtu;
-	int ret;
+	int ret, ore;

 	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
 		goto err;
@ -304,7 +412,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);

 	lid = be16_to_cpu(pip->lid);
-	if (lid != ipath_layer_get_lid(dev->dd)) {
+	if (lid != dev->dd->ipath_lid) {
 		/* Must be a valid unicast LID address. */
 		if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
 			goto err;
@ -342,11 +450,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case 0: /* NOP */
 		break;
 	case 1: /* SLEEP */
-		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
+		if (set_linkdowndefaultstate(dev->dd, 1))
 			goto err;
 		break;
 	case 2: /* POLL */
-		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
+		if (set_linkdowndefaultstate(dev->dd, 0))
 			goto err;
 		break;
 	default:
@ -376,7 +484,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 		/* XXX We have already partially updated our state! */
 		goto err;
 	}
-	ipath_layer_set_mtu(dev->dd, mtu);
+	ipath_set_mtu(dev->dd, mtu);

 	dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;

@ -392,20 +500,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	 * later.
 	 */
 	if (pip->pkey_violations == 0)
-		dev->z_pkey_violations =
-			ipath_layer_get_cr_errpkey(dev->dd);
+		dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);

 	if (pip->qkey_violations == 0)
 		dev->qkey_violations = 0;

-	if (ipath_layer_set_phyerrthreshold(
-		    dev->dd,
-		    (pip->localphyerrors_overrunerrors >> 4) & 0xF))
+	ore = pip->localphyerrors_overrunerrors;
+	if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
 		goto err;

-	if (ipath_layer_set_overrunthreshold(
-		    dev->dd,
-		    (pip->localphyerrors_overrunerrors & 0xF)))
+	if (set_overrunthreshold(dev->dd, (ore & 0xF)))
 		goto err;

 	dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@ -423,7 +527,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	 * is down or is being set to down.
 	 */
 	state = pip->linkspeed_portstate & 0xF;
-	flags = ipath_layer_get_flags(dev->dd);
+	flags = dev->dd->ipath_flags;
 	lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
 	if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
 		goto err;
@ -439,7 +543,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 		/* FALLTHROUGH */
 	case IB_PORT_DOWN:
 		if (lstate == 0)
-			if (ipath_layer_get_linkdowndefaultstate(dev->dd))
+			if (get_linkdowndefaultstate(dev->dd))
 				lstate = IPATH_IB_LINKDOWN_SLEEP;
 			else
 				lstate = IPATH_IB_LINKDOWN;
@ -451,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 			lstate = IPATH_IB_LINKDOWN_DISABLE;
 		else
 			goto err;
-		ipath_layer_set_linkstate(dev->dd, lstate);
+		ipath_set_linkstate(dev->dd, lstate);
 		if (flags & IPATH_LINKACTIVE) {
 			event.event = IB_EVENT_PORT_ERR;
 			ib_dispatch_event(&event);
@ -460,7 +564,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case IB_PORT_ARMED:
 		if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
 			break;
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
 		if (flags & IPATH_LINKACTIVE) {
 			event.event = IB_EVENT_PORT_ERR;
 			ib_dispatch_event(&event);
@ -469,7 +573,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case IB_PORT_ACTIVE:
 		if (!(flags & IPATH_LINKARMED))
 			break;
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
 		event.event = IB_EVENT_PORT_ACTIVE;
 		ib_dispatch_event(&event);
 		break;
@ -493,6 +597,152 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	return ret;
 }

+/**
+ * rm_pkey - decrecment the reference count for the given PKEY
+ * @dd: the infinipath device
+ * @key: the PKEY index
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct ipath_devdata *dd, u16 key)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (dd->ipath_pkeys[i] != key)
+			continue;
+		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
+			dd->ipath_pkeys[i] = 0;
+			ret = 1;
+			goto bail;
+		}
+		break;
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @dd: the infinipath device
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct ipath_devdata *dd, u16 key)
+{
+	int i;
+	u16 lkey = key & 0x7FFF;
+	int any = 0;
+	int ret;
+
+	if (lkey == 0x7FFF) {
+		ret = 0;
+		goto bail;
+	}
+
+	/* Look for an empty slot or a matching PKEY. */
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (!dd->ipath_pkeys[i]) {
+			any++;
+			continue;
+		}
+		/* If it matches exactly, try to increment the ref count */
+		if (dd->ipath_pkeys[i] == key) {
+			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+				ret = 0;
+				goto bail;
+			}
+			/* Lost the race. Look for an empty slot below. */
+			atomic_dec(&dd->ipath_pkeyrefs[i]);
+			any++;
+		}
+		/*
+		 * It makes no sense to have both the limited and unlimited
+		 * PKEY set at the same time since the unlimited one will
+		 * disable the limited one.
+		 */
+		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+			ret = -EEXIST;
+			goto bail;
+		}
+	}
+	if (!any) {
+		ret = -EBUSY;
+		goto bail;
+	}
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (!dd->ipath_pkeys[i] &&
+		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+			/* for ipathstats, etc. */
+			ipath_stats.sps_pkeys[i] = lkey;
+			dd->ipath_pkeys[i] = key;
+			ret = 1;
+			goto bail;
+		}
+	}
+	ret = -EBUSY;
+
+bail:
+	return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+	struct ipath_portdata *pd;
+	int i;
+	int changed = 0;
+
+	pd = dd->ipath_pd[0];
+
+	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+		u16 key = pkeys[i];
+		u16 okey = pd->port_pkeys[i];
+
+		if (key == okey)
+			continue;
+		/*
+		 * The value of this PKEY table entry is changing.
+		 * Remove the old entry in the hardware's array of PKEYs.
+		 */
+		if (okey & 0x7FFF)
+			changed |= rm_pkey(dd, okey);
+		if (key & 0x7FFF) {
+			int ret = add_pkey(dd, key);
+
+			if (ret < 0)
+				key = 0;
+			else
+				changed |= ret;
+		}
+		pd->port_pkeys[i] = key;
+	}
+	if (changed) {
+		u64 pkey;
+
+		pkey = (u64) dd->ipath_pkeys[0] |
+			((u64) dd->ipath_pkeys[1] << 16) |
+			((u64) dd->ipath_pkeys[2] << 32) |
+			((u64) dd->ipath_pkeys[3] << 48);
+		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
+			   (unsigned long long) pkey);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+				 pkey);
+	}
+	return 0;
+}
+
 static int recv_subn_set_pkeytable(struct ib_smp *smp,
 				   struct ib_device *ibdev)
 {
@ -500,13 +750,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
 	__be16 *p = (__be16 *) smp->data;
 	u16 *q = (u16 *) smp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+	unsigned i, n = ipath_get_npkeys(dev->dd);

 	for (i = 0; i < n; i++)
 		q[i] = be16_to_cpu(p[i]);

-	if (startpx != 0 ||
-	    ipath_layer_set_pkeys(dev->dd, q) != 0)
+	if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
 		smp->status |= IB_SMP_INVALID_FIELD;

 	return recv_subn_get_pkeytable(smp, ibdev);
@ -844,10 +1093,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
 		pmp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 	u8 port_select = p->port_select;

-	ipath_layer_get_counters(dev->dd, &cntrs);
+	ipath_get_counters(dev->dd, &cntrs);

 	/* Adjust counters for any resets done. */
 	cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
@ -944,8 +1193,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
 	u64 swords, rwords, spkts, rpkts, xwait;
 	u8 port_select = p->port_select;

-	ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				      &rpkts, &xwait);
+	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+				&rpkts, &xwait);

 	/* Adjust counters for any resets done. */
 	swords -= dev->z_port_xmit_data;
@ -978,13 +1227,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
 		pmp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;

 	/*
 	 * Since the HW doesn't support clearing counters, we save the
 	 * current count and subtract it from future responses.
 	 */
-	ipath_layer_get_counters(dev->dd, &cntrs);
+	ipath_get_counters(dev->dd, &cntrs);

 	if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
 		dev->z_symbol_error_counter = cntrs.symbol_error_counter;
@ -1041,8 +1290,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	u64 swords, rwords, spkts, rpkts, xwait;

-	ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				      &rpkts, &xwait);
+	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+				&rpkts, &xwait);

 	if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
 		dev->z_port_xmit_data = swords;
--- a/drivers/infiniband/hw/ipath/ipath_mmap.c
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+	struct ipath_mmap_info *ip =
+		container_of(ref, struct ipath_mmap_info, ref);
+
+	vfree(ip->obj);
+	kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the CQ is mapped,
+ * to avoid releasing it.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	kref_get(&ip->ref);
+	ip->mmap_cnt++;
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	ip->mmap_cnt--;
+	kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+	.open =     ipath_vma_open,
+	.close =    ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct ipath_ibdev *dev = to_idev(context->device);
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct ipath_mmap_info *ip, **pp;
+	int ret = -EINVAL;
+
+	/*
+	 * Search the device's list of objects waiting for a mmap call.
+	 * Normally, this list is very short since a call to create a
+	 * CQ, QP, or SRQ is soon followed by a call to mmap().
+	 */
+	spin_lock_irq(&dev->pending_lock);
+	for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+		/* Only the creator is allowed to mmap the object */
+		if (context != ip->context || (void *) offset != ip->obj)
+			continue;
+		/* Don't allow a mmap larger than the object. */
+		if (size > ip->size)
+			break;
+
+		*pp = ip->next;
+		spin_unlock_irq(&dev->pending_lock);
+
+		ret = remap_vmalloc_range(vma, ip->obj, 0);
+		if (ret)
+			goto done;
+		vma->vm_ops = &ipath_vm_ops;
+		vma->vm_private_data = ip;
+		ipath_vma_open(vma);
+		goto done;
+	}
+	spin_unlock_irq(&dev->pending_lock);
+done:
+	return ret;
+}
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@ -36,6 +36,18 @@

 #include "ipath_verbs.h"

+/* Fast memory region */
+struct ipath_fmr {
+	struct ib_fmr ibfmr;
+	u8 page_shift;
+	struct ipath_mregion mr;        /* must be last */
+};
+
+static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct ipath_fmr, ibfmr);
+}
+
 /**
 * ipath_get_dma_mr - get a DMA memory region
 * @pd: protection domain for this memory region
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@ -35,7 +35,7 @@
 #include <linux/vmalloc.h>

 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"

 #define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
@ -44,19 +44,6 @@
 #define find_next_offset(map, off) find_next_zero_bit((map)->page, \
 						      BITS_PER_PAGE, off)

-#define TRANS_INVALID	0
-#define TRANS_ANY2RST	1
-#define TRANS_RST2INIT	2
-#define TRANS_INIT2INIT	3
-#define TRANS_INIT2RTR	4
-#define TRANS_RTR2RTS	5
-#define TRANS_RTS2RTS	6
-#define TRANS_SQERR2RTS	7
-#define TRANS_ANY2ERR	8
-#define TRANS_RTS2SQD	9  /* XXX Wait for expected ACKs & signal event */
-#define TRANS_SQD2SQD	10 /* error if not drained & parameter change */
-#define TRANS_SQD2RTS	11 /* error if not drained */
-
 /*
 * Convert the AETH credit code into the number of credits.
 */
@ -287,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
 				free_qpn(qpt, qp->ibqp.qp_num);
 			if (!atomic_dec_and_test(&qp->refcount) ||
 			    !ipath_destroy_qp(&qp->ibqp))
-				_VERBS_INFO("QP memory leak!\n");
+				ipath_dbg(KERN_INFO "QP memory leak!\n");
 			qp = nqp;
 		}
 	}
@ -355,8 +342,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 	qp->s_last = 0;
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
-	qp->r_rq.head = 0;
-	qp->r_rq.tail = 0;
+	if (qp->r_rq.wq) {
+		qp->r_rq.wq->head = 0;
+		qp->r_rq.wq->tail = 0;
+	}
 	qp->r_reuse_sge = 0;
 }

@ -373,8 +362,8 @@ void ipath_error_qp(struct ipath_qp *qp)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;

-	_VERBS_INFO("QP%d/%d in error state\n",
-		    qp->ibqp.qp_num, qp->remote_qpn);
+	ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+		  qp->ibqp.qp_num, qp->remote_qpn);

 	spin_lock(&dev->pending_lock);
 	/* XXX What if its already removed by the timeout code? */
@ -410,15 +399,32 @@ void ipath_error_qp(struct ipath_qp *qp)
 	qp->s_hdrwords = 0;
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;

-	wc.opcode = IB_WC_RECV;
-	spin_lock(&qp->r_rq.lock);
-	while (qp->r_rq.tail != qp->r_rq.head) {
-		wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
-		if (++qp->r_rq.tail >= qp->r_rq.size)
-			qp->r_rq.tail = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	if (qp->r_rq.wq) {
+		struct ipath_rwq *wq;
+		u32 head;
+		u32 tail;
+
+		spin_lock(&qp->r_rq.lock);
+
+		/* sanity check pointers before trusting them */
+		wq = qp->r_rq.wq;
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
+		wc.opcode = IB_WC_RECV;
+		while (tail != head) {
+			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+			if (++tail >= qp->r_rq.size)
+				tail = 0;
+			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+		}
+		wq->tail = tail;
+
+		spin_unlock(&qp->r_rq.lock);
 	}
-	spin_unlock(&qp->r_rq.lock);
 }

 /**
@ -426,11 +432,12 @@ void ipath_error_qp(struct ipath_qp *qp)
 * @ibqp: the queue pair who's attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
 *
 * Returns 0 on success, otherwise returns an errno.
 */
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask)
+		    int attr_mask, struct ib_udata *udata)
 {
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	struct ipath_qp *qp = to_iqp(ibqp);
@ -448,19 +455,46 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				attr_mask))
 		goto inval;

-	if (attr_mask & IB_QP_AV)
+	if (attr_mask & IB_QP_AV) {
 		if (attr->ah_attr.dlid == 0 ||
 		    attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
 			goto inval;

+		if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
+		    (attr->ah_attr.grh.sgid_index > 1))
+			goto inval;
+	}
+
 	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
+		if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
 			goto inval;

 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		if (attr->min_rnr_timer > 31)
 			goto inval;

+	if (attr_mask & IB_QP_PORT)
+		if (attr->port_num == 0 ||
+		    attr->port_num > ibqp->device->phys_port_cnt)
+			goto inval;
+
+	if (attr_mask & IB_QP_PATH_MTU)
+		if (attr->path_mtu > IB_MTU_4096)
+			goto inval;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > 1)
+			goto inval;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		if (attr->max_rd_atomic > 1)
+			goto inval;
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE)
+		if (attr->path_mig_state != IB_MIG_MIGRATED &&
+		    attr->path_mig_state != IB_MIG_REARM)
+			goto inval;
+
 	switch (new_state) {
 	case IB_QPS_RESET:
 		ipath_reset_qp(qp);
@ -511,6 +545,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		qp->r_min_rnr_timer = attr->min_rnr_timer;

+	if (attr_mask & IB_QP_TIMEOUT)
+		qp->timeout = attr->timeout;
+
 	if (attr_mask & IB_QP_QKEY)
 		qp->qkey = attr->qkey;

@ -543,7 +580,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->dest_qp_num = qp->remote_qpn;
 	attr->qp_access_flags = qp->qp_access_flags;
 	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->r_rq.size - 1;
+	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
 	attr->cap.max_send_sge = qp->s_max_sge;
 	attr->cap.max_recv_sge = qp->r_rq.max_sge;
 	attr->cap.max_inline_data = 0;
@ -557,7 +594,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->max_dest_rd_atomic = 1;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
 	attr->port_num = 1;
-	attr->timeout = 0;
+	attr->timeout = qp->timeout;
 	attr->retry_cnt = qp->s_retry_cnt;
 	attr->rnr_retry = qp->s_rnr_retry;
 	attr->alt_port_num = 0;
@ -569,9 +606,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	init_attr->recv_cq = qp->ibqp.recv_cq;
 	init_attr->srq = qp->ibqp.srq;
 	init_attr->cap = attr->cap;
-	init_attr->sq_sig_type =
-		(qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
-		? IB_SIGNAL_REQ_WR : 0;
+	if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+	else
+		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
 	init_attr->qp_type = qp->ibqp.qp_type;
 	init_attr->port_num = 1;
 	return 0;
@ -596,13 +634,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
 	} else {
 		u32 min, max, x;
 		u32 credits;
+		struct ipath_rwq *wq = qp->r_rq.wq;
+		u32 head;
+		u32 tail;

+		/* sanity check pointers before trusting them */
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
 		/*
 		 * Compute the number of credits available (RWQEs).
 		 * XXX Not holding the r_rq.lock here so there is a small
 		 * chance that the pair of reads are not atomic.
 		 */
-		credits = qp->r_rq.head - qp->r_rq.tail;
+		credits = head - tail;
 		if ((int)credits < 0)
 			credits += qp->r_rq.size;
 		/*
@ -679,27 +727,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 	case IB_QPT_UD:
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
-		qp = kmalloc(sizeof(*qp), GFP_KERNEL);
+		sz = sizeof(*qp);
+		if (init_attr->srq) {
+			struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+			sz += sizeof(*qp->r_sg_list) *
+				srq->rq.max_sge;
+		} else
+			sz += sizeof(*qp->r_sg_list) *
+				init_attr->cap.max_recv_sge;
+		qp = kmalloc(sz, GFP_KERNEL);
 		if (!qp) {
-			vfree(swq);
 			ret = ERR_PTR(-ENOMEM);
-			goto bail;
+			goto bail_swq;
 		}
 		if (init_attr->srq) {
+			sz = 0;
 			qp->r_rq.size = 0;
 			qp->r_rq.max_sge = 0;
 			qp->r_rq.wq = NULL;
+			init_attr->cap.max_recv_wr = 0;
+			init_attr->cap.max_recv_sge = 0;
 		} else {
 			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
 			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) +
+			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 				sizeof(struct ipath_rwqe);
-			qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
+			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
 			if (!qp->r_rq.wq) {
-				kfree(qp);
-				vfree(swq);
 				ret = ERR_PTR(-ENOMEM);
-				goto bail;
+				goto bail_qp;
 			}
 		}

@ -719,24 +777,19 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		qp->s_wq = swq;
 		qp->s_size = init_attr->cap.max_send_wr + 1;
 		qp->s_max_sge = init_attr->cap.max_send_sge;
-		qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
-			1 << IPATH_S_SIGNAL_REQ_WR : 0;
+		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+			qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+		else
+			qp->s_flags = 0;
 		dev = to_idev(ibpd->device);
 		err = ipath_alloc_qpn(&dev->qp_table, qp,
 				      init_attr->qp_type);
 		if (err) {
-			vfree(swq);
-			vfree(qp->r_rq.wq);
-			kfree(qp);
 			ret = ERR_PTR(err);
-			goto bail;
+			goto bail_rwq;
 		}
+		qp->ip = NULL;
 		ipath_reset_qp(qp);
-
-		/* Tell the core driver that the kernel SMA is present. */
-		if (init_attr->qp_type == IB_QPT_SMI)
-			ipath_layer_set_verbs_flags(dev->dd,
-						    IPATH_VERBS_KERNEL_SMA);
 		break;

 	default:
@ -747,8 +800,63 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,

 	init_attr->cap.max_inline_data = 0;

-	ret = &qp->ibqp;
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) qp->r_rq.wq;
+		int err;

+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_rwq;
+		}
+
+		if (qp->r_rq.wq) {
+			/* Allocate info for ipath_mmap(). */
+			ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+			if (!ip) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_rwq;
+			}
+			qp->ip = ip;
+			ip->context = ibpd->uobject->context;
+			ip->obj = qp->r_rq.wq;
+			kref_init(&ip->ref);
+			ip->mmap_cnt = 0;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	}
+
+	spin_lock(&dev->n_qps_lock);
+	if (dev->n_qps_allocated == ib_ipath_max_qps) {
+		spin_unlock(&dev->n_qps_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_ip;
+	}
+
+	dev->n_qps_allocated++;
+	spin_unlock(&dev->n_qps_lock);
+
+	ret = &qp->ibqp;
+	goto bail;
+
+bail_ip:
+	kfree(qp->ip);
+bail_rwq:
+	vfree(qp->r_rq.wq);
+bail_qp:
+	kfree(qp);
+bail_swq:
+	vfree(swq);
 bail:
 	return ret;
 }
@ -768,15 +876,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	unsigned long flags;

-	/* Tell the core driver that the kernel SMA is gone. */
-	if (qp->ibqp.qp_type == IB_QPT_SMI)
-		ipath_layer_set_verbs_flags(dev->dd, 0);
-
-	spin_lock_irqsave(&qp->r_rq.lock, flags);
-	spin_lock(&qp->s_lock);
+	spin_lock_irqsave(&qp->s_lock, flags);
 	qp->state = IB_QPS_ERR;
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_lock(&dev->n_qps_lock);
+	dev->n_qps_allocated--;
+	spin_unlock(&dev->n_qps_lock);

 	/* Stop the sending tasklet. */
 	tasklet_kill(&qp->s_task);
@ -797,8 +902,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	if (atomic_read(&qp->refcount) != 0)
 		ipath_free_qp(&dev->qp_table, qp);

+	if (qp->ip)
+		kref_put(&qp->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(qp->r_rq.wq);
 	vfree(qp->s_wq);
-	vfree(qp->r_rq.wq);
 	kfree(qp);
 	return 0;
 }
@ -850,8 +958,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);

-	_VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
-		    qp->ibqp.qp_num, qp->remote_qpn, wc->status);
+	ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);

 	spin_lock(&dev->pending_lock);
 	/* XXX What if its already removed by the timeout code? */
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@ -32,7 +32,7 @@
 */

 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"

 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
@ -540,7 +540,7 @@ static void send_rc_ack(struct ipath_qp *qp)
 		lrh0 = IPATH_LRH_GRH;
 	}
 	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
 					    (qp->r_nak_state <<
@ -557,7 +557,7 @@ static void send_rc_ack(struct ipath_qp *qp)
 	hdr.lrh[0] = cpu_to_be16(lrh0);
 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-	hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
+	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
 	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
@ -1323,8 +1323,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		 * the eager header buffer size to 56 bytes so the last 4
 		 * bytes of the BTH header (PSN) is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			psn = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@ -36,8 +36,7 @@

 /*
 * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for the InfiniPath HT-400
- * chip.
+ * defines the registers, and their contents, for InfiniPath chips.
 */

 /*
@ -283,10 +282,12 @@
 #define INFINIPATH_XGXS_RESET          0x7ULL
 #define INFINIPATH_XGXS_MDIOADDR_MASK  0xfULL
 #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
+#define INFINIPATH_XGXS_RX_POL_SHIFT 19
+#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL

 #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */

-/* TID entries (memory), HT400-only */
+/* TID entries (memory), HT-only */
 #define INFINIPATH_RT_VALID 0x8000000000000000ULL
 #define INFINIPATH_RT_ADDR_SHIFT 0
 #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@ -32,7 +32,7 @@
 */

 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"

 /*
 * Convert the AETH RNR timeout code into the number of milliseconds.
@ -106,6 +106,54 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 }

+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int i, j, ret;
+	struct ib_wc wc;
+
+	qp->r_len = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[i].lkey == 0) ||
+		    !ipath_lkey_ok(&dev->lk_table,
+				   &qp->r_sg_list[j], &wqe->sg_list[i],
+				   IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		qp->r_len += wqe->sg_list[i].length;
+		j++;
+	}
+	qp->r_sge.sge = qp->r_sg_list[0];
+	qp->r_sge.sg_list = qp->r_sg_list + 1;
+	qp->r_sge.num_sge = j;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp_num = qp->ibqp.qp_num;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	/* Signal solicited completion event. */
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
+
 /**
 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
@ -119,71 +167,71 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
 	struct ipath_rq *rq;
+	struct ipath_rwq *wq;
 	struct ipath_srq *srq;
 	struct ipath_rwqe *wqe;
-	int ret = 1;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	int ret;

-	if (!qp->ibqp.srq) {
+	if (qp->ibqp.srq) {
+		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
+		rq = &srq->rq;
+	} else {
+		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
-		spin_lock_irqsave(&rq->lock, flags);
-
-		if (unlikely(rq->tail == rq->head)) {
-			ret = 0;
-			goto done;
-		}
-		wqe = get_rwqe_ptr(rq, rq->tail);
-		qp->r_wr_id = wqe->wr_id;
-		if (!wr_id_only) {
-			qp->r_sge.sge = wqe->sg_list[0];
-			qp->r_sge.sg_list = wqe->sg_list + 1;
-			qp->r_sge.num_sge = wqe->num_sge;
-			qp->r_len = wqe->length;
-		}
-		if (++rq->tail >= rq->size)
-			rq->tail = 0;
-		goto done;
 	}

-	srq = to_isrq(qp->ibqp.srq);
-	rq = &srq->rq;
 	spin_lock_irqsave(&rq->lock, flags);
-
-	if (unlikely(rq->tail == rq->head)) {
-		ret = 0;
-		goto done;
-	}
-	wqe = get_rwqe_ptr(rq, rq->tail);
+	wq = rq->wq;
+	tail = wq->tail;
+	/* Validate tail before using it since it is user writable. */
+	if (tail >= rq->size)
+		tail = 0;
+	do {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			ret = 0;
+			goto bail;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+	} while (!wr_id_only && !init_sge(qp, wqe));
 	qp->r_wr_id = wqe->wr_id;
-	if (!wr_id_only) {
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->num_sge;
-		qp->r_len = wqe->length;
-	}
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq->ibsrq.event_handler) {
-		struct ib_event ev;
+	wq->tail = tail;
+
+	ret = 1;
+	if (handler) {
 		u32 n;

-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
+			struct ib_event ev;
+
 			srq->limit = 0;
 			spin_unlock_irqrestore(&rq->lock, flags);
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 			goto bail;
 		}
 	}
-
-done:
 	spin_unlock_irqrestore(&rq->lock, flags);
+
 bail:
 	return ret;
 }
@ -422,6 +470,15 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
 		wake_up(&qp->wait);
 }

+static int want_buffer(struct ipath_devdata *dd)
+{
+	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			 dd->ipath_sendctrl);
+
+	return 0;
+}
+
 /**
 * ipath_no_bufs_available - tell the layer driver we need buffers
 * @qp: the QP that caused the problem
@ -438,7 +495,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
 		list_add_tail(&qp->piowait, &dev->piowait);
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 	/*
-	 * Note that as soon as ipath_layer_want_buffer() is called and
+	 * Note that as soon as want_buffer() is called and
 	 * possibly before it returns, ipath_ib_piobufavail()
 	 * could be called.  If we are still in the tasklet function,
 	 * tasklet_hi_schedule() will not call us until the next time
@ -448,7 +505,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
 	 */
 	clear_bit(IPATH_S_BUSY, &qp->s_flags);
 	tasklet_unlock(&qp->s_task);
-	ipath_layer_want_buffer(dev->dd);
+	want_buffer(dev->dd);
 	dev->n_piowait++;
 }

@ -563,7 +620,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
 	hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-	hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd);
+	hdr->sgid.global.interface_id = dev->dd->ipath_guid;
 	hdr->dgid = grh->dgid;

 	/* GRH header size in 32-bit words. */
@ -595,8 +652,7 @@ void ipath_do_ruc_send(unsigned long data)
 	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
 		goto bail;

-	if (unlikely(qp->remote_ah_attr.dlid ==
-		     ipath_layer_get_lid(dev->dd))) {
+	if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
 		ipath_ruc_loopback(qp);
 		goto clear;
 	}
@ -663,8 +719,8 @@ void ipath_do_ruc_send(unsigned long data)
 	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
 				       SIZE_OF_CRC);
-	qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
-	bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 			   struct ib_recv_wr **bad_wr)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_ibdev *dev = to_idev(ibsrq->device);
+	struct ipath_rwq *wq;
 	unsigned long flags;
 	int ret;

 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;

-		if (wr->num_sge > srq->rq.max_sge) {
+		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}

 		spin_lock_irqsave(&srq->rq.lock, flags);
-		next = srq->rq.head + 1;
+		wq = srq->rq.wq;
+		next = wq->head + 1;
 		if (next >= srq->rq.size)
 			next = 0;
-		if (next == srq->rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&srq->rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}

-		wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
+		wqe = get_rwqe_ptr(&srq->rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(srq->ibsrq.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(&dev->lk_table,
-					   &wqe->sg_list[j],
-					   &wr->sg_list[i],
-					   IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		srq->rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&srq->rq.lock, flags);
 	}
 	ret = 0;
@ -133,53 +106,95 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,

 	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}

 	if (srq_init_attr->attr.max_wr == 0) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}

 	if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
 	    (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}

 	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
 	if (!srq) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}

 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
 	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
+	srq->rq.max_sge = srq_init_attr->attr.max_sge;
+	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
 		sizeof(struct ipath_rwqe);
-	srq->rq.wq = vmalloc(srq->rq.size * sz);
+	srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
 	if (!srq->rq.wq) {
-		kfree(srq);
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto bail_srq;
 	}

+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) srq->rq.wq;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wq;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wq;
+		}
+		srq->ip = ip;
+		ip->context = ibpd->uobject->context;
+		ip->obj = srq->rq.wq;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+				      srq->rq.size * sz);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		srq->ip = NULL;
+
 	/*
 	 * ib_create_srq() will initialize srq->ibsrq.
 	 */
 	spin_lock_init(&srq->rq.lock);
-	srq->rq.head = 0;
-	srq->rq.tail = 0;
+	srq->rq.wq->head = 0;
+	srq->rq.wq->tail = 0;
 	srq->rq.max_sge = srq_init_attr->attr.max_sge;
 	srq->limit = srq_init_attr->attr.srq_limit;

-	ret = &srq->ibsrq;
-
 	dev->n_srqs_allocated++;

-bail:
+	ret = &srq->ibsrq;
+	goto done;
+
+bail_wq:
+	vfree(srq->rq.wq);
+
+bail_srq:
+	kfree(srq);
+
+done:
 	return ret;
 }

@ -188,83 +203,130 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
 * @ibsrq: the SRQ to modify
 * @attr: the new attributes of the SRQ
 * @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
 */
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	unsigned long flags;
-	int ret;
-
-	if (attr_mask & IB_SRQ_MAX_WR)
-		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-		    (attr->max_sge > srq->rq.max_sge)) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-	if (attr_mask & IB_SRQ_LIMIT)
-		if (attr->srq_limit >= srq->rq.size) {
-			ret = -EINVAL;
-			goto bail;
-		}
+	int ret = 0;

 	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct ipath_rwqe *wq, *p;
-		u32 sz, size, n;
+		struct ipath_rwq *owq;
+		struct ipath_rwq *wq;
+		struct ipath_rwqe *p;
+		u32 sz, size, n, head, tail;
+
+		/* Check that the requested sizes are below the limits. */
+		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+		    ((attr_mask & IB_SRQ_LIMIT) ?
+		     attr->srq_limit : srq->limit) > attr->max_wr) {
+			ret = -EINVAL;
+			goto bail;
+		}

 		sz = sizeof(struct ipath_rwqe) +
-			attr->max_sge * sizeof(struct ipath_sge);
+			srq->rq.max_sge * sizeof(struct ib_sge);
 		size = attr->max_wr + 1;
-		wq = vmalloc(size * sz);
+		wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
 		if (!wq) {
 			ret = -ENOMEM;
 			goto bail;
 		}

-		spin_lock_irqsave(&srq->rq.lock, flags);
-		if (srq->rq.head < srq->rq.tail)
-			n = srq->rq.size + srq->rq.head - srq->rq.tail;
+		/*
+		 * Return the address of the RWQ as the offset to mmap.
+		 * See ipath_mmap() for details.
+		 */
+		if (udata && udata->inlen >= sizeof(__u64)) {
+			__u64 offset_addr;
+			__u64 offset = (__u64) wq;
+
+			ret = ib_copy_from_udata(&offset_addr, udata,
+						 sizeof(offset_addr));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+			udata->outbuf = (void __user *) offset_addr;
+			ret = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+		}
+
+		spin_lock_irq(&srq->rq.lock);
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		owq = srq->rq.wq;
+		head = owq->head;
+		if (head >= srq->rq.size)
+			head = 0;
+		tail = owq->tail;
+		if (tail >= srq->rq.size)
+			tail = 0;
+		n = head;
+		if (n < tail)
+			n += srq->rq.size - tail;
 		else
-			n = srq->rq.head - srq->rq.tail;
-		if (size <= n || size <= srq->limit) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
+			n -= tail;
+		if (size <= n) {
+			spin_unlock_irq(&srq->rq.lock);
 			vfree(wq);
 			ret = -EINVAL;
 			goto bail;
 		}
 		n = 0;
-		p = wq;
-		while (srq->rq.tail != srq->rq.head) {
+		p = wq->wq;
+		while (tail != head) {
 			struct ipath_rwqe *wqe;
 			int i;

-			wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
+			wqe = get_rwqe_ptr(&srq->rq, tail);
 			p->wr_id = wqe->wr_id;
-			p->length = wqe->length;
 			p->num_sge = wqe->num_sge;
 			for (i = 0; i < wqe->num_sge; i++)
 				p->sg_list[i] = wqe->sg_list[i];
 			n++;
 			p = (struct ipath_rwqe *)((char *) p + sz);
-			if (++srq->rq.tail >= srq->rq.size)
-				srq->rq.tail = 0;
+			if (++tail >= srq->rq.size)
+				tail = 0;
 		}
-		vfree(srq->rq.wq);
 		srq->rq.wq = wq;
 		srq->rq.size = size;
-		srq->rq.head = n;
-		srq->rq.tail = 0;
-		srq->rq.max_sge = attr->max_sge;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
+		wq->head = n;
+		wq->tail = 0;
+		if (attr_mask & IB_SRQ_LIMIT)
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);

-	if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		srq->limit = attr->srq_limit;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
+		vfree(owq);
+
+		if (srq->ip) {
+			struct ipath_mmap_info *ip = srq->ip;
+			struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+
+			ip->obj = wq;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	} else if (attr_mask & IB_SRQ_LIMIT) {
+		spin_lock_irq(&srq->rq.lock);
+		if (attr->srq_limit >= srq->rq.size)
+			ret = -EINVAL;
+		else
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
 	}
-	ret = 0;

 bail:
 	return ret;
--- a/Show more
+++ b/Show more