Merge branch 'k.o/for-4.12' into k.o/for-4.12-rdma-netdevice

This commit is contained in:
Doug Ledford 2017-04-20 12:00:41 -04:00
commit 23790ba2d7
43 changed files with 3012 additions and 1532 deletions

View file

@ -29,4 +29,5 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o

View file

@ -0,0 +1,627 @@
/*
* Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"
void uverbs_uobject_get(struct ib_uobject *uobject)
{
kref_get(&uobject->ref);
}
static void uverbs_uobject_free(struct kref *ref)
{
struct ib_uobject *uobj =
container_of(ref, struct ib_uobject, ref);
if (uobj->type->type_class->needs_kfree_rcu)
kfree_rcu(uobj, rcu);
else
kfree(uobj);
}
void uverbs_uobject_put(struct ib_uobject *uobject)
{
kref_put(&uobject->ref, uverbs_uobject_free);
}
static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
{
/*
* When a shared access is required, we use a positive counter. Each
* shared access request checks that the value != -1 and increment it.
* Exclusive access is required for operations like write or destroy.
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
* access locks, since only a single one of them is is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
if (!exclusive)
return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
-EBUSY : 0;
/* lock is either WRITE or DESTROY - should be exclusive */
return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
}
static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
const struct uverbs_obj_type *type)
{
struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
if (!uobj)
return ERR_PTR(-ENOMEM);
/*
* user_handle should be filled by the handler,
* The object is added to the list in the commit stage.
*/
uobj->context = context;
uobj->type = type;
atomic_set(&uobj->usecnt, 0);
kref_init(&uobj->ref);
return uobj;
}
static int idr_add_uobj(struct ib_uobject *uobj)
{
int ret;
idr_preload(GFP_KERNEL);
spin_lock(&uobj->context->ufile->idr_lock);
/*
* We start with allocating an idr pointing to NULL. This represents an
* object which isn't initialized yet. We'll replace it later on with
* the real object once we commit.
*/
ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
if (ret >= 0)
uobj->id = ret;
spin_unlock(&uobj->context->ufile->idr_lock);
idr_preload_end();
return ret < 0 ? ret : 0;
}
/*
* It only removes it from the uobjects list, uverbs_uobject_put() is still
* required.
*/
static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
{
spin_lock(&uobj->context->ufile->idr_lock);
idr_remove(&uobj->context->ufile->idr, uobj->id);
spin_unlock(&uobj->context->ufile->idr_lock);
}
/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext,
int id, bool exclusive)
{
struct ib_uobject *uobj;
rcu_read_lock();
/* object won't be released as we're protected in rcu */
uobj = idr_find(&ucontext->ufile->idr, id);
if (!uobj) {
uobj = ERR_PTR(-ENOENT);
goto free;
}
uverbs_uobject_get(uobj);
free:
rcu_read_unlock();
return uobj;
}
static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext,
int id, bool exclusive)
{
struct file *f;
struct ib_uobject *uobject;
const struct uverbs_obj_fd_type *fd_type =
container_of(type, struct uverbs_obj_fd_type, type);
if (exclusive)
return ERR_PTR(-EOPNOTSUPP);
f = fget(id);
if (!f)
return ERR_PTR(-EBADF);
uobject = f->private_data;
/*
* fget(id) ensures we are not currently running uverbs_close_fd,
* and the caller is expected to ensure that uverbs_close_fd is never
* done while a call top lookup is possible.
*/
if (f->f_op != fd_type->fops) {
fput(f);
return ERR_PTR(-EBADF);
}
uverbs_uobject_get(uobject);
return uobject;
}
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext,
int id, bool exclusive)
{
struct ib_uobject *uobj;
int ret;
uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
if (IS_ERR(uobj))
return uobj;
if (uobj->type != type) {
ret = -EINVAL;
goto free;
}
ret = uverbs_try_lock_object(uobj, exclusive);
if (ret) {
WARN(ucontext->cleanup_reason,
"ib_uverbs: Trying to lookup_get while cleanup context\n");
goto free;
}
return uobj;
free:
uobj->type->type_class->lookup_put(uobj, exclusive);
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext)
{
int ret;
struct ib_uobject *uobj;
uobj = alloc_uobj(ucontext, type);
if (IS_ERR(uobj))
return uobj;
ret = idr_add_uobj(uobj);
if (ret)
goto uobj_put;
ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
RDMACG_RESOURCE_HCA_OBJECT);
if (ret)
goto idr_remove;
return uobj;
idr_remove:
uverbs_idr_remove_uobj(uobj);
uobj_put:
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext)
{
const struct uverbs_obj_fd_type *fd_type =
container_of(type, struct uverbs_obj_fd_type, type);
int new_fd;
struct ib_uobject *uobj;
struct ib_uobject_file *uobj_file;
struct file *filp;
new_fd = get_unused_fd_flags(O_CLOEXEC);
if (new_fd < 0)
return ERR_PTR(new_fd);
uobj = alloc_uobj(ucontext, type);
if (IS_ERR(uobj)) {
put_unused_fd(new_fd);
return uobj;
}
uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
filp = anon_inode_getfile(fd_type->name,
fd_type->fops,
uobj_file,
fd_type->flags);
if (IS_ERR(filp)) {
put_unused_fd(new_fd);
uverbs_uobject_put(uobj);
return (void *)filp;
}
uobj_file->uobj.id = new_fd;
uobj_file->uobj.object = filp;
uobj_file->ufile = ucontext->ufile;
INIT_LIST_HEAD(&uobj->list);
kref_get(&uobj_file->ufile->ref);
return uobj;
}
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext)
{
return type->type_class->alloc_begin(type, ucontext);
}
static void uverbs_uobject_add(struct ib_uobject *uobject)
{
mutex_lock(&uobject->context->uobjects_lock);
list_add(&uobject->list, &uobject->context->uobjects);
mutex_unlock(&uobject->context->uobjects_lock);
}
static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
const struct uverbs_obj_idr_type *idr_type =
container_of(uobj->type, struct uverbs_obj_idr_type,
type);
int ret = idr_type->destroy_object(uobj, why);
/*
* We can only fail gracefully if the user requested to destroy the
* object. In the rest of the cases, just remove whatever you can.
*/
if (why == RDMA_REMOVE_DESTROY && ret)
return ret;
ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
RDMACG_RESOURCE_HCA_OBJECT);
uverbs_idr_remove_uobj(uobj);
return ret;
}
static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
struct file *filp = uobj->object;
int id = uobj_file->uobj.id;
/* Unsuccessful NEW */
fput(filp);
put_unused_fd(id);
}
static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
const struct uverbs_obj_fd_type *fd_type =
container_of(uobj->type, struct uverbs_obj_fd_type, type);
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
int ret = fd_type->context_closed(uobj_file, why);
if (why == RDMA_REMOVE_DESTROY && ret)
return ret;
if (why == RDMA_REMOVE_DURING_CLEANUP) {
alloc_abort_fd_uobject(uobj);
return ret;
}
uobj_file->uobj.context = NULL;
return ret;
}
static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
{
#ifdef CONFIG_LOCKDEP
if (exclusive)
WARN_ON(atomic_read(&uobj->usecnt) > 0);
else
WARN_ON(atomic_read(&uobj->usecnt) == -1);
#endif
}
static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
int ret;
struct ib_ucontext *ucontext = uobj->context;
ret = uobj->type->type_class->remove_commit(uobj, why);
if (ret && why == RDMA_REMOVE_DESTROY) {
/* We couldn't remove the object, so just unlock the uobject */
atomic_set(&uobj->usecnt, 0);
uobj->type->type_class->lookup_put(uobj, true);
} else {
mutex_lock(&ucontext->uobjects_lock);
list_del(&uobj->list);
mutex_unlock(&ucontext->uobjects_lock);
/* put the ref we took when we created the object */
uverbs_uobject_put(uobj);
}
return ret;
}
/* This is called only for user requested DESTROY reasons */
int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
{
int ret;
struct ib_ucontext *ucontext = uobj->context;
/* put the ref count we took at lookup_get */
uverbs_uobject_put(uobj);
/* Cleanup is running. Calling this should have been impossible */
if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
return 0;
}
lockdep_check(uobj, true);
ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
up_read(&ucontext->cleanup_rwsem);
return ret;
}
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
uverbs_uobject_add(uobj);
spin_lock(&uobj->context->ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
* this shouldn't fail.
*/
WARN_ON(idr_replace(&uobj->context->ufile->idr,
uobj, uobj->id));
spin_unlock(&uobj->context->ufile->idr_lock);
}
static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
uverbs_uobject_add(&uobj_file->uobj);
fd_install(uobj_file->uobj.id, uobj->object);
/* This shouldn't be used anymore. Use the file object instead */
uobj_file->uobj.id = 0;
/* Get another reference as we export this to the fops */
uverbs_uobject_get(&uobj_file->uobj);
}
int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
{
/* Cleanup is running. Calling this should have been impossible */
if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
int ret;
WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
ret = uobj->type->type_class->remove_commit(uobj,
RDMA_REMOVE_DURING_CLEANUP);
if (ret)
pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
uobj->id);
return ret;
}
uobj->type->type_class->alloc_commit(uobj);
up_read(&uobj->context->cleanup_rwsem);
return 0;
}
static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
{
uverbs_idr_remove_uobj(uobj);
ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
RDMACG_RESOURCE_HCA_OBJECT);
uverbs_uobject_put(uobj);
}
void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
{
uobj->type->type_class->alloc_abort(uobj);
}
static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
{
}
static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
{
struct file *filp = uobj->object;
WARN_ON(exclusive);
/* This indirectly calls uverbs_close_fd and free the object */
fput(filp);
}
void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
{
lockdep_check(uobj, exclusive);
uobj->type->type_class->lookup_put(uobj, exclusive);
/*
* In order to unlock an object, either decrease its usecnt for
* read access or zero it in case of exclusive access. See
* uverbs_try_lock_object for locking schema information.
*/
if (!exclusive)
atomic_dec(&uobj->usecnt);
else
atomic_set(&uobj->usecnt, 0);
uverbs_uobject_put(uobj);
}
const struct uverbs_obj_type_class uverbs_idr_class = {
.alloc_begin = alloc_begin_idr_uobject,
.lookup_get = lookup_get_idr_uobject,
.alloc_commit = alloc_commit_idr_uobject,
.alloc_abort = alloc_abort_idr_uobject,
.lookup_put = lookup_put_idr_uobject,
.remove_commit = remove_commit_idr_uobject,
/*
* When we destroy an object, we first just lock it for WRITE and
* actually DESTROY it in the finalize stage. So, the problematic
* scenario is when we just started the finalize stage of the
* destruction (nothing was executed yet). Now, the other thread
* fetched the object for READ access, but it didn't lock it yet.
* The DESTROY thread continues and starts destroying the object.
* When the other thread continue - without the RCU, it would
* access freed memory. However, the rcu_read_lock delays the free
* until the rcu_read_lock of the READ operation quits. Since the
* exclusive lock of the object is still taken by the DESTROY flow, the
* READ operation will get -EBUSY and it'll just bail out.
*/
.needs_kfree_rcu = true,
};
static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
{
struct ib_ucontext *ucontext;
struct ib_uverbs_file *ufile = uobj_file->ufile;
int ret;
mutex_lock(&uobj_file->ufile->cleanup_mutex);
/* uobject was either already cleaned up or is cleaned up right now anyway */
if (!uobj_file->uobj.context ||
!down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
goto unlock;
ucontext = uobj_file->uobj.context;
ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
up_read(&ucontext->cleanup_rwsem);
if (ret)
pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
unlock:
mutex_unlock(&ufile->cleanup_mutex);
}
void uverbs_close_fd(struct file *f)
{
struct ib_uobject_file *uobj_file = f->private_data;
struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
_uverbs_close_fd(uobj_file);
uverbs_uobject_put(&uobj_file->uobj);
kref_put(uverbs_file_ref, ib_uverbs_release_file);
}
void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
{
enum rdma_remove_reason reason = device_removed ?
RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
unsigned int cur_order = 0;
ucontext->cleanup_reason = reason;
/*
* Waits for all remove_commit and alloc_commit to finish. Logically, We
* want to hold this forever as the context is going to be destroyed,
* but we'll release it since it causes a "held lock freed" BUG message.
*/
down_write(&ucontext->cleanup_rwsem);
while (!list_empty(&ucontext->uobjects)) {
struct ib_uobject *obj, *next_obj;
unsigned int next_order = UINT_MAX;
/*
* This shouldn't run while executing other commands on this
* context. Thus, the only thing we should take care of is
* releasing a FD while traversing this list. The FD could be
* closed and released from the _release fop of this FD.
* In order to mitigate this, we add a lock.
* We take and release the lock per order traversal in order
* to let other threads (which might still use the FDs) chance
* to run.
*/
mutex_lock(&ucontext->uobjects_lock);
list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
list) {
if (obj->type->destroy_order == cur_order) {
int ret;
/*
* if we hit this WARN_ON, that means we are
* racing with a lookup_get.
*/
WARN_ON(uverbs_try_lock_object(obj, true));
ret = obj->type->type_class->remove_commit(obj,
reason);
list_del(&obj->list);
if (ret)
pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
obj->id, cur_order);
/* put the ref we took when we created the object */
uverbs_uobject_put(obj);
} else {
next_order = min(next_order,
obj->type->destroy_order);
}
}
mutex_unlock(&ucontext->uobjects_lock);
cur_order = next_order;
}
up_write(&ucontext->cleanup_rwsem);
}
void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
{
ucontext->cleanup_reason = 0;
mutex_init(&ucontext->uobjects_lock);
INIT_LIST_HEAD(&ucontext->uobjects);
init_rwsem(&ucontext->cleanup_rwsem);
}
const struct uverbs_obj_type_class uverbs_fd_class = {
.alloc_begin = alloc_begin_fd_uobject,
.lookup_get = lookup_get_fd_uobject,
.alloc_commit = alloc_commit_fd_uobject,
.alloc_abort = alloc_abort_fd_uobject,
.lookup_put = lookup_put_fd_uobject,
.remove_commit = remove_commit_fd_uobject,
.needs_kfree_rcu = false,
};

View file

@ -0,0 +1,78 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RDMA_CORE_H
#define RDMA_CORE_H
#include <linux/idr.h>
#include <rdma/uverbs_types.h>
#include <rdma/ib_verbs.h>
#include <linux/mutex.h>
/*
* These functions initialize the context and cleanups its uobjects.
* The context has a list of objects which is protected by a mutex
* on the context. initialize_ucontext should be called when we create
* a context.
* cleanup_ucontext removes all uobjects from the context and puts them.
*/
void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
/*
* uverbs_uobject_get is called in order to increase the reference count on
* an uobject. This is useful when a handler wants to keep the uobject's memory
* alive, regardless if this uobject is still alive in the context's objects
* repository. Objects are put via uverbs_uobject_put.
*/
void uverbs_uobject_get(struct ib_uobject *uobject);
/*
* In order to indicate we no longer needs this uobject, uverbs_uobject_put
* is called. When the reference count is decreased, the uobject is freed.
* For example, this is used when attaching a completion channel to a CQ.
*/
void uverbs_uobject_put(struct ib_uobject *uobject);
/* Indicate this fd is no longer used by this consumer, but its memory isn't
* necessarily released yet. When the last reference is put, we release the
* memory. After this call is executed, calling uverbs_uobject_get isn't
* allowed.
* This must be called from the release file_operations of the file!
*/
void uverbs_close_fd(struct file *f);
#endif /* RDMA_CORE_H */

View file

@ -76,12 +76,13 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
* struct ib_uverbs_event_file: One reference is held by the VFS and
* released when the file is closed. For asynchronous event files,
* another reference is held by the corresponding main context file
* and released when that file is closed. For completion event files,
* a reference is taken when a CQ is created that uses the file, and
* released when the CQ is destroyed.
* struct ib_uverbs_event_queue: Base structure for
* struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
* One reference is held by the VFS and released when the file is closed.
* For asynchronous event files, another reference is held by the corresponding
* main context file and released when that file is closed. For completion
* event files, a reference is taken when a CQ is created that uses the file,
* and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
@ -101,18 +102,26 @@ struct ib_uverbs_device {
struct list_head uverbs_events_file_list;
};
struct ib_uverbs_event_file {
struct kref ref;
int is_async;
struct ib_uverbs_file *uverbs_file;
struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
};
struct ib_uverbs_async_event_file {
struct ib_uverbs_event_queue ev_queue;
struct ib_uverbs_file *uverbs_file;
struct kref ref;
struct list_head list;
};
struct ib_uverbs_completion_event_file {
struct ib_uobject_file uobj_file;
struct ib_uverbs_event_queue ev_queue;
};
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
@ -120,9 +129,13 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_event_file *async_file;
struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
struct idr idr;
/* spinlock protects write access to idr */
spinlock_t idr_lock;
};
struct ib_uverbs_event {
@ -159,6 +172,8 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
/* lock for mcast list */
struct mutex mcast_lock;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
@ -176,32 +191,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
extern spinlock_t ib_uverbs_idr_lock;
extern struct idr ib_uverbs_pd_idr;
extern struct idr ib_uverbs_mr_idr;
extern struct idr ib_uverbs_mw_idr;
extern struct idr ib_uverbs_ah_idr;
extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
extern struct idr ib_uverbs_xrcd_idr;
extern struct idr ib_uverbs_rule_idr;
extern struct idr ib_uverbs_wq_idr;
extern struct idr ib_uverbs_rwq_ind_tbl_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev,
int is_async);
extern const struct file_operations uverbs_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_event_file *ev_file,
struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
@ -210,9 +211,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
struct ib_uverbs_flow_spec {
union {

File diff suppressed because it is too large Load diff

View file

@ -52,6 +52,7 @@
#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@ -67,19 +68,6 @@ enum {
static struct class *uverbs_class;
DEFINE_SPINLOCK(ib_uverbs_idr_lock);
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);
DEFINE_IDR(ib_uverbs_wq_idr);
DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@ -168,37 +156,37 @@ static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
static void ib_uverbs_release_event_file(struct kref *ref)
static void ib_uverbs_release_async_event_file(struct kref *ref)
{
struct ib_uverbs_event_file *file =
container_of(ref, struct ib_uverbs_event_file, ref);
struct ib_uverbs_async_event_file *file =
container_of(ref, struct ib_uverbs_async_event_file, ref);
kfree(file);
}
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_event_file *ev_file,
struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
spin_lock_irq(&ev_file->lock);
spin_lock_irq(&ev_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&ev_file->lock);
spin_unlock_irq(&ev_file->ev_queue.lock);
kref_put(&ev_file->ref, ib_uverbs_release_event_file);
uverbs_uobject_put(&ev_file->uobj_file.uobj);
}
spin_lock_irq(&file->async_file->lock);
spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&file->async_file->lock);
spin_unlock_irq(&file->async_file->ev_queue.lock);
}
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
@ -206,16 +194,16 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
{
struct ib_uverbs_event *evt, *tmp;
spin_lock_irq(&file->async_file->lock);
spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&file->async_file->lock);
spin_unlock_irq(&file->async_file->ev_queue.lock);
}
static void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj)
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
@ -227,138 +215,11 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
struct ib_ucontext *context)
struct ib_ucontext *context,
bool device_removed)
{
struct ib_uobject *uobj, *tmp;
context->closing = 1;
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
struct ib_ah *ah = uobj->object;
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
ib_destroy_ah(ah);
ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
/* Remove MWs before QPs, in order to support type 2A MWs. */
list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
struct ib_mw *mw = uobj->object;
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
uverbs_dealloc_mw(mw);
ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
struct ib_flow *flow_id = uobj->object;
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
ib_destroy_flow(flow_id);
ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
struct ib_qp *qp = uobj->object;
struct ib_uqp_object *uqp =
container_of(uobj, struct ib_uqp_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
if (qp == qp->real_qp)
ib_uverbs_detach_umcast(qp, uqp);
ib_destroy_qp(qp);
ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
ib_destroy_rwq_ind_table(rwq_ind_tbl);
kfree(ind_tbl);
kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
struct ib_wq *wq = uobj->object;
struct ib_uwq_object *uwq =
container_of(uobj, struct ib_uwq_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
ib_destroy_wq(wq);
ib_uverbs_release_uevent(file, &uwq->uevent);
kfree(uwq);
}
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = uobj->object;
struct ib_uevent_object *uevent =
container_of(uobj, struct ib_uevent_object, uobject);
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
ib_destroy_srq(srq);
ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_uevent(file, uevent);
kfree(uevent);
}
list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
struct ib_cq *cq = uobj->object;
struct ib_uverbs_event_file *ev_file = cq->cq_context;
struct ib_ucq_object *ucq =
container_of(uobj, struct ib_ucq_object, uobject);
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
ib_destroy_cq(cq);
ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_ucq(file, ev_file, ucq);
kfree(ucq);
}
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr);
ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
mutex_lock(&file->device->xrcd_tree_mutex);
list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
struct ib_xrcd *xrcd = uobj->object;
struct ib_uxrcd_object *uxrcd =
container_of(uobj, struct ib_uxrcd_object, uobject);
idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
ib_uverbs_dealloc_xrcd(file->device, xrcd);
kfree(uxrcd);
}
mutex_unlock(&file->device->xrcd_tree_mutex);
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
struct ib_pd *pd = uobj->object;
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
ib_dealloc_pd(pd);
ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
uverbs_cleanup_ucontext(context, device_removed);
put_pid(context->tgid);
ib_rdmacg_uncharge(&context->cg_obj, context->device,
@ -372,7 +233,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
complete(&dev->comp);
}
static void ib_uverbs_release_file(struct kref *ref)
void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
@ -392,58 +253,54 @@ static void ib_uverbs_release_file(struct kref *ref)
kfree(file);
}
static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
struct ib_uverbs_file *uverbs_file,
struct file *filp, char __user *buf,
size_t count, loff_t *pos,
size_t eventsz)
{
struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
int eventsz;
int ret = 0;
spin_lock_irq(&file->lock);
spin_lock_irq(&ev_queue->lock);
while (list_empty(&file->event_list)) {
spin_unlock_irq(&file->lock);
while (list_empty(&ev_queue->event_list)) {
spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
if (wait_event_interruptible(file->poll_wait,
(!list_empty(&file->event_list) ||
if (wait_event_interruptible(ev_queue->poll_wait,
(!list_empty(&ev_queue->event_list) ||
/* The barriers built into wait_event_interruptible()
* and wake_up() guarentee this will see the null set
* without using RCU
*/
!file->uverbs_file->device->ib_dev)))
!uverbs_file->device->ib_dev)))
return -ERESTARTSYS;
/* If device was disassociated and no event exists set an error */
if (list_empty(&file->event_list) &&
!file->uverbs_file->device->ib_dev)
if (list_empty(&ev_queue->event_list) &&
!uverbs_file->device->ib_dev)
return -EIO;
spin_lock_irq(&file->lock);
spin_lock_irq(&ev_queue->lock);
}
event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
if (file->is_async)
eventsz = sizeof (struct ib_uverbs_async_event_desc);
else
eventsz = sizeof (struct ib_uverbs_comp_event_desc);
event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
list_del(file->event_list.next);
list_del(ev_queue->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
spin_unlock_irq(&file->lock);
spin_unlock_irq(&ev_queue->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
@ -457,87 +314,158 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return ret;
}
static unsigned int ib_uverbs_event_poll(struct file *filp,
static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_async_event_file *file = filp->private_data;
return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
buf, count, pos,
sizeof(struct ib_uverbs_async_event_desc));
}
static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_completion_event_file *comp_ev_file =
filp->private_data;
return ib_uverbs_event_read(&comp_ev_file->ev_queue,
comp_ev_file->uobj_file.ufile, filp,
buf, count, pos,
sizeof(struct ib_uverbs_comp_event_desc));
}
static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
struct file *filp,
struct poll_table_struct *wait)
{
unsigned int pollflags = 0;
struct ib_uverbs_event_file *file = filp->private_data;
poll_wait(filp, &file->poll_wait, wait);
poll_wait(filp, &ev_queue->poll_wait, wait);
spin_lock_irq(&file->lock);
if (!list_empty(&file->event_list))
spin_lock_irq(&ev_queue->lock);
if (!list_empty(&ev_queue->event_list))
pollflags = POLLIN | POLLRDNORM;
spin_unlock_irq(&file->lock);
spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
static unsigned int ib_uverbs_async_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
struct ib_uverbs_event_file *file = filp->private_data;
return fasync_helper(fd, filp, on, &file->async_queue);
return ib_uverbs_event_poll(filp->private_data, filp, wait);
}
static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
static unsigned int ib_uverbs_comp_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_completion_event_file *comp_ev_file =
filp->private_data;
return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}
static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
struct ib_uverbs_event_queue *ev_queue = filp->private_data;
return fasync_helper(fd, filp, on, &ev_queue->async_queue);
}
static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
{
struct ib_uverbs_completion_event_file *comp_ev_file =
filp->private_data;
return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}
static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_async_event_file *file = filp->private_data;
struct ib_uverbs_file *uverbs_file = file->uverbs_file;
struct ib_uverbs_event *entry, *tmp;
int closed_already = 0;
mutex_lock(&file->uverbs_file->device->lists_mutex);
spin_lock_irq(&file->lock);
closed_already = file->is_closed;
file->is_closed = 1;
list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
mutex_lock(&uverbs_file->device->lists_mutex);
spin_lock_irq(&file->ev_queue.lock);
closed_already = file->ev_queue.is_closed;
file->ev_queue.is_closed = 1;
list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
kfree(entry);
}
spin_unlock_irq(&file->lock);
spin_unlock_irq(&file->ev_queue.lock);
if (!closed_already) {
list_del(&file->list);
if (file->is_async)
ib_unregister_event_handler(&file->uverbs_file->
event_handler);
ib_unregister_event_handler(&uverbs_file->event_handler);
}
mutex_unlock(&file->uverbs_file->device->lists_mutex);
mutex_unlock(&uverbs_file->device->lists_mutex);
kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
kref_put(&file->ref, ib_uverbs_release_event_file);
kref_put(&uverbs_file->ref, ib_uverbs_release_file);
kref_put(&file->ref, ib_uverbs_release_async_event_file);
return 0;
}
static const struct file_operations uverbs_event_fops = {
static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_completion_event_file *file = filp->private_data;
struct ib_uverbs_event *entry, *tmp;
spin_lock_irq(&file->ev_queue.lock);
list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
kfree(entry);
}
spin_unlock_irq(&file->ev_queue.lock);
uverbs_close_fd(filp);
return 0;
}
const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
.release = ib_uverbs_event_close,
.fasync = ib_uverbs_event_fasync,
.read = ib_uverbs_comp_event_read,
.poll = ib_uverbs_comp_event_poll,
.release = ib_uverbs_comp_event_close,
.fasync = ib_uverbs_comp_event_fasync,
.llseek = no_llseek,
};
static const struct file_operations uverbs_async_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_async_event_read,
.poll = ib_uverbs_async_event_poll,
.release = ib_uverbs_async_event_close,
.fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
struct ib_uverbs_event_file *file = cq_context;
struct ib_uverbs_event_queue *ev_queue = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
if (!file)
if (!ev_queue)
return;
spin_lock_irqsave(&file->lock, flags);
if (file->is_closed) {
spin_unlock_irqrestore(&file->lock, flags);
spin_lock_irqsave(&ev_queue->lock, flags);
if (ev_queue->is_closed) {
spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&file->lock, flags);
spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
@ -546,12 +474,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
list_add_tail(&entry->list, &file->event_list);
list_add_tail(&entry->list, &ev_queue->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
spin_unlock_irqrestore(&file->lock, flags);
spin_unlock_irqrestore(&ev_queue->lock, flags);
wake_up_interruptible(&file->poll_wait);
kill_fasync(&file->async_queue, SIGIO, POLL_IN);
wake_up_interruptible(&ev_queue->poll_wait);
kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@ -562,15 +490,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
struct ib_uverbs_event *entry;
unsigned long flags;
spin_lock_irqsave(&file->async_file->lock, flags);
if (file->async_file->is_closed) {
spin_unlock_irqrestore(&file->async_file->lock, flags);
spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
if (file->async_file->ev_queue.is_closed) {
spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&file->async_file->lock, flags);
spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
@ -579,13 +507,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
entry->desc.async.reserved = 0;
entry->counter = counter;
list_add_tail(&entry->list, &file->async_file->event_list);
list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
spin_unlock_irqrestore(&file->async_file->lock, flags);
spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
wake_up_interruptible(&file->async_file->poll_wait);
kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
@ -603,7 +531,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
struct ib_uevent_object *uobj;
/* for XRC target qp's, check that qp is live */
if (!event->element.qp->uobject || !event->element.qp->uobject->live)
if (!event->element.qp->uobject)
return;
uobj = container_of(event->element.qp->uobject,
@ -648,15 +576,23 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
file->async_file = NULL;
}
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev,
int is_async)
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
struct ib_uverbs_event_file *ev_file;
spin_lock_init(&ev_queue->lock);
INIT_LIST_HEAD(&ev_queue->event_list);
init_waitqueue_head(&ev_queue->poll_wait);
ev_queue->is_closed = 0;
ev_queue->async_queue = NULL;
}
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev)
{
struct ib_uverbs_async_event_file *ev_file;
struct file *filp;
int ret;
@ -664,16 +600,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
if (!ev_file)
return ERR_PTR(-ENOMEM);
kref_init(&ev_file->ref);
spin_lock_init(&ev_file->lock);
INIT_LIST_HEAD(&ev_file->event_list);
init_waitqueue_head(&ev_file->poll_wait);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
ev_file->uverbs_file = uverbs_file;
kref_get(&ev_file->uverbs_file->ref);
ev_file->async_queue = NULL;
ev_file->is_closed = 0;
filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
kref_init(&ev_file->ref);
filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
ev_file, O_RDONLY);
if (IS_ERR(filp))
goto err_put_refs;
@ -683,64 +614,33 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
&uverbs_file->device->uverbs_events_file_list);
mutex_unlock(&uverbs_file->device->lists_mutex);
if (is_async) {
WARN_ON(uverbs_file->async_file);
uverbs_file->async_file = ev_file;
kref_get(&uverbs_file->async_file->ref);
INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
ib_dev,
ib_uverbs_event_handler);
ret = ib_register_event_handler(&uverbs_file->event_handler);
if (ret)
goto err_put_file;
WARN_ON(uverbs_file->async_file);
uverbs_file->async_file = ev_file;
kref_get(&uverbs_file->async_file->ref);
INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
ib_dev,
ib_uverbs_event_handler);
ret = ib_register_event_handler(&uverbs_file->event_handler);
if (ret)
goto err_put_file;
/* At that point async file stuff was fully set */
ev_file->is_async = 1;
}
/* At that point async file stuff was fully set */
return filp;
err_put_file:
fput(filp);
kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&uverbs_file->async_file->ref,
ib_uverbs_release_async_event_file);
uverbs_file->async_file = NULL;
return ERR_PTR(ret);
err_put_refs:
kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
kref_put(&ev_file->ref, ib_uverbs_release_event_file);
kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
return filp;
}
/*
* Look up a completion event file by FD. If lookup is successful,
* takes a ref to the event file struct that it returns; if
* unsuccessful, returns NULL.
*/
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
struct ib_uverbs_event_file *ev_file = NULL;
struct fd f = fdget(fd);
if (!f.file)
return NULL;
if (f.file->f_op != &uverbs_event_fops)
goto out;
ev_file = f.file->private_data;
if (ev_file->is_async) {
ev_file = NULL;
goto out;
}
kref_get(&ev_file->ref);
out:
fdput(f);
return ev_file;
}
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
u64 mask;
@ -986,6 +886,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
}
file->device = dev;
spin_lock_init(&file->idr_lock);
idr_init(&file->idr);
file->ucontext = NULL;
file->async_file = NULL;
kref_init(&file->ref);
@ -1019,10 +921,11 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
ib_uverbs_cleanup_ucontext(file, file->ucontext);
ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
idr_destroy(&file->idr);
mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) {
@ -1032,7 +935,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&file->async_file->ref,
ib_uverbs_release_async_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
kobject_put(&dev->kobj);
@ -1231,7 +1135,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
struct ib_uverbs_event_file *event_file;
struct ib_uverbs_async_event_file *event_file;
struct ib_event event;
/* Pending running commands to terminate */
@ -1268,7 +1172,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_dev->disassociate_ucontext(ucontext);
ib_uverbs_cleanup_ucontext(file, ucontext);
mutex_lock(&file->cleanup_mutex);
ib_uverbs_cleanup_ucontext(file, ucontext, true);
mutex_unlock(&file->cleanup_mutex);
}
mutex_lock(&uverbs_dev->lists_mutex);
@ -1278,21 +1184,20 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
event_file = list_first_entry(&uverbs_dev->
uverbs_events_file_list,
struct ib_uverbs_event_file,
struct ib_uverbs_async_event_file,
list);
spin_lock_irq(&event_file->lock);
event_file->is_closed = 1;
spin_unlock_irq(&event_file->lock);
spin_lock_irq(&event_file->ev_queue.lock);
event_file->ev_queue.is_closed = 1;
spin_unlock_irq(&event_file->ev_queue.lock);
list_del(&event_file->list);
if (event_file->is_async) {
ib_unregister_event_handler(&event_file->uverbs_file->
event_handler);
event_file->uverbs_file->event_handler.device = NULL;
}
ib_unregister_event_handler(
&event_file->uverbs_file->event_handler);
event_file->uverbs_file->event_handler.device =
NULL;
wake_up_interruptible(&event_file->poll_wait);
kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
wake_up_interruptible(&event_file->ev_queue.poll_wait);
kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
mutex_unlock(&uverbs_dev->lists_mutex);
}
@ -1396,13 +1301,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
if (overflow_maj)
unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr);
idr_destroy(&ib_uverbs_ah_idr);
idr_destroy(&ib_uverbs_cq_idr);
idr_destroy(&ib_uverbs_qp_idr);
idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);

View file

@ -0,0 +1,275 @@
/*
* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/uverbs_std_types.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <linux/bug.h>
#include <linux/file.h>
#include "rdma_core.h"
#include "uverbs.h"
int uverbs_free_ah(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
return ib_destroy_ah((struct ib_ah *)uobject->object);
}
int uverbs_free_flow(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
return ib_destroy_flow((struct ib_flow *)uobject->object);
}
int uverbs_free_mw(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
}
int uverbs_free_qp(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_qp *qp = uobject->object;
struct ib_uqp_object *uqp =
container_of(uobject, struct ib_uqp_object, uevent.uobject);
int ret;
if (why == RDMA_REMOVE_DESTROY) {
if (!list_empty(&uqp->mcast_list))
return -EBUSY;
} else if (qp == qp->real_qp) {
ib_uverbs_detach_umcast(qp, uqp);
}
ret = ib_destroy_qp(qp);
if (ret && why == RDMA_REMOVE_DESTROY)
return ret;
if (uqp->uxrcd)
atomic_dec(&uqp->uxrcd->refcnt);
ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent);
return ret;
}
int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
int ret;
ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
if (!ret || why != RDMA_REMOVE_DESTROY)
kfree(ind_tbl);
return ret;
}
int uverbs_free_wq(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_wq *wq = uobject->object;
struct ib_uwq_object *uwq =
container_of(uobject, struct ib_uwq_object, uevent.uobject);
int ret;
ret = ib_destroy_wq(wq);
if (!ret || why != RDMA_REMOVE_DESTROY)
ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
return ret;
}
int uverbs_free_srq(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_srq *srq = uobject->object;
struct ib_uevent_object *uevent =
container_of(uobject, struct ib_uevent_object, uobject);
enum ib_srq_type srq_type = srq->srq_type;
int ret;
ret = ib_destroy_srq(srq);
if (ret && why == RDMA_REMOVE_DESTROY)
return ret;
if (srq_type == IB_SRQT_XRC) {
struct ib_usrq_object *us =
container_of(uevent, struct ib_usrq_object, uevent);
atomic_dec(&us->uxrcd->refcnt);
}
ib_uverbs_release_uevent(uobject->context->ufile, uevent);
return ret;
}
int uverbs_free_cq(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_cq *cq = uobject->object;
struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
struct ib_ucq_object *ucq =
container_of(uobject, struct ib_ucq_object, uobject);
int ret;
ret = ib_destroy_cq(cq);
if (!ret || why != RDMA_REMOVE_DESTROY)
ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
container_of(ev_queue,
struct ib_uverbs_completion_event_file,
ev_queue) : NULL,
ucq);
return ret;
}
int uverbs_free_mr(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
return ib_dereg_mr((struct ib_mr *)uobject->object);
}
int uverbs_free_xrcd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_xrcd *xrcd = uobject->object;
struct ib_uxrcd_object *uxrcd =
container_of(uobject, struct ib_uxrcd_object, uobject);
int ret;
mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
ret = -EBUSY;
else
ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
xrcd, why);
mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
return ret;
}
int uverbs_free_pd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_pd *pd = uobject->object;
if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
return -EBUSY;
ib_dealloc_pd((struct ib_pd *)uobject->object);
return 0;
}
int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
enum rdma_remove_reason why)
{
struct ib_uverbs_completion_event_file *comp_event_file =
container_of(uobj_file, struct ib_uverbs_completion_event_file,
uobj_file);
struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
spin_lock_irq(&event_queue->lock);
event_queue->is_closed = 1;
spin_unlock_irq(&event_queue->lock);
if (why == RDMA_REMOVE_DRIVER_REMOVE) {
wake_up_interruptible(&event_queue->poll_wait);
kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
}
return 0;
};
const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
.type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
.context_closed = uverbs_hot_unplug_completion_event_file,
.fops = &uverbs_event_fops,
.name = "[infinibandevent]",
.flags = O_RDONLY,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
.destroy_object = uverbs_free_cq,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_qp = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0),
.destroy_object = uverbs_free_qp,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_mw = {
.type = UVERBS_TYPE_ALLOC_IDR(0),
.destroy_object = uverbs_free_mw,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_mr = {
/* 1 is used in order to free the MR after all the MWs */
.type = UVERBS_TYPE_ALLOC_IDR(1),
.destroy_object = uverbs_free_mr,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_srq = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0),
.destroy_object = uverbs_free_srq,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_ah = {
.type = UVERBS_TYPE_ALLOC_IDR(0),
.destroy_object = uverbs_free_ah,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_flow = {
.type = UVERBS_TYPE_ALLOC_IDR(0),
.destroy_object = uverbs_free_flow,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_wq = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0),
.destroy_object = uverbs_free_wq,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = {
.type = UVERBS_TYPE_ALLOC_IDR(0),
.destroy_object = uverbs_free_rwq_ind_tbl,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0),
.destroy_object = uverbs_free_xrcd,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_pd = {
/* 2 is used in order to free the PD after MRs */
.type = UVERBS_TYPE_ALLOC_IDR(2),
.destroy_object = uverbs_free_pd,
};

View file

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -64,6 +64,7 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
#include "debugfs.h"
#define NUM_IB_PORTS 1
@ -1045,6 +1046,7 @@ static void dc_start(struct hfi1_devdata *);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
/*
* Error interrupt table entry. This is used as input to the interrupt
@ -7165,7 +7167,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* set the max_rate field in handle_verify_cap until v0.19.
*/
if ((dd->icode == ICODE_RTL_SILICON) &&
(dd->dc8051_ver < dc8051_ver(0, 19))) {
(dd->dc8051_ver < dc8051_ver(0, 19, 0))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
@ -7350,7 +7352,7 @@ void handle_verify_cap(struct work_struct *work)
}
ppd->link_speed_active = 0; /* invalid value */
if (dd->dc8051_ver < dc8051_ver(0, 20)) {
if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
switch (remote_tx_rate) {
case 0:
@ -7897,6 +7899,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
/* report any remaining errors */
if (reg)
dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
@ -8343,6 +8348,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
return 0;
}
/*
* Provide a cache for some of the LCB registers in case the LCB is
* unavailable.
* (The LCB is unavailable in certain link states, for example.)
*/
struct lcb_datum {
u32 off;
u64 val;
};
static struct lcb_datum lcb_cache[] = {
{ DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0},
{ DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 },
{ DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 },
};
static void update_lcb_cache(struct hfi1_devdata *dd)
{
int i;
int ret;
u64 val;
for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
ret = read_lcb_csr(dd, lcb_cache[i].off, &val);
/* Update if we get good data */
if (likely(ret != -EBUSY))
lcb_cache[i].val = val;
}
}
static int read_lcb_cache(u32 off, u64 *val)
{
int i;
for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
if (lcb_cache[i].off == off) {
*val = lcb_cache[i].val;
return 0;
}
}
pr_warn("%s bad offset 0x%x\n", __func__, off);
return -1;
}
/*
* Read an LCB CSR. Access may not be in host control, so check.
* Return 0 on success, -EBUSY on failure.
@ -8354,9 +8405,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
/* if up, go through the 8051 for the value */
if (ppd->host_link_state & HLS_UP)
return read_lcb_via_8051(dd, addr, data);
/* if going up or down, no access */
if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
return -EBUSY;
/* if going up or down, check the cache, otherwise, no access */
if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) {
if (read_lcb_cache(addr, data))
return -EBUSY;
return 0;
}
/* otherwise, host has access */
*data = read_csr(dd, addr);
return 0;
@ -8371,7 +8426,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
int ret;
if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
(dd->dc8051_ver < dc8051_ver(0, 20))) {
(dd->dc8051_ver < dc8051_ver(0, 20, 0))) {
if (acquire_lcb_access(dd, 0) == 0) {
write_csr(dd, addr, data);
release_lcb_access(dd, 0);
@ -8677,13 +8732,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch)
{
u32 frame;
read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
*ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
*ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
*ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) &
STS_FM_VERSION_MAJOR_MASK;
*ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) &
STS_FM_VERSION_MINOR_MASK;
read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame);
*ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) &
STS_FM_VERSION_PATCH_MASK;
}
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
@ -8891,8 +8953,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
*/
static int do_quick_linkup(struct hfi1_devdata *dd)
{
u64 reg;
unsigned long timeout;
int ret;
lcb_shutdown(dd, 0);
@ -8915,19 +8975,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RUN,
1ull << DC_LCB_CFG_RUN_EN_SHIFT);
/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
timeout = jiffies + msecs_to_jiffies(10);
while (1) {
reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
if (reg)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(dd,
"timeout waiting for LINK_TRANSFER_ACTIVE\n");
return -ETIMEDOUT;
}
udelay(2);
}
ret = wait_link_transfer_active(dd, 10);
if (ret)
return ret;
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
@ -9091,7 +9141,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret)
goto set_local_link_attributes_fail;
if (dd->dc8051_ver < dc8051_ver(0, 20)) {
if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* set the tx rate to the fastest enabled */
if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
ppd->local_tx_rate = 1;
@ -9494,8 +9544,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd)
int ret;
u8 status;
/* report success if not a QSFP */
if (ppd->port_type != PORT_TYPE_QSFP)
/*
* Report success if not a QSFP or, if it is a QSFP, but the cable is
* not present
*/
if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd))
return 0;
/* read byte 2, the status byte */
@ -10082,6 +10135,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
decode_state_complete(ppd, last_remote_state, "received");
}
/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */
static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms)
{
u64 reg;
unsigned long timeout;
/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
timeout = jiffies + msecs_to_jiffies(wait_ms);
while (1) {
reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
if (reg)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(dd,
"timeout waiting for LINK_TRANSFER_ACTIVE\n");
return -ETIMEDOUT;
}
udelay(2);
}
return 0;
}
/* called when the logical link state is not down as it should be */
static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
/*
* Bring link up in LCB loopback
*/
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0);
write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110);
write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2);
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
(void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET);
udelay(3);
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1);
write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
wait_link_transfer_active(dd, 100);
/*
* Bring the link down again.
*/
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
/* call again to adjust ppd->statusp, if needed */
get_logical_state(ppd);
}
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
@ -10098,6 +10209,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
int do_transition;
int do_wait;
update_lcb_cache(dd);
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
pstate = read_physical_state(dd);
@ -10135,15 +10248,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
return ret;
}
/* make sure the logical state is also down */
wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
/* make sure the logical state is also down */
ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
if (ret)
force_logical_link_state_down(ppd);
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
if (ppd->port_type == PORT_TYPE_QSFP &&

View file

@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -394,7 +394,8 @@
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
#define LINK_DOWN_REASON 0x16
#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */
#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
@ -524,10 +525,12 @@ enum {
#define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B)
/* misc status version fields */
#define STS_FM_VERSION_A_SHIFT 16
#define STS_FM_VERSION_A_MASK 0xff
#define STS_FM_VERSION_B_SHIFT 24
#define STS_FM_VERSION_B_MASK 0xff
#define STS_FM_VERSION_MINOR_SHIFT 16
#define STS_FM_VERSION_MINOR_MASK 0xff
#define STS_FM_VERSION_MAJOR_SHIFT 24
#define STS_FM_VERSION_MAJOR_MASK 0xff
#define STS_FM_VERSION_PATCH_SHIFT 24
#define STS_FM_VERSION_PATCH_MASK 0xff
/* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */
#define LCB_CRC_16B 0x0 /* 16b CRC */
@ -698,7 +701,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b);
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,

View file

@ -51,8 +51,12 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ratelimit.h>
#include <linux/fault-inject.h>
#include "hfi.h"
#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
@ -1063,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
#ifdef CONFIG_FAULT_INJECTION
static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
{
struct hfi1_opcode_stats_perctx *opstats;
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
}
static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct hfi1_opcode_stats_perctx *opstats;
++*pos;
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
}
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
}
static int _fault_stats_seq_show(struct seq_file *s, void *v)
{
loff_t *spos = v;
loff_t i = *spos, j;
u64 n_packets = 0, n_bytes = 0;
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
for (j = 0; j < dd->first_user_ctxt; j++) {
if (!dd->rcd[j])
continue;
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
}
if (!n_packets && !n_bytes)
return SEQ_SKIP;
if (!ibd->fault_opcode->n_rxfaults[i] &&
!ibd->fault_opcode->n_txfaults[i])
return SEQ_SKIP;
seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
(unsigned long long)n_packets,
(unsigned long long)n_bytes,
(unsigned long long)ibd->fault_opcode->n_rxfaults[i],
(unsigned long long)ibd->fault_opcode->n_txfaults[i]);
return 0;
}
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);
static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
{
debugfs_remove_recursive(ibd->fault_opcode->dir);
kfree(ibd->fault_opcode);
ibd->fault_opcode = NULL;
}
static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
{
struct dentry *parent = ibd->hfi1_ibdev_dbg;
ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
if (!ibd->fault_opcode)
return -ENOMEM;
ibd->fault_opcode->attr.interval = 1;
ibd->fault_opcode->attr.require_end = ULONG_MAX;
ibd->fault_opcode->attr.stacktrace_depth = 32;
ibd->fault_opcode->attr.dname = NULL;
ibd->fault_opcode->attr.verbose = 0;
ibd->fault_opcode->fault_by_opcode = false;
ibd->fault_opcode->opcode = 0;
ibd->fault_opcode->mask = 0xff;
ibd->fault_opcode->dir =
fault_create_debugfs_attr("fault_opcode",
parent,
&ibd->fault_opcode->attr);
if (IS_ERR(ibd->fault_opcode->dir)) {
kfree(ibd->fault_opcode);
return -ENOENT;
}
DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
if (!debugfs_create_bool("fault_by_opcode", 0600,
ibd->fault_opcode->dir,
&ibd->fault_opcode->fault_by_opcode))
goto fail;
if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
&ibd->fault_opcode->opcode))
goto fail;
if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
&ibd->fault_opcode->mask))
goto fail;
return 0;
fail:
fault_exit_opcode_debugfs(ibd);
return -ENOMEM;
}
static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
{
debugfs_remove_recursive(ibd->fault_packet->dir);
kfree(ibd->fault_packet);
ibd->fault_packet = NULL;
}
static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
{
struct dentry *parent = ibd->hfi1_ibdev_dbg;
ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
if (!ibd->fault_packet)
return -ENOMEM;
ibd->fault_packet->attr.interval = 1;
ibd->fault_packet->attr.require_end = ULONG_MAX;
ibd->fault_packet->attr.stacktrace_depth = 32;
ibd->fault_packet->attr.dname = NULL;
ibd->fault_packet->attr.verbose = 0;
ibd->fault_packet->fault_by_packet = false;
ibd->fault_packet->dir =
fault_create_debugfs_attr("fault_packet",
parent,
&ibd->fault_opcode->attr);
if (IS_ERR(ibd->fault_packet->dir)) {
kfree(ibd->fault_packet);
return -ENOENT;
}
if (!debugfs_create_bool("fault_by_packet", 0600,
ibd->fault_packet->dir,
&ibd->fault_packet->fault_by_packet))
goto fail;
if (!debugfs_create_u64("fault_stats", 0400,
ibd->fault_packet->dir,
&ibd->fault_packet->n_faults))
goto fail;
return 0;
fail:
fault_exit_packet_debugfs(ibd);
return -ENOMEM;
}
static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
{
fault_exit_opcode_debugfs(ibd);
fault_exit_packet_debugfs(ibd);
}
static int fault_init_debugfs(struct hfi1_ibdev *ibd)
{
int ret = 0;
ret = fault_init_opcode_debugfs(ibd);
if (ret)
return ret;
ret = fault_init_packet_debugfs(ibd);
if (ret)
fault_exit_opcode_debugfs(ibd);
return ret;
}
bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
return ibd->fault_suppress_err;
}
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
{
bool ret = false;
struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
return false;
if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
return false;
ret = should_fail(&ibd->fault_opcode->attr, 1);
if (ret) {
trace_hfi1_fault_opcode(qp, opcode);
if (rx)
ibd->fault_opcode->n_rxfaults[opcode]++;
else
ibd->fault_opcode->n_txfaults[opcode]++;
}
return ret;
}
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
bool ret = false;
if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
return false;
ret = should_fail(&ibd->fault_packet->attr, 1);
if (ret) {
++ibd->fault_packet->n_faults;
trace_hfi1_fault_packet(packet);
}
return ret;
}
#endif
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@ -1112,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
}
#ifdef CONFIG_FAULT_INJECTION
debugfs_create_bool("fault_suppress_err", 0600,
ibd->hfi1_ibdev_dbg,
&ibd->fault_suppress_err);
fault_init_debugfs(ibd);
#endif
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
#ifdef CONFIG_FAULT_INJECTION
fault_exit_debugfs(ibd);
#endif
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:

View file

@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
#ifdef CONFIG_FAULT_INJECTION
#include <linux/fault-inject.h>
struct fault_opcode {
struct fault_attr attr;
struct dentry *dir;
bool fault_by_opcode;
u64 n_rxfaults[256];
u64 n_txfaults[256];
u8 opcode;
u8 mask;
};
struct fault_packet {
struct fault_attr attr;
struct dentry *dir;
bool fault_by_packet;
u64 n_faults;
};
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
#else
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
return false;
}
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
u32 opcode, bool rx)
{
return false;
}
static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
return false;
}
#endif
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
}
void hfi1_dbg_init(void)
static inline void hfi1_dbg_init(void)
{
}
void hfi1_dbg_exit(void)
static inline void hfi1_dbg_exit(void)
{
}
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
return false;
}
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
u32 opcode, bool rx)
{
return false;
}
static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
return false;
}
#endif
#endif /* _HFI1_DEBUGFS_H */

View file

@ -59,6 +59,7 @@
#include "trace.h"
#include "qp.h"
#include "sdma.h"
#include "debugfs.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@ -1354,6 +1355,9 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@ -1363,6 +1367,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr,
rhf_egr_index(packet->rhf));
if (unlikely(
(hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
(packet->rhf & RHF_DC_ERR))))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
@ -1398,6 +1407,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet)
{
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
rhf_rcv_type_err(packet->rhf) == 3))
return RHF_RCV_CONTINUE;
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@ -1409,6 +1424,8 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@ -1421,6 +1438,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");

View file

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
{
u64 reg;
int ret;
u8 ver_a, ver_b;
u8 ver_major;
u8 ver_minor;
u8 ver_patch;
/*
* DC Reset sequence
@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
return -ETIMEDOUT;
}
read_misc_status(dd, &ver_a, &ver_b);
dd_dev_info(dd, "8051 firmware version %d.%d\n",
(int)ver_b, (int)ver_a);
dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
(int)ver_major, (int)ver_minor, (int)ver_patch);
dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
return 0;
}

View file

@ -1020,7 +1020,7 @@ struct hfi1_devdata {
u8 qos_shift;
u16 irev; /* implementation revision */
u16 dc8051_ver; /* 8051 firmware version */
u32 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
@ -1167,15 +1167,16 @@ struct hfi1_devdata {
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable sdma_rht;
struct rhashtable *sdma_rht;
struct kobject kobj;
};
/* 8051 firmware version helper */
#define dc8051_ver(a, b) ((a) << 8 | (b))
#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
#define dc8051_ver_min(a) (a & 0x00ff)
#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
#define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */
#define PT_EXPECTED 0

View file

@ -1425,6 +1425,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* First, lock the non-writable module parameters */
HFI1_CAP_LOCK();
/* Validate dev ids */
if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
ent->device == PCI_DEVICE_ID_INTEL1)) {
hfi1_early_err(&pdev->dev,
"Failing on unknown Intel deviceid 0x%x\n",
ent->device);
ret = -ENODEV;
goto bail;
}
/* Validate some global module parameters */
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
if (ret)
@ -1470,15 +1480,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto bail;
if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
ent->device == PCI_DEVICE_ID_INTEL1)) {
hfi1_early_err(&pdev->dev,
"Failing on unknown Intel deviceid 0x%x\n",
ent->device);
ret = -ENODEV;
goto clean_bail;
}
/*
* Do device-specific initialization, function table setup, dd
* allocation, etc.

View file

@ -1028,13 +1028,17 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
rvt_qp_swqe_complete(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@ -1076,12 +1080,16 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
rvt_put_swqe(wqe);
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
rvt_qp_swqe_complete(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

View file

@ -909,8 +909,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
last = qp->s_last;
old_last = last;
trace_hfi1_qp_send_completion(qp, wqe, last);
if (++last >= qp->s_size)
last = 0;
trace_hfi1_qp_send_completion(qp, wqe, last);
qp->s_last = last;
/* See post_send() */
barrier();
@ -920,7 +922,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
rvt_qp_swqe_complete(qp, wqe, status);
rvt_qp_swqe_complete(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
status);
if (qp->s_acked == old_last)
qp->s_acked = last;

View file

@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
cpu_id = smp_processor_id();
rcu_read_lock();
rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
@ -962,7 +962,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
continue;
}
rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
if (vl >= ARRAY_SIZE(rht_node->map)) {
ret = -EINVAL;
goto out;
}
rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
@ -982,7 +987,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
rht_node->map[vl]->ctr = 1;
rht_node->map[vl]->sde[0] = sde;
ret = rhashtable_insert_fast(&dd->sdma_rht,
ret = rhashtable_insert_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
if (ret) {
@ -1025,7 +1030,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
if (cpumask_test_cpu(cpu, mask))
continue;
rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (rht_node) {
bool empty = true;
@ -1049,7 +1054,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
if (empty) {
ret = rhashtable_remove_fast(&dd->sdma_rht,
ret = rhashtable_remove_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
WARN_ON(ret);
@ -1108,7 +1113,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s,
struct sdma_rht_node *rht_node;
int i, j;
rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
sdma_rht_params);
if (!rht_node)
return;
@ -1322,6 +1327,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
synchronize_rcu();
kfree(dd->per_sdma);
dd->per_sdma = NULL;
if (dd->sdma_rht) {
rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
kfree(dd->sdma_rht);
dd->sdma_rht = NULL;
}
}
/**
@ -1341,12 +1352,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
{
unsigned this_idx;
struct sdma_engine *sde;
struct rhashtable *tmp_sdma_rht;
u16 descq_cnt;
void *curr_head;
struct hfi1_pportdata *ppd = dd->pport + port;
u32 per_sdma_credits;
uint idle_cnt = sdma_idle_cnt;
size_t num_engines = dd->chip_sdma_engines;
int ret = -ENOMEM;
if (!HFI1_CAP_IS_KSET(SDMA)) {
HFI1_CAP_CLEAR(SDMA_AHG);
@ -1378,7 +1391,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
/* alloc memory for array of send engines */
dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
if (!dd->per_sdma)
return -ENOMEM;
return ret;
idle_cnt = ns_to_cclock(dd, idle_cnt);
if (!sdma_desct_intr)
@ -1507,18 +1520,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->flags |= HFI1_HAS_SEND_DMA;
dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
dd->num_sdma = num_engines;
if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
if (ret < 0)
goto bail;
if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
if (!tmp_sdma_rht) {
ret = -ENOMEM;
goto bail;
}
ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
if (ret < 0)
goto bail;
dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
bail:
sdma_clean(dd, num_engines);
return -ENOMEM;
return ret;
}
/**
@ -1604,7 +1626,6 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*

View file

@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
#ifdef CONFIG_FAULT_INJECTION
TRACE_EVENT(hfi1_fault_opcode,
TP_PROTO(struct rvt_qp *qp, u8 opcode),
TP_ARGS(qp, opcode),
TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u8, opcode)
),
TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->opcode = opcode;
),
TP_printk("[%s] qpn 0x%x opcode 0x%x",
__get_str(dev), __entry->qpn, __entry->opcode)
);
TRACE_EVENT(hfi1_fault_packet,
TP_PROTO(struct hfi1_packet *packet),
TP_ARGS(packet),
TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
__field(u64, eflags)
__field(u32, ctxt)
__field(u32, hlen)
__field(u32, tlen)
__field(u32, updegr)
__field(u32, etail)
),
TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
__entry->eflags = rhf_err_flags(packet->rhf);
__entry->ctxt = packet->rcd->ctxt;
__entry->hlen = packet->hlen;
__entry->tlen = packet->tlen;
__entry->updegr = packet->updegr;
__entry->etail = rhf_egr_index(packet->rhf);
),
TP_printk(
"[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
__get_str(dev),
__entry->ctxt,
__entry->eflags,
__entry->hlen,
__entry->tlen,
__entry->updegr,
__entry->etail
)
);
#endif
#endif /* __HFI1_TRACE_MISC_H */
#undef TRACE_INCLUDE_PATH

View file

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
TP_ARGS(qp, psn)
);
DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)

View file

@ -633,6 +633,49 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
TP_ARGS(dd, bc));
TRACE_EVENT(
hfi1_qp_send_completion,
TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
TP_ARGS(qp, wqe, idx),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(struct rvt_swqe *, wqe)
__field(u64, wr_id)
__field(u32, qpn)
__field(u32, qpt)
__field(u32, length)
__field(u32, idx)
__field(u32, ssn)
__field(enum ib_wr_opcode, opcode)
__field(int, send_flags)
),
TP_fast_assign(
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->wqe = wqe;
__entry->wr_id = wqe->wr.wr_id;
__entry->qpn = qp->ibqp.qp_num;
__entry->qpt = qp->ibqp.qp_type;
__entry->length = wqe->length;
__entry->idx = idx;
__entry->ssn = wqe->ssn;
__entry->opcode = wqe->wr.opcode;
__entry->send_flags = wqe->wr.send_flags;
),
TP_printk(
"[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
__get_str(dev),
__entry->qpn,
__entry->qpt,
__entry->wqe,
__entry->idx,
__entry->wr_id,
__entry->length,
__entry->ssn,
__entry->opcode,
__entry->send_flags
)
);
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH

View file

@ -1615,9 +1615,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
{
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
cq->comps[idx].status = state;
if (state == ERROR)
cq->comps[idx].errcode = -ret;
smp_wmb(); /* make sure errcode is visible first */
cq->comps[idx].status = state;
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}

View file

@ -60,6 +60,7 @@
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "debugfs.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@ -296,6 +297,22 @@ static inline bool wss_exceeds_threshold(void)
return atomic_read(&wss.total_count) >= wss.threshold;
}
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
[IB_WR_REG_MR] = IB_WC_REG_MR
};
/*
* Length of header by opcode, 0 --> not supported
*/
@ -501,6 +518,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
return NULL;
}
static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
/*
* In order to drop non-IB traffic we
* set PbcInsertHrc to NONE (0x2).
* The packet will still be delivered
* to the receiving node but a
* KHdrHCRCErr (KDETH packet with a bad
* HCRC) will be triggered and the
* packet will not be delivered to the
* correct context.
*/
pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
else
/*
* In order to drop regular verbs
* traffic we set the PbcTestEbp
* flag. The packet will still be
* delivered to the receiving node but
* a 'late ebp error' will be
* triggered and will be dropped.
*/
pbc |= PBC_TEST_EBP;
#endif
return pbc;
}
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
@ -583,6 +629,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
goto drop;
}
if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
true))) {
rcu_read_unlock();
goto drop;
}
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet);
if (likely(packet_handler))
@ -781,7 +832,6 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
}
/* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx);
@ -800,7 +850,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
u64 pbc_flags = 0;
u8 sc5 = priv->s_sc;
int ret;
@ -809,12 +858,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
u8 opcode = get_opcode(&tx->phdr.hdr);
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd,
pbc_flags,
pbc,
qp->srate_mbps,
vl,
plen);
@ -917,7 +970,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
u64 pbc_flags = 0;
u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
@ -942,9 +994,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
struct verbs_txreq *tx = ps->s_txreq;
u8 opcode = get_opcode(&tx->phdr.hdr);
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
@ -1220,12 +1277,14 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u16 ver = dd->dc8051_ver;
u32 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
(u64)dc8051_ver_min(ver);
rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
((u64)(dc8051_ver_min(ver)) << 16) |
(u64)dc8051_ver_patch(ver);
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
@ -1504,10 +1563,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
u16 ver = dd_from_dev(dev)->dc8051_ver;
u32 ver = dd_from_dev(dev)->dc8051_ver;
snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
dc8051_ver_min(ver));
snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver),
dc8051_ver_min(ver), dc8051_ver_patch(ver));
}
static const char * const driver_cntr_names[] = {
@ -1524,6 +1583,7 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
static const char **dev_cntr_names;
static const char **port_cntr_names;
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
@ -1578,6 +1638,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
{
int i, err;
mutex_lock(&cntr_names_lock);
if (!cntr_names_initialized) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@ -1586,8 +1647,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
num_driver_cntrs,
&num_dev_cntrs,
&dev_cntr_names);
if (err)
if (err) {
mutex_unlock(&cntr_names_lock);
return NULL;
}
for (i = 0; i < num_driver_cntrs; i++)
dev_cntr_names[num_dev_cntrs + i] =
@ -1601,10 +1664,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
if (err) {
kfree(dev_cntr_names);
dev_cntr_names = NULL;
mutex_unlock(&cntr_names_lock);
return NULL;
}
cntr_names_initialized = 1;
}
mutex_unlock(&cntr_names_lock);
if (!port_num)
return rdma_alloc_hw_stats_struct(
@ -1823,9 +1888,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
mutex_lock(&cntr_names_lock);
kfree(dev_cntr_names);
kfree(port_cntr_names);
dev_cntr_names = NULL;
port_cntr_names = NULL;
cntr_names_initialized = 0;
mutex_unlock(&cntr_names_lock);
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)

View file

@ -195,6 +195,11 @@ struct hfi1_ibdev {
struct dentry *hfi1_ibdev_dbg;
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
#ifdef CONFIG_FAULT_INJECTION
struct fault_opcode *fault_opcode;
struct fault_packet *fault_packet;
bool fault_suppress_err;
#endif
#endif
};

View file

@ -33,6 +33,7 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
#include <linux/etherdevice.h>
#include <linux/of.h>
#include <rdma/ib_umem.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"

View file

@ -938,7 +938,10 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
rvt_qp_swqe_complete(qp,
wqe,
ib_qib_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before resending,
@ -983,7 +986,10 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qp->s_last = s_last;
/* see post_send() */
barrier();
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
rvt_qp_swqe_complete(qp,
wqe,
ib_qib_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
} else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);

View file

@ -769,7 +769,10 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
rvt_qp_swqe_complete(qp, wqe, status);
rvt_qp_swqe_complete(qp,
wqe,
ib_qib_wc_opcode[wqe->wr.opcode],
status);
if (qp->s_acked == old_last)
qp->s_acked = last;

View file

@ -113,6 +113,19 @@ static unsigned int ib_qib_disable_sma;
module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
const enum ib_wc_opcode ib_qib_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};
/*
* System image GUID.
*/

View file

@ -50,6 +50,7 @@
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
#include "trace.h"
/**
* rvt_cq_enter - add a new entry to the completion queue
@ -93,6 +94,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
}
return;
}
trace_rvt_cq_enter(cq, entry, head);
if (cq->ip) {
wc->uqueue[head].wr_id = entry->wr_id;
wc->uqueue[head].status = entry->status;
@ -482,6 +484,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail == wc->head)
break;
/* The kernel doesn't need a RMB since it has the lock. */
trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
*entry = wc->kqueue[tail];
if (tail >= cq->ibcq.cqe)
tail = 0;

View file

@ -191,8 +191,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
tmr = rcu_access_pointer(dev->dma_mr);
if (!tmr) {
rcu_assign_pointer(dev->dma_mr, mr);
mr->lkey_published = 1;
/* Insure published written first */
rcu_assign_pointer(dev->dma_mr, mr);
rvt_get_mr(mr);
}
goto success;
@ -224,8 +225,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
mr->lkey |= 1 << 8;
rkt->gen++;
}
rcu_assign_pointer(rkt->table[r], mr);
mr->lkey_published = 1;
/* Insure published written first */
rcu_assign_pointer(rkt->table[r], mr);
success:
spin_unlock_irqrestore(&rkt->lock, flags);
out:
@ -253,23 +255,24 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
spin_lock_irqsave(&rkt->lock, flags);
if (!lkey) {
if (mr->lkey_published) {
RCU_INIT_POINTER(dev->dma_mr, NULL);
mr->lkey_published = 0;
/* insure published is written before pointer */
rcu_assign_pointer(dev->dma_mr, NULL);
rvt_put_mr(mr);
}
} else {
if (!mr->lkey_published)
goto out;
r = lkey >> (32 - dev->dparms.lkey_table_size);
RCU_INIT_POINTER(rkt->table[r], NULL);
mr->lkey_published = 0;
/* insure published is written before pointer */
rcu_assign_pointer(rkt->table[r], NULL);
}
mr->lkey_published = 0;
freed++;
out:
spin_unlock_irqrestore(&rkt->lock, flags);
if (freed) {
synchronize_rcu();
if (freed)
percpu_ref_kill(&mr->refcount);
}
}
static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
@ -822,16 +825,21 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
goto ok;
}
mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
if (!mr)
goto bail;
rvt_get_mr(mr);
if (!READ_ONCE(mr->lkey_published))
goto bail_unref;
if (unlikely(atomic_read(&mr->lkey_invalid) ||
mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail_unref;
off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
rvt_get_mr(mr);
goto bail_unref;
rcu_read_unlock();
off += mr->offset;
@ -867,6 +875,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = n;
ok:
return 1;
bail_unref:
rvt_put_mr(mr);
bail:
rcu_read_unlock();
return 0;
@ -922,15 +932,20 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
}
mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != rkey || qp->ibqp.pd != mr->pd))
if (!mr)
goto bail;
rvt_get_mr(mr);
/* insure mr read is before test */
if (!READ_ONCE(mr->lkey_published))
goto bail_unref;
if (unlikely(atomic_read(&mr->lkey_invalid) ||
mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail_unref;
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
rvt_get_mr(mr);
goto bail_unref;
rcu_read_unlock();
off += mr->offset;
@ -966,6 +981,8 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
sge->n = n;
ok:
return 1;
bail_unref:
rvt_put_mr(mr);
bail:
rcu_read_unlock();
return 0;

View file

@ -1,5 +1,5 @@
/*
* Copyright(c) 2016 Intel Corporation.
* Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -117,23 +117,6 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
[IB_WR_REG_MR] = IB_WC_REG_MR
};
EXPORT_SYMBOL(ib_rvt_wc_opcode);
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@ -1789,11 +1772,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
0);
qp->s_next_psn = wqe->lpsn + 1;
}
trace_rvt_post_one_wr(qp, wqe);
if (unlikely(reserved_op))
if (unlikely(reserved_op)) {
wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
rvt_qp_wqe_reserve(qp, wqe);
else
} else {
wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
qp->s_avail--;
}
trace_rvt_post_one_wr(qp, wqe);
smp_wmb(); /* see request builders */
qp->s_head = next;
@ -2069,8 +2055,12 @@ static void rvt_rc_timeout(unsigned long arg)
spin_lock_irqsave(&qp->r_lock, flags);
spin_lock(&qp->s_lock);
if (qp->s_flags & RVT_S_TIMER) {
struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
qp->s_flags &= ~RVT_S_TIMER;
rvp->n_rc_timeouts++;
del_timer(&qp->s_timer);
trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
if (rdi->driver_f.notify_restart_rc)
rdi->driver_f.notify_restart_rc(qp,
qp->s_last_psn + 1,

View file

@ -1,5 +1,5 @@
/*
* Copyright(c) 2016 Intel Corporation.
* Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -52,3 +52,5 @@
#include "trace_qp.h"
#include "trace_tx.h"
#include "trace_mr.h"
#include "trace_cq.h"
#include "trace_rc.h"

View file

@ -0,0 +1,127 @@
/*
* Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#if !defined(__RVT_TRACE_CQ_H) || defined(TRACE_HEADER_MULTI_READ)
#define __RVT_TRACE_CQ_H
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdmavt_cq.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_cq
#define wc_opcode_name(opcode) { IB_WC_##opcode, #opcode }
#define show_wc_opcode(opcode) \
__print_symbolic(opcode, \
wc_opcode_name(SEND), \
wc_opcode_name(RDMA_WRITE), \
wc_opcode_name(RDMA_READ), \
wc_opcode_name(COMP_SWAP), \
wc_opcode_name(FETCH_ADD), \
wc_opcode_name(LSO), \
wc_opcode_name(LOCAL_INV), \
wc_opcode_name(REG_MR), \
wc_opcode_name(MASKED_COMP_SWAP), \
wc_opcode_name(RECV), \
wc_opcode_name(RECV_RDMA_WITH_IMM))
#define CQ_PRN \
"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
DECLARE_EVENT_CLASS(
rvt_cq_entry_template,
TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
TP_ARGS(cq, wc, idx),
TP_STRUCT__entry(
RDI_DEV_ENTRY(cq->rdi)
__field(u64, wr_id)
__field(u32, status)
__field(u32, opcode)
__field(u32, qpn)
__field(u32, length)
__field(u32, idx)
),
TP_fast_assign(
RDI_DEV_ASSIGN(cq->rdi)
__entry->wr_id = wc->wr_id;
__entry->status = wc->status;
__entry->opcode = wc->opcode;
__entry->length = wc->byte_len;
__entry->qpn = wc->qp->qp_num;
__entry->idx = idx;
),
TP_printk(
CQ_PRN,
__get_str(dev),
__entry->idx,
__entry->wr_id,
__entry->status,
__entry->opcode, show_wc_opcode(__entry->opcode),
__entry->length,
__entry->qpn
)
);
DEFINE_EVENT(
rvt_cq_entry_template, rvt_cq_enter,
TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
TP_ARGS(cq, wc, idx));
DEFINE_EVENT(
rvt_cq_entry_template, rvt_cq_poll,
TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
TP_ARGS(cq, wc, idx));
#endif /* __RVT_TRACE_CQ_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_cq
#include <trace/define_trace.h>

View file

@ -0,0 +1,109 @@
/*
* Copyright(c) 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#if !defined(__RVT_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
#define __RVT_TRACE_RC_H
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_vt.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_rc
DECLARE_EVENT_CLASS(rvt_rc_template,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn),
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u32, qpn)
__field(u32, s_flags)
__field(u32, psn)
__field(u32, s_psn)
__field(u32, s_next_psn)
__field(u32, s_sending_psn)
__field(u32, s_sending_hpsn)
__field(u32, r_psn)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->psn = psn;
__entry->s_psn = qp->s_psn;
__entry->s_next_psn = qp->s_next_psn;
__entry->s_sending_psn = qp->s_sending_psn;
__entry->s_sending_hpsn = qp->s_sending_hpsn;
__entry->r_psn = qp->r_psn;
),
TP_printk(
"[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
__get_str(dev),
__entry->qpn,
__entry->s_flags,
__entry->psn,
__entry->s_psn,
__entry->s_next_psn,
__entry->s_sending_psn,
__entry->s_sending_hpsn,
__entry->r_psn
)
);
DEFINE_EVENT(rvt_rc_template, rvt_rc_timeout,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
#endif /* __RVT_TRACE_RC_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_rc
#include <trace/define_trace.h>

View file

@ -71,10 +71,20 @@ __print_symbolic(opcode, \
wr_opcode_name(RDMA_READ_WITH_INV), \
wr_opcode_name(LOCAL_INV), \
wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD), \
wr_opcode_name(RESERVED1), \
wr_opcode_name(RESERVED2), \
wr_opcode_name(RESERVED3), \
wr_opcode_name(RESERVED4), \
wr_opcode_name(RESERVED5), \
wr_opcode_name(RESERVED6), \
wr_opcode_name(RESERVED7), \
wr_opcode_name(RESERVED8), \
wr_opcode_name(RESERVED9), \
wr_opcode_name(RESERVED10))
#define POS_PRN \
"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
TRACE_EVENT(
rvt_post_one_wr,
@ -83,7 +93,9 @@ TRACE_EVENT(
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u64, wr_id)
__field(struct rvt_swqe *, wqe)
__field(u32, qpn)
__field(u32, qpt)
__field(u32, psn)
__field(u32, lpsn)
__field(u32, length)
@ -92,11 +104,17 @@ TRACE_EVENT(
__field(u32, avail)
__field(u32, head)
__field(u32, last)
__field(u32, ssn)
__field(int, send_flags)
__field(pid_t, pid)
__field(int, num_sge)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
__entry->wqe = wqe;
__entry->wr_id = wqe->wr.wr_id;
__entry->qpn = qp->ibqp.qp_num;
__entry->qpt = qp->ibqp.qp_type;
__entry->psn = wqe->psn;
__entry->lpsn = wqe->lpsn;
__entry->length = wqe->length;
@ -105,20 +123,30 @@ TRACE_EVENT(
__entry->avail = qp->s_avail;
__entry->head = qp->s_head;
__entry->last = qp->s_last;
__entry->pid = qp->pid;
__entry->ssn = wqe->ssn;
__entry->send_flags = wqe->wr.send_flags;
__entry->num_sge = wqe->wr.num_sge;
),
TP_printk(
POS_PRN,
__get_str(dev),
__entry->wqe,
__entry->wr_id,
__entry->send_flags,
__entry->qpn,
__entry->qpt,
__entry->psn,
__entry->lpsn,
__entry->ssn,
__entry->length,
__entry->opcode, show_wr_opcode(__entry->opcode),
__entry->size,
__entry->avail,
__entry->head,
__entry->last
__entry->last,
__entry->pid,
__entry->num_sge
)
);

View file

@ -281,8 +281,11 @@ void ipoib_delete_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
WARN_ONCE(!priv->path_dentry, "null path debug file\n");
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
priv->mcg_dentry = priv->path_dentry = NULL;
}
int ipoib_register_debugfs(void)

View file

@ -108,6 +108,33 @@ static struct ib_client ipoib_client = {
.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
static int ipoib_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netdev_notifier_info *ni = ptr;
struct net_device *dev = ni->dev;
if (dev->netdev_ops->ndo_open != ipoib_open)
return NOTIFY_DONE;
switch (event) {
case NETDEV_REGISTER:
ipoib_create_debug_files(dev);
break;
case NETDEV_CHANGENAME:
ipoib_delete_debug_files(dev);
ipoib_create_debug_files(dev);
break;
case NETDEV_UNREGISTER:
ipoib_delete_debug_files(dev);
break;
}
return NOTIFY_DONE;
}
#endif
int ipoib_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@ -1674,8 +1701,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
ASSERT_RTNL();
ipoib_delete_debug_files(dev);
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
/* Stop GC on child */
@ -2090,8 +2115,6 @@ static struct net_device *ipoib_add_port(const char *format,
goto register_failed;
}
ipoib_create_debug_files(priv->dev);
if (ipoib_cm_add_mode_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
@ -2106,7 +2129,6 @@ static struct net_device *ipoib_add_port(const char *format,
return priv->dev;
sysfs_failed:
ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
@ -2191,6 +2213,12 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
kfree(dev_list);
}
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
static struct notifier_block ipoib_netdev_notifier = {
.notifier_call = ipoib_netdev_event,
};
#endif
static int __init ipoib_init_module(void)
{
int ret;
@ -2243,6 +2271,9 @@ static int __init ipoib_init_module(void)
if (ret)
goto err_client;
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
register_netdevice_notifier(&ipoib_netdev_notifier);
#endif
return 0;
err_client:
@ -2260,6 +2291,9 @@ static int __init ipoib_init_module(void)
static void __exit ipoib_cleanup_module(void)
{
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
unregister_netdevice_notifier(&ipoib_netdev_notifier);
#endif
ipoib_netlink_fini();
ib_unregister_client(&ipoib_client);
ib_sa_unregister_client(&ipoib_sa_client);

View file

@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto register_failed;
}
ipoib_create_debug_files(priv->dev);
/* RTNL childs don't need proprietary sysfs entries */
if (type == IPOIB_LEGACY_CHILD) {
if (ipoib_cm_add_mode_attr(priv->dev))
@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
sysfs_failed:
result = -ENOMEM;
ipoib_delete_debug_files(priv->dev);
unregister_netdevice(priv->dev);
register_failed:

View file

@ -80,6 +80,8 @@ enum {
IB_OPCODE_UD = 0x60,
/* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
IB_OPCODE_CNP = 0x80,
/* Manufacturer specific */
IB_OPCODE_MSP = 0xe0,
/* operations -- just used to define real constants */
IB_OPCODE_SEND_FIRST = 0x00,

View file

@ -1357,6 +1357,17 @@ struct ib_fmr_attr {
struct ib_umem;
enum rdma_remove_reason {
/* Userspace requested uobject deletion. Call could fail */
RDMA_REMOVE_DESTROY,
/* Context deletion. This call should delete the actual object itself */
RDMA_REMOVE_CLOSE,
/* Driver is being hot-unplugged. This call should delete the actual object itself */
RDMA_REMOVE_DRIVER_REMOVE,
/* Context is being cleaned-up, but commit was just completed */
RDMA_REMOVE_DURING_CLEANUP,
};
struct ib_rdmacg_object {
#ifdef CONFIG_CGROUP_RDMA
struct rdma_cgroup *cg; /* owner rdma cgroup */
@ -1365,19 +1376,16 @@ struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
struct list_head pd_list;
struct list_head mr_list;
struct list_head mw_list;
struct list_head cq_list;
struct list_head qp_list;
struct list_head srq_list;
struct list_head ah_list;
struct list_head xrcd_list;
struct list_head rule_list;
struct list_head wq_list;
struct list_head rwq_ind_tbl_list;
struct ib_uverbs_file *ufile;
int closing;
/* locking the uobjects_list */
struct mutex uobjects_lock;
struct list_head uobjects;
/* protects cleanup process from other actions */
struct rw_semaphore cleanup_rwsem;
enum rdma_remove_reason cleanup_reason;
struct pid *tgid;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct rb_root umem_tree;
@ -1407,9 +1415,16 @@ struct ib_uobject {
struct ib_rdmacg_object cg_obj; /* rdmacg object */
int id; /* index into kernel idr */
struct kref ref;
struct rw_semaphore mutex; /* protects .live */
atomic_t usecnt; /* protects exclusive access */
struct rcu_head rcu; /* kfree_rcu() overhead */
int live;
const struct uverbs_obj_type *type;
};
struct ib_uobject_file {
struct ib_uobject uobj;
/* ufile contains the lock between context release and file close */
struct ib_uverbs_file *ufile;
};
struct ib_udata {

View file

@ -2,7 +2,7 @@
#define DEF_RDMAVT_INCQP_H
/*
* Copyright(c) 2016 Intel Corporation.
* Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -526,7 +526,6 @@ static inline void rvt_qp_wqe_reserve(
struct rvt_qp *qp,
struct rvt_swqe *wqe)
{
wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
atomic_inc(&qp->s_reserved_used);
}
@ -550,7 +549,6 @@ static inline void rvt_qp_wqe_unreserve(
struct rvt_swqe *wqe)
{
if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) {
wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
atomic_dec(&qp->s_reserved_used);
/* insure no compiler re-order up to s_last change */
smp_mb__after_atomic();
@ -574,6 +572,7 @@ extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
static inline void rvt_qp_swqe_complete(
struct rvt_qp *qp,
struct rvt_swqe *wqe,
enum ib_wc_opcode opcode,
enum ib_wc_status status)
{
if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
@ -586,7 +585,7 @@ static inline void rvt_qp_swqe_complete(
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode];
wc.opcode = opcode;
wc.qp = &qp->ibqp;
wc.byte_len = wqe->length;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,

View file

@ -0,0 +1,114 @@
/*
* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _UVERBS_STD_TYPES__
#define _UVERBS_STD_TYPES__
#include <rdma/uverbs_types.h>
extern const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_cq;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_qp;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_wq;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_srq;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_ah;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_flow;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_mr;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_mw;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_pd;
extern const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd;
static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
bool write,
struct ib_ucontext *ucontext,
int id)
{
return rdma_lookup_get_uobject(type, ucontext, id, write);
}
#define uobj_get_type(_type) uverbs_type_attrs_##_type.type
#define uobj_get_read(_type, _id, _ucontext) \
__uobj_get(&(_type), false, _ucontext, _id)
#define uobj_get_obj_read(_type, _id, _ucontext) \
({ \
struct ib_uobject *uobj = \
__uobj_get(&uobj_get_type(_type), \
false, _ucontext, _id); \
\
(struct ib_##_type *)(IS_ERR(uobj) ? NULL : uobj->object); \
})
#define uobj_get_write(_type, _id, _ucontext) \
__uobj_get(&(_type), true, _ucontext, _id)
static inline void uobj_put_read(struct ib_uobject *uobj)
{
rdma_lookup_put_uobject(uobj, false);
}
#define uobj_put_obj_read(_obj) \
uobj_put_read((_obj)->uobject)
static inline void uobj_put_write(struct ib_uobject *uobj)
{
rdma_lookup_put_uobject(uobj, true);
}
static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj)
{
return rdma_remove_commit_uobject(uobj);
}
static inline void uobj_alloc_commit(struct ib_uobject *uobj)
{
rdma_alloc_commit_uobject(uobj);
}
static inline void uobj_alloc_abort(struct ib_uobject *uobj)
{
rdma_alloc_abort_uobject(uobj);
}
static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext)
{
return rdma_alloc_begin_uobject(type, ucontext);
}
#define uobj_alloc(_type, ucontext) \
__uobj_alloc(&(_type), ucontext)
#endif

172
include/rdma/uverbs_types.h Normal file
View file

@ -0,0 +1,172 @@
/*
* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _UVERBS_TYPES_
#define _UVERBS_TYPES_
#include <linux/kernel.h>
#include <rdma/ib_verbs.h>
struct uverbs_obj_type;
struct uverbs_obj_type_class {
/*
* Get an ib_uobject that corresponds to the given id from ucontext,
* These functions could create or destroy objects if required.
* The action will be finalized only when commit, abort or put fops are
* called.
* The flow of the different actions is:
* [alloc]: Starts with alloc_begin. The handlers logic is than
* executed. If the handler is successful, alloc_commit
* is called and the object is inserted to the repository.
* Once alloc_commit completes the object is visible to
* other threads and userspace.
e Otherwise, alloc_abort is called and the object is
* destroyed.
* [lookup]: Starts with lookup_get which fetches and locks the
* object. After the handler finished using the object, it
* needs to call lookup_put to unlock it. The exclusive
* flag indicates if the object is locked for exclusive
* access.
* [remove]: Starts with lookup_get with exclusive flag set. This
* locks the object for exclusive access. If the handler
* code completed successfully, remove_commit is called
* and the ib_uobject is removed from the context's
* uobjects repository and put. The object itself is
* destroyed as well. Once remove succeeds new krefs to
* the object cannot be acquired by other threads or
* userspace and the hardware driver is removed from the
* object. Other krefs on the object may still exist.
* If the handler code failed, lookup_put should be
* called. This callback is used when the context
* is destroyed as well (process termination,
* reset flow).
*/
struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext);
void (*alloc_commit)(struct ib_uobject *uobj);
void (*alloc_abort)(struct ib_uobject *uobj);
struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext, int id,
bool exclusive);
void (*lookup_put)(struct ib_uobject *uobj, bool exclusive);
/*
* Must be called with the exclusive lock held. If successful uobj is
* invalid on return. On failure uobject is left completely
* unchanged
*/
int __must_check (*remove_commit)(struct ib_uobject *uobj,
enum rdma_remove_reason why);
u8 needs_kfree_rcu;
};
struct uverbs_obj_type {
const struct uverbs_obj_type_class * const type_class;
size_t obj_size;
unsigned int destroy_order;
};
/*
* Objects type classes which support a detach state (object is still alive but
* it's not attached to any context need to make sure:
* (a) no call through to a driver after a detach is called
* (b) detach isn't called concurrently with context_cleanup
*/
struct uverbs_obj_idr_type {
/*
* In idr based objects, uverbs_obj_type_class points to a generic
* idr operations. In order to specialize the underlying types (e.g. CQ,
* QPs, etc.), we add destroy_object specific callbacks.
*/
struct uverbs_obj_type type;
/* Free driver resources from the uobject, make the driver uncallable,
* and move the uobject to the detached state. If the object was
* destroyed by the user's request, a failure should leave the uobject
* completely unchanged.
*/
int __must_check (*destroy_object)(struct ib_uobject *uobj,
enum rdma_remove_reason why);
};
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext,
int id, bool exclusive);
void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive);
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext);
void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj);
int rdma_alloc_commit_uobject(struct ib_uobject *uobj);
struct uverbs_obj_fd_type {
/*
* In fd based objects, uverbs_obj_type_ops points to generic
* fd operations. In order to specialize the underlying types (e.g.
* completion_channel), we use fops, name and flags for fd creation.
* context_closed is called when the context is closed either when
* the driver is removed or the process terminated.
*/
struct uverbs_obj_type type;
int (*context_closed)(struct ib_uobject_file *uobj_file,
enum rdma_remove_reason why);
const struct file_operations *fops;
const char *name;
int flags;
};
extern const struct uverbs_obj_type_class uverbs_idr_class;
extern const struct uverbs_obj_type_class uverbs_fd_class;
#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
sizeof(char))
#define UVERBS_TYPE_ALLOC_FD(_size, _order) \
{ \
.destroy_order = _order, \
.type_class = &uverbs_fd_class, \
.obj_size = (_size) + \
UVERBS_BUILD_BUG_ON((_size) < \
sizeof(struct ib_uobject_file)),\
}
#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order) \
{ \
.destroy_order = _order, \
.type_class = &uverbs_idr_class, \
.obj_size = (_size) + \
UVERBS_BUILD_BUG_ON((_size) < \
sizeof(struct ib_uobject)), \
}
#define UVERBS_TYPE_ALLOC_IDR(_order) \
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order)
#endif