mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-11-01 17:08:10 +00:00
RDMA/core: Add a netlink command to change net namespace of rdma device
Provide an option to change the net namespace of an rdma device through a netlink command. When multiple rdma devices exist in a system and containers are used, this limits the visibility of an rdma device to a specified net namespace. An example sequence that moves the mlx5_1 device to a newly created net namespace 'foo' is: `$ ip netns add foo` followed by `$ rdma dev set mlx5_1 netns foo`. Signed-off-by: Parav Pandit <parav@mellanox.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
parent
decbc7a6b0
commit
2e5b8a0116
4 changed files with 70 additions and 7 deletions
|
@ -350,4 +350,6 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
|
||||||
const char *name);
|
const char *name);
|
||||||
void ib_port_unregister_module_stat(struct kobject *kobj);
|
void ib_port_unregister_module_stat(struct kobject *kobj);
|
||||||
|
|
||||||
|
int ib_device_set_netns_put(struct sk_buff *skb,
|
||||||
|
struct ib_device *dev, u32 ns_fd);
|
||||||
#endif /* _CORE_PRIV_H */
|
#endif /* _CORE_PRIV_H */
|
||||||
|
|
|
@ -1452,9 +1452,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
|
||||||
mutex_lock(&device->unregistration_lock);
|
mutex_lock(&device->unregistration_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If a device not under ib_device_get() or the unregistration_lock
|
* If a device not under ib_device_get() or if the unregistration_lock
|
||||||
* the namespace can be changed, or it can be unregistered. Check
|
* is not held, the namespace can be changed, or it can be unregistered.
|
||||||
* again under the lock.
|
* Check again under the lock.
|
||||||
*/
|
*/
|
||||||
if (refcount_read(&device->refcount) == 0 ||
|
if (refcount_read(&device->refcount) == 0 ||
|
||||||
!net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
|
!net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
|
||||||
|
@ -1471,12 +1471,12 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
|
||||||
*/
|
*/
|
||||||
write_pnet(&device->coredev.rdma_net, net);
|
write_pnet(&device->coredev.rdma_net, net);
|
||||||
|
|
||||||
|
down_read(&devices_rwsem);
|
||||||
/*
|
/*
|
||||||
* Currently rdma devices are system wide unique. So the device name
|
* Currently rdma devices are system wide unique. So the device name
|
||||||
* is guaranteed free in the new namespace. Publish the new namespace
|
* is guaranteed free in the new namespace. Publish the new namespace
|
||||||
* at the sysfs level.
|
* at the sysfs level.
|
||||||
*/
|
*/
|
||||||
down_read(&devices_rwsem);
|
|
||||||
ret = device_rename(&device->dev, dev_name(&device->dev));
|
ret = device_rename(&device->dev, dev_name(&device->dev));
|
||||||
up_read(&devices_rwsem);
|
up_read(&devices_rwsem);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
@ -1488,7 +1488,7 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
|
||||||
}
|
}
|
||||||
|
|
||||||
ret2 = enable_device_and_get(device);
|
ret2 = enable_device_and_get(device);
|
||||||
if (ret2)
|
if (ret2) {
|
||||||
/*
|
/*
|
||||||
* This shouldn't really happen, but if it does, let the user
|
* This shouldn't really happen, but if it does, let the user
|
||||||
* retry at later point. So don't disable the device.
|
* retry at later point. So don't disable the device.
|
||||||
|
@ -1496,7 +1496,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
|
||||||
dev_warn(&device->dev,
|
dev_warn(&device->dev,
|
||||||
"%s: Couldn't re-enable device after namespace change\n",
|
"%s: Couldn't re-enable device after namespace change\n",
|
||||||
__func__);
|
__func__);
|
||||||
|
}
|
||||||
kobject_uevent(&device->dev.kobj, KOBJ_ADD);
|
kobject_uevent(&device->dev.kobj, KOBJ_ADD);
|
||||||
|
|
||||||
ib_device_put(device);
|
ib_device_put(device);
|
||||||
out:
|
out:
|
||||||
mutex_unlock(&device->unregistration_lock);
|
mutex_unlock(&device->unregistration_lock);
|
||||||
|
@ -1505,6 +1507,50 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
|
||||||
return ret2;
|
return ret2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * ib_device_set_netns_put - move an rdma device to the net namespace behind
 *			     a file descriptor and drop the caller's reference
 * @skb: netlink request buffer; used for the CAP_NET_ADMIN capability check
 * @dev: device to move; the caller's ib_device reference is released on
 *	 every path through this function
 * @ns_fd: file descriptor that identifies the destination net namespace
 *
 * Return: the result of rdma_dev_change_netns() when the move is attempted,
 * the error encoded by get_net_ns_by_fd() when the descriptor lookup fails,
 * -EPERM when the requester lacks CAP_NET_ADMIN in the destination
 * namespace's user namespace, or -EOPNOTSUPP when the device or the current
 * netns mode does not allow the move.
 */
int ib_device_set_netns_put(struct sk_buff *skb,
			    struct ib_device *dev, u32 ns_fd)
{
	struct net *target_ns;
	int err;

	target_ns = get_net_ns_by_fd(ns_fd);
	if (IS_ERR(target_ns)) {
		/* No namespace reference was taken; only the device is put. */
		err = PTR_ERR(target_ns);
		ib_device_put(dev);
		return err;
	}

	if (!netlink_ns_capable(skb, target_ns->user_ns, CAP_NET_ADMIN)) {
		err = -EPERM;
		goto reject;
	}

	/*
	 * Only providers that implement disassociation and that do no port
	 * specific sysfs init can be moved, and only while devices are not
	 * shared among net namespaces. The restriction is meant to be lifted
	 * once a port_cleanup infrastructure exists.
	 */
	if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
	    ib_devices_shared_netns) {
		err = -EOPNOTSUPP;
		goto reject;
	}

	/*
	 * Swap the ib_device reference for a plain struct device reference
	 * before changing the namespace.
	 * NOTE(review): presumably this keeps the memory behind @dev valid
	 * while rdma_dev_change_netns() runs without the ib_device reference
	 * held - confirm against rdma_dev_change_netns().
	 */
	get_device(&dev->dev);
	ib_device_put(dev);
	err = rdma_dev_change_netns(dev, current->nsproxy->net_ns, target_ns);
	put_device(&dev->dev);

	put_net(target_ns);
	return err;

reject:
	put_net(target_ns);
	ib_device_put(dev);
	return err;
}
|
||||||
|
|
||||||
static struct pernet_operations rdma_dev_net_ops = {
|
static struct pernet_operations rdma_dev_net_ops = {
|
||||||
.init = rdma_dev_init_net,
|
.init = rdma_dev_init_net,
|
||||||
.exit = rdma_dev_exit_net,
|
.exit = rdma_dev_exit_net,
|
||||||
|
|
|
@ -119,6 +119,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
|
||||||
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
|
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
|
||||||
[RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
|
[RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
|
||||||
.len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
|
.len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
|
||||||
|
[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
|
||||||
};
|
};
|
||||||
|
|
||||||
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
|
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
|
||||||
|
@ -691,9 +692,20 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||||
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
|
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
|
||||||
IB_DEVICE_NAME_MAX);
|
IB_DEVICE_NAME_MAX);
|
||||||
err = ib_device_rename(device, name);
|
err = ib_device_rename(device, name);
|
||||||
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tb[RDMA_NLDEV_NET_NS_FD]) {
|
||||||
|
u32 ns_fd;
|
||||||
|
|
||||||
|
ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
|
||||||
|
err = ib_device_set_netns_put(skb, device, ns_fd);
|
||||||
|
goto put_done;
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
ib_device_put(device);
|
ib_device_put(device);
|
||||||
|
put_done:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -909,7 +921,6 @@ static int _nldev_res_get_dumpit(struct ib_device *device,
|
||||||
nlmsg_cancel(skb, nlh);
|
nlmsg_cancel(skb, nlh);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
nlmsg_end(skb, nlh);
|
nlmsg_end(skb, nlh);
|
||||||
|
|
||||||
idx++;
|
idx++;
|
||||||
|
|
|
@ -469,12 +469,16 @@ enum rdma_nldev_attr {
|
||||||
* either shared or exclusive among multiple net namespaces.
|
* either shared or exclusive among multiple net namespaces.
|
||||||
*/
|
*/
|
||||||
RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */
|
RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Device protocol, e.g. ib, iw, usnic, roce and opa
|
* Device protocol, e.g. ib, iw, usnic, roce and opa
|
||||||
*/
|
*/
|
||||||
RDMA_NLDEV_ATTR_DEV_PROTOCOL, /* string */
|
RDMA_NLDEV_ATTR_DEV_PROTOCOL, /* string */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* File descriptor handle of the net namespace object
|
||||||
|
*/
|
||||||
|
RDMA_NLDEV_NET_NS_FD, /* u32 */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Always the end
|
* Always the end
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in a new issue