linux-stable/fs/kernfs/mount.c
Ian Kent 47b5c64d0a kernfs: use i_lock to protect concurrent inode updates
The inode operations .permission() and .getattr() use the kernfs node
write lock but all that's needed is the read lock to protect against
partial updates of these kernfs node fields which are all done under
the write lock.

And .permission() is called frequently during path walks and can cause
quite a bit of contention between kernfs node operations and path
walks when the number of concurrent walks is high.

To change kernfs_iop_getattr() and kernfs_iop_permission() to take
the rw sem read lock instead of the write lock an additional lock is
needed to protect against multiple processes concurrently updating
the inode attributes and link count in kernfs_refresh_inode().

The inode i_lock seems like the sensible thing to use to protect these
inode attribute updates so use it in kernfs_refresh_inode().

The last hunk in the patch, applied to kernfs_fill_super(), is possibly
not needed but taking the lock was present originally. I prefer to
continue to take it to protect against a partial update of the source
kernfs fields during the call to kernfs_refresh_inode() made by
kernfs_get_inode().

Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Ian Kent <raven@themaw.net>
Link: https://lore.kernel.org/r/162642771474.63632.16295959115893904470.stgit@web.messagingengine.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-07-27 09:29:15 +02:00

397 lines
9.5 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* fs/kernfs/mount.c - kernfs mount implementation
*
* Copyright (c) 2001-3 Patrick Mochel
* Copyright (c) 2007 SUSE Linux Products GmbH
* Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
*/
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/init.h>
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/namei.h>
#include <linux/seq_file.h>
#include <linux/exportfs.h>
#include "kernfs-internal.h"
/*
 * Slab caches for struct kernfs_node and struct kernfs_iattrs objects.
 * Both are created once by kernfs_init().
 */
struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
/*
 * ->show_options() for kernfs superblocks.
 *
 * Forwards to the owning root's syscall_ops->show_options() hook when one
 * is installed and returns its result; otherwise emits nothing and
 * returns 0.
 */
static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
{
	struct kernfs_node *kn = kernfs_dentry_node(dentry);
	struct kernfs_root *kroot = kernfs_root(kn);
	struct kernfs_syscall_ops *ops = kroot->syscall_ops;

	if (!ops || !ops->show_options)
		return 0;

	return ops->show_options(sf, kroot);
}
/*
 * ->show_path() for kernfs superblocks.
 *
 * Forwards to the owning root's syscall_ops->show_path() hook when one is
 * installed and returns its result.  Without a hook the dentry path is
 * printed through seq_dentry() and 0 is returned.
 */
static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
{
	struct kernfs_node *kn = kernfs_dentry_node(dentry);
	struct kernfs_root *kroot = kernfs_root(kn);
	struct kernfs_syscall_ops *ops = kroot->syscall_ops;

	if (!ops || !ops->show_path) {
		/* Escape set handed to seq_dentry(): space, tab, newline, '\' */
		seq_dentry(sf, dentry, " \t\n\\");
		return 0;
	}

	return ops->show_path(sf, kn, kroot);
}
/*
 * Superblock operations for kernfs mounts.  kernfs_fill_super() installs
 * this table as sb->s_op, and kernfs_root_from_sb() / kernfs_node_dentry()
 * compare sb->s_op against its address to recognise kernfs superblocks.
 */
const struct super_operations kernfs_sops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.evict_inode = kernfs_evict_inode,
.show_options = kernfs_sop_show_options,
.show_path = kernfs_sop_show_path,
};
/*
 * ->encode_fh(): build a file handle for @inode.
 *
 * The handle is the 64-bit id of the kernfs_node stored in
 * inode->i_private, written over the first two __u32 slots of @fh.
 * *@max_len is always set to 2 on return, including the failure case, so
 * the caller learns the required size.  @parent is not used.
 *
 * Returns FILEID_KERNFS on success, or FILEID_INVALID when the caller
 * supplied fewer than two slots.
 */
static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
			    struct inode *parent)
{
	const int avail = *max_len;
	struct kernfs_node *kn = inode->i_private;

	*max_len = 2;
	if (avail < 2)
		return FILEID_INVALID;

	*(u64 *)fh = kn->id;
	return FILEID_KERNFS;
}
/*
 * Common helper behind ->fh_to_dentry() and ->fh_to_parent().
 *
 * Decodes a node id from @fid, looks the node up in the root attached to
 * @sb and returns a dentry for it, or for its parent when @get_parent is
 * true.
 *
 * Returns NULL when @fh_len is below 2 or @fh_type is not recognised, and
 * ERR_PTR(-ESTALE) when the node, its parent or the inode cannot be
 * obtained.  Otherwise returns whatever d_obtain_alias() yields.
 */
static struct dentry *__kernfs_fh_to_dentry(struct super_block *sb,
					    struct fid *fid, int fh_len,
					    int fh_type, bool get_parent)
{
	struct kernfs_super_info *sbi = kernfs_info(sb);
	struct kernfs_node *node;
	struct inode *target;
	u64 node_id;

	if (fh_len < 2)
		return NULL;

	if (fh_type == FILEID_KERNFS) {
		node_id = *(u64 *)fid;
	} else if (fh_type == FILEID_INO32_GEN ||
		   fh_type == FILEID_INO32_GEN_PARENT) {
		/*
		 * blk_log_action() exposes an untyped "LOW32,HIGH32" pair,
		 * and userland may hand us a generic fid built from it.
		 * Reassemble the 64-bit id from the two halves.  See
		 * blk_log_action().
		 */
		node_id = ((u64)fid->i32.gen << 32) | fid->i32.ino;
	} else {
		return NULL;
	}

	node = kernfs_find_and_get_node_by_id(sbi->root, node_id);
	if (!node)
		return ERR_PTR(-ESTALE);

	if (get_parent) {
		struct kernfs_node *up = kernfs_get_parent(node);

		/* Swap the reference on the node for the one on its parent. */
		kernfs_put(node);
		if (!up)
			return ERR_PTR(-ESTALE);
		node = up;
	}

	target = kernfs_get_inode(sb, node);
	kernfs_put(node);
	if (!target)
		return ERR_PTR(-ESTALE);

	return d_obtain_alias(target);
}
static struct dentry *kernfs_fh_to_dentry(struct super_block *sb,
struct fid *fid, int fh_len,
int fh_type)
{
return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, false);
}
static struct dentry *kernfs_fh_to_parent(struct super_block *sb,
struct fid *fid, int fh_len,
int fh_type)
{
return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, true);
}
/*
 * ->get_parent(): return a dentry for the parent of @child.
 *
 * Obtains the inode of the parent kernfs_node on @child's superblock and
 * passes it to d_obtain_alias().
 *
 * NOTE(review): kn->parent is passed on unchecked; presumably this is
 * never invoked on the root node -- confirm against the exportfs callers.
 */
static struct dentry *kernfs_get_parent_dentry(struct dentry *child)
{
	struct kernfs_node *kn = kernfs_dentry_node(child);
	struct inode *parent_inode;

	parent_inode = kernfs_get_inode(child->d_sb, kn->parent);
	return d_obtain_alias(parent_inode);
}
/*
 * File-handle export operations.  kernfs_fill_super() installs this table
 * as sb->s_export_op only when the root has KERNFS_ROOT_SUPPORT_EXPORTOP
 * set in its flags.
 */
static const struct export_operations kernfs_export_ops = {
.encode_fh = kernfs_encode_fh,
.fh_to_dentry = kernfs_fh_to_dentry,
.fh_to_parent = kernfs_fh_to_parent,
.get_parent = kernfs_get_parent_dentry,
};
/**
 * kernfs_root_from_sb - look up the kernfs_root behind a super_block
 * @sb: the super_block to inspect
 *
 * A super_block is treated as a kernfs one when its ->s_op points at
 * kernfs_sops.
 *
 * Return: the kernfs_root associated with @sb, or %NULL if @sb does not
 * belong to kernfs.
 */
struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
{
	if (sb->s_op != &kernfs_sops)
		return NULL;

	return kernfs_info(sb)->root;
}
/*
 * Find the next ancestor on the way from @parent down to @child.
 *
 * Walks up from @child through the ->parent links and returns the node
 * whose direct parent is @parent, i.e. the first step from @parent
 * towards @child.
 *
 * Say the path is /a/b/c/d and @child is d.  With @parent == NULL the
 * root node is returned; with @parent == b the node for c is returned.
 *
 * Returns NULL when @parent is not an ancestor of @child (the walk runs
 * off the top of the tree).  Passing @child == @parent is a caller bug:
 * it is reported once at critical level and NULL is returned.
 */
static struct kernfs_node *find_next_ancestor(struct kernfs_node *child,
					      struct kernfs_node *parent)
{
	if (child == parent) {
		pr_crit_once("BUG in find_next_ancestor: called with parent == child\n");
		return NULL;
	}

	while (child->parent != parent) {
		/* Reached the top without meeting @parent. */
		if (!child->parent)
			return NULL;
		child = child->parent;
	}

	return child;
}
/**
 * kernfs_node_dentry - get a dentry for the given kernfs_node
 * @kn: kernfs_node for which a dentry is needed
 * @sb: the kernfs super_block
 *
 * Starts at @sb's root dentry and descends one path component at a time,
 * using find_next_ancestor() to pick the next node on the way to @kn and
 * lookup_positive_unlocked() to resolve its name.  BUGs if @sb is not a
 * kernfs super_block.
 *
 * Return: the dentry for @kn, the ERR_PTR() produced by a failed lookup,
 * or ERR_PTR(-EINVAL) if the ancestor walk unexpectedly fails.
 */
struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
				  struct super_block *sb)
{
	struct kernfs_node *reached;
	struct dentry *cur;

	BUG_ON(sb->s_op != &kernfs_sops);

	cur = dget(sb->s_root);

	/* The root kernfs_node maps straight onto the root dentry. */
	if (!kn->parent)
		return cur;

	reached = find_next_ancestor(kn, NULL);
	if (WARN_ON(!reached))
		goto err_inval;

	/* @cur is always the dentry of @reached; stop once that is @kn. */
	while (reached != kn) {
		struct kernfs_node *step = find_next_ancestor(kn, reached);
		struct dentry *child;

		if (WARN_ON(!step))
			goto err_inval;

		child = lookup_positive_unlocked(step->name, cur,
						 strlen(step->name));
		dput(cur);
		if (IS_ERR(child))
			return child;

		reached = step;
		cur = child;
	}

	return cur;

err_inval:
	dput(cur);
	return ERR_PTR(-EINVAL);
}
/*
 * Set up a new kernfs super_block.
 *
 * Fills in the fixed superblock parameters (magic taken from @kfc), then
 * obtains the inode for the root kernfs_node and attaches a root dentry
 * to it.
 *
 * Returns 0 on success, or -ENOMEM when either the root inode or the root
 * dentry could not be obtained.
 */
static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc)
{
	struct kernfs_super_info *sbi = kernfs_info(sb);
	struct dentry *root_dentry;
	struct inode *root_inode;

	sbi->sb = sb;

	/* Userspace would break if executables or devices appear on sysfs */
	sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
	sb->s_blocksize = PAGE_SIZE;
	sb->s_blocksize_bits = PAGE_SHIFT;
	sb->s_magic = kfc->magic;
	sb->s_op = &kernfs_sops;
	sb->s_xattr = kernfs_xattr_handlers;

	/* File-handle export is opt-in per root. */
	if (sbi->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
		sb->s_export_op = &kernfs_export_ops;

	sb->s_time_gran = 1;

	/* sysfs dentries and inodes don't require IO to create */
	sb->s_shrink.seeks = 0;

	/* Fetch the root inode with kernfs_rwsem held for reading. */
	down_read(&kernfs_rwsem);
	root_inode = kernfs_get_inode(sb, sbi->root->kn);
	up_read(&kernfs_rwsem);

	if (!root_inode) {
		pr_debug("kernfs: could not get root inode\n");
		return -ENOMEM;
	}

	/* Create the root dentry on top of the root inode. */
	root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		pr_debug("%s: could not get root dentry!\n", __func__);
		return -ENOMEM;
	}

	sb->s_root = root_dentry;
	sb->s_d_op = &kernfs_dops;

	return 0;
}
/*
 * sget_fc() test callback: an existing super_block matches the request in
 * @fc when both its kernfs root and its namespace tag are the same.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int kernfs_test_super(struct super_block *sb, struct fs_context *fc)
{
	struct kernfs_super_info *have = kernfs_info(sb);
	struct kernfs_super_info *want = fc->s_fs_info;

	if (have->root != want->root)
		return 0;

	return have->ns == want->ns;
}
/*
 * sget_fc() set callback for a newly created super_block.
 *
 * Clears the namespace tag in the kernfs fs context and then defers to
 * set_anon_super_fc(), whose result is returned.
 *
 * NOTE(review): clearing ns_tag presumably marks the tag as handed over
 * to the new superblock (kernfs_get_tree() copied it into the super
 * info) -- confirm against the fs-context owners.
 */
static int kernfs_set_super(struct super_block *sb, struct fs_context *fc)
{
	struct kernfs_fs_context *ctx = fc->fs_private;

	ctx->ns_tag = NULL;

	return set_anon_super_fc(sb, fc);
}
/**
* kernfs_super_ns - determine the namespace tag of a kernfs super_block
* @sb: super_block of interest
*
* Return the namespace tag associated with kernfs super_block @sb.
*/
const void *kernfs_super_ns(struct super_block *sb)
{
struct kernfs_super_info *info = kernfs_info(sb);
return info->ns;
}
/**
 * kernfs_get_tree - kernfs filesystem access/retrieval helper
 * @fc: The filesystem context.
 *
 * To be called from each kernfs user's fs_context->ops->get_tree()
 * implementation.  The caller is expected to have set up ->@fs_type and
 * ->@flags and to have named the hierarchy and namespace tag to mount in
 * the kernfs_fs_context (read here as ->root and ->ns_tag).
 *
 * A kernfs_super_info is allocated and offered to sget_fc().  If the
 * super_block that comes back has no root dentry yet, it is filled in,
 * marked active and linked onto the root's list of supers.
 *
 * Return: 0 with @fc->root set on success, -ENOMEM if the super info
 * cannot be allocated, or the error from sget_fc() / kernfs_fill_super().
 */
int kernfs_get_tree(struct fs_context *fc)
{
	struct kernfs_fs_context *kfc = fc->fs_private;
	struct kernfs_super_info *new_info;
	struct super_block *sb;

	new_info = kzalloc(sizeof(*new_info), GFP_KERNEL);
	if (!new_info)
		return -ENOMEM;

	new_info->root = kfc->root;
	new_info->ns = kfc->ns_tag;
	INIT_LIST_HEAD(&new_info->node);

	fc->s_fs_info = new_info;
	sb = sget_fc(fc, kernfs_test_super, kernfs_set_super);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	/* No root dentry yet: this super_block still has to be set up. */
	if (!sb->s_root) {
		struct kernfs_super_info *sb_info = kernfs_info(sb);
		int error;

		kfc->new_sb_created = true;

		error = kernfs_fill_super(sb, kfc);
		if (error) {
			deactivate_locked_super(sb);
			return error;
		}
		sb->s_flags |= SB_ACTIVE;

		/* The root's list of supers is modified under the write lock. */
		down_write(&kernfs_rwsem);
		list_add(&sb_info->node, &sb_info->root->supers);
		up_write(&kernfs_rwsem);
	}

	fc->root = dget(sb->s_root);

	return 0;
}
/*
 * Release the kernfs_super_info still attached to @fc, if any, and clear
 * the pointer.  kfc->ns_tag is deliberately left untouched here.
 */
void kernfs_free_fs_context(struct fs_context *fc)
{
	void *leftover = fc->s_fs_info;

	fc->s_fs_info = NULL;
	kfree(leftover);
}
/**
 * kernfs_kill_sb - kill_sb for kernfs
 * @sb: super_block being killed
 *
 * Suitable for direct use as file_system_type->kill_sb().  A kernfs user
 * that needs extra cleanup can provide its own kill_sb() and call this
 * function as its last step.
 */
void kernfs_kill_sb(struct super_block *sb)
{
	struct kernfs_super_info *sbi = kernfs_info(sb);

	/* Unlink from the root's list of supers under the write lock. */
	down_write(&kernfs_rwsem);
	list_del(&sbi->node);
	up_write(&kernfs_rwsem);

	/*
	 * Take the superblock off fs_supers/s_instances first so that it
	 * can no longer be found, and only then free kernfs_super_info.
	 */
	kill_anon_super(sb);

	kfree(sbi);
}
/*
 * Boot-time initialisation: create the slab caches used for kernfs nodes
 * and for kernfs inode attributes.  Both caches are created with
 * SLAB_PANIC.
 */
void __init kernfs_init(void)
{
	/* Cache backing struct kernfs_node allocations */
	kernfs_node_cache =
		kmem_cache_create("kernfs_node_cache",
				  sizeof(struct kernfs_node), 0,
				  SLAB_PANIC, NULL);

	/* Cache backing struct kernfs_iattrs (inode attributes) */
	kernfs_iattrs_cache =
		kmem_cache_create("kernfs_iattrs_cache",
				  sizeof(struct kernfs_iattrs), 0,
				  SLAB_PANIC, NULL);
}