linux-stable/include/linux/xattr.h
Linus Torvalds 02bf43c7b7 fs.xattr.simple.rework.rbtree.rwlock.v6.2
-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCY5bw/wAKCRCRxhvAZXjc
 ol79AQCsHS9s78dLUvdasfQ1023dyF9zaQ8XGkDO6tRssJzGAAD7B8odxDsfQgjQ
 Qzzn9YPZVUgHjd4xBg21UVPmRP5snwQ=
 =wYgr
 -----END PGP SIGNATURE-----

Merge tag 'fs.xattr.simple.rework.rbtree.rwlock.v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping

Pull simple-xattr updates from Christian Brauner:
 "This ports the simple xattr infrastucture to rely on a simple rbtree
  protected by a read-write lock instead of a linked list protected by a
  spinlock.

  A while ago we received reports about scaling issues for filesystems
  using the simple xattr infrastructure that also support setting a
  larger number of xattrs. Specifically, cgroups and tmpfs.

  Both cgroupfs and tmpfs can be mounted by unprivileged users in
  unprivileged containers and root in an unprivileged container can set
  an unrestricted number of security.* xattrs and privileged users can
  also set unlimited trusted.* xattrs. A few more words on further that
  below. Other xattrs such as user.* are restricted for kernfs-based
  instances to a fairly limited number.

  As there are apparently users that have a fairly large number of
  xattrs we should scale a bit better. Using a simple linked list
  protected by a spinlock used for set, get, and list operations doesn't
  scale well if users use a lot of xattrs even if it's not a crazy
  number.

  Let's switch to a simple rbtree protected by a rwlock. It scales way
  better and gets rid of the perf issues some people reported. We
  originally had fancier solutions even using an rcu+seqlock protected
  rbtree but we had concerns about being to clever and also that
  deletion from an rbtree with rcu+seqlock isn't entirely safe.

  The rbtree plus rwlock is perfectly fine. By far the most common
  operation is getting an xattr. While setting an xattr is not and
  should be comparatively rare. And listxattr() often only happens when
  copying xattrs between files or together with the contents to a new
  file.

  Holding a lock across listxattr() is unproblematic because it doesn't
  list the values of xattrs. It can only be used to list the names of
  all xattrs set on a file. And the number of xattr names that can be
  listed with listxattr() is limited to XATTR_LIST_MAX aka 65536 bytes.
  If a larger buffer is passed then vfs_listxattr() caps it to
  XATTR_LIST_MAX and if more xattr names are found it will return
  -E2BIG. In short, the maximum amount of memory that can be retrieved
  via listxattr() is limited and thus listxattr() bounded.

  Of course, the API is broken as documented on xattr(7) already. While
  I have no idea how the xattr api ended up in this state we should
  probably try to come up with something here at some point. An iterator
  pattern similar to readdir() as an alternative to listxattr() or
  something else.

  Right now it is extremly strange that users can set millions of xattrs
  but then can't use listxattr() to know which xattrs are actually set.
  And it's really trivial to do:

	for i in {1..1000000}; do setfattr -n security.$i -v $i ./file1; done

  And around 5000 xattrs it's impossible to use listxattr() to figure
  out which xattrs are actually set. So I have suggested that we try to
  limit the number of xattrs for simple xattrs at least. But that's a
  future patch and I don't consider it very urgent.

  A bonus of this port to rbtree+rwlock is that we shrink the memory
  consumption for users of the simple xattr infrastructure.

  This also adds kernel documentation to all the functions"

* tag 'fs.xattr.simple.rework.rbtree.rwlock.v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping:
  xattr: use rbtree for simple_xattrs
2022-12-13 10:08:36 -08:00

113 lines
3.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
File: linux/xattr.h
Extended attributes handling.
Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
*/
#ifndef _LINUX_XATTR_H
#define _LINUX_XATTR_H
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/user_namespace.h>
#include <uapi/linux/xattr.h>
struct inode;
struct dentry;
static inline bool is_posix_acl_xattr(const char *name)
{
return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0);
}
/*
* struct xattr_handler: When @name is set, match attributes with exactly that
* name. When @prefix is set instead, match attributes with that prefix and
* with a non-empty suffix.
*/
struct xattr_handler {
const char *name;
const char *prefix;
int flags; /* fs private flags */
bool (*list)(struct dentry *dentry);
int (*get)(const struct xattr_handler *, struct dentry *dentry,
struct inode *inode, const char *name, void *buffer,
size_t size);
int (*set)(const struct xattr_handler *,
struct user_namespace *mnt_userns, struct dentry *dentry,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags);
};
const char *xattr_full_name(const struct xattr_handler *, const char *);
struct xattr {
const char *name;
void *value;
size_t value_len;
};
ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t);
ssize_t vfs_getxattr(struct user_namespace *, struct dentry *, const char *,
void *, size_t);
ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
int __vfs_setxattr(struct user_namespace *, struct dentry *, struct inode *,
const char *, const void *, size_t, int);
int __vfs_setxattr_noperm(struct user_namespace *, struct dentry *,
const char *, const void *, size_t, int);
int __vfs_setxattr_locked(struct user_namespace *, struct dentry *,
const char *, const void *, size_t, int,
struct inode **);
int vfs_setxattr(struct user_namespace *, struct dentry *, const char *,
const void *, size_t, int);
int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
int __vfs_removexattr_locked(struct user_namespace *, struct dentry *,
const char *, struct inode **);
int vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
int vfs_getxattr_alloc(struct user_namespace *mnt_userns,
struct dentry *dentry, const char *name,
char **xattr_value, size_t size, gfp_t flags);
int xattr_supported_namespace(struct inode *inode, const char *prefix);
static inline const char *xattr_prefix(const struct xattr_handler *handler)
{
return handler->prefix ?: handler->name;
}
struct simple_xattrs {
struct rb_root rb_root;
rwlock_t lock;
};
struct simple_xattr {
struct rb_node rb_node;
char *name;
size_t size;
char value[];
};
void simple_xattrs_init(struct simple_xattrs *xattrs);
void simple_xattrs_free(struct simple_xattrs *xattrs);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
const void *value, size_t size, int flags,
ssize_t *removed_size);
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size);
void simple_xattr_add(struct simple_xattrs *xattrs,
struct simple_xattr *new_xattr);
#endif /* _LINUX_XATTR_H */