linux-stable/fs/xfs/libxfs/xfs_attr.h
Dave Chinner ddbca70cc4 xfs: allocate xattr buffer on demand
When doing file lookups and checking for permissions, we end up in
xfs_get_acl() to see if there are any ACLs on the inode. This
requires and xattr lookup, and to do that we have to supply a buffer
large enough to hold an maximum sized xattr.

On workloads were we are accessing a wide range of cache cold files
under memory pressure (e.g. NFS fileservers) we end up spending a
lot of time allocating the buffer. The buffer is 64k in length, so
is a contiguous multi-page allocation, and if that then fails we
fall back to vmalloc(). Hence the allocation here is /expensive/
when we are looking up hundreds of thousands of files a second.

Initial numbers from a bpf trace show average time in xfs_get_acl()
is ~32us, with ~19us of that in the memory allocation. Note these
are average times, so there are going to be affected by the worst
case allocations more than the common fast case...

To avoid this, we could just do a "null"  lookup to see if the ACL
xattr exists and then only do the allocation if it exists. This,
however, optimises the path for the "no ACL present" case at the
expense of the "acl present" case. i.e. we can halve the time in
xfs_get_acl() for the no acl case (i.e down to ~10-15us), but that
then increases the ACL case by 30% (i.e. up to 40-45us).

To solve this and speed up both cases, drive the xattr buffer
allocation into the attribute code once we know what the actual
xattr length is. For the no-xattr case, we avoid the allocation
completely, speeding up that case. For the common ACL case, we'll
end up with a fast heap allocation (because it'll be smaller than a
page), and only for the rarer "we have a remote xattr" will we have
a multi-page allocation occur. Hence the common ACL case will be
much faster, too.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-08-30 22:43:57 -07:00

158 lines
5.9 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
#ifndef __XFS_ATTR_H__
#define __XFS_ATTR_H__
struct xfs_inode;
struct xfs_da_args;
struct xfs_attr_list_context;
/*
* Large attribute lists are structured around Btrees where all the data
* elements are in the leaf nodes. Attribute names are hashed into an int,
* then that int is used as the index into the Btree. Since the hashval
* of an attribute name may not be unique, we may have duplicate keys.
* The internal links in the Btree are logical block offsets into the file.
*
* Small attribute lists use a different format and are packed as tightly
* as possible so as to fit into the literal area of the inode.
*/
/*========================================================================
* External interfaces
*========================================================================*/
#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
#define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */
#define ATTR_SECURE 0x0008 /* use attrs in security namespace */
#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */
#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */
#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
#define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */
#define ATTR_ALLOC 0x8000 /* allocate xattr buffer on demand */
#define XFS_ATTR_FLAGS \
{ ATTR_DONTFOLLOW, "DONTFOLLOW" }, \
{ ATTR_ROOT, "ROOT" }, \
{ ATTR_TRUST, "TRUST" }, \
{ ATTR_SECURE, "SECURE" }, \
{ ATTR_CREATE, "CREATE" }, \
{ ATTR_REPLACE, "REPLACE" }, \
{ ATTR_KERNOTIME, "KERNOTIME" }, \
{ ATTR_KERNOVAL, "KERNOVAL" }, \
{ ATTR_INCOMPLETE, "INCOMPLETE" }, \
{ ATTR_ALLOC, "ALLOC" }
/*
* The maximum size (into the kernel or returned from the kernel) of an
* attribute value or the buffer used for an attr_list() call. Larger
* sizes will result in an ERANGE return code.
*/
#define ATTR_MAX_VALUELEN (64*1024) /* max length of a value */
/*
* Define how lists of attribute names are returned to the user from
* the attr_list() call. A large, 32bit aligned, buffer is passed in
* along with its size. We put an array of offsets at the top that each
* reference an attrlist_ent_t and pack the attrlist_ent_t's at the bottom.
*/
typedef struct attrlist {
__s32 al_count; /* number of entries in attrlist */
__s32 al_more; /* T/F: more attrs (do call again) */
__s32 al_offset[1]; /* byte offsets of attrs [var-sized] */
} attrlist_t;
/*
* Show the interesting info about one attribute. This is what the
* al_offset[i] entry points to.
*/
typedef struct attrlist_ent { /* data from attr_list() */
__u32 a_valuelen; /* number bytes in value of attr */
char a_name[1]; /* attr name (NULL terminated) */
} attrlist_ent_t;
/*
* Given a pointer to the (char*) buffer containing the attr_list() result,
* and an index, return a pointer to the indicated attribute in the buffer.
*/
#define ATTR_ENTRY(buffer, index) \
((attrlist_ent_t *) \
&((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
/*
* Kernel-internal version of the attrlist cursor.
*/
typedef struct attrlist_cursor_kern {
__u32 hashval; /* hash value of next entry to add */
__u32 blkno; /* block containing entry (suggestion) */
__u32 offset; /* offset in list of equal-hashvals */
__u16 pad1; /* padding to match user-level */
__u8 pad2; /* padding to match user-level */
__u8 initted; /* T/F: cursor has been initialized */
} attrlist_cursor_kern_t;
/*========================================================================
* Structure used to pass context around among the routines.
*========================================================================*/
/* void; state communicated via *context */
typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int);
typedef struct xfs_attr_list_context {
struct xfs_trans *tp;
struct xfs_inode *dp; /* inode */
struct attrlist_cursor_kern *cursor; /* position in list */
char *alist; /* output buffer */
/*
* Abort attribute list iteration if non-zero. Can be used to pass
* error values to the xfs_attr_list caller.
*/
int seen_enough;
ssize_t count; /* num used entries */
int dupcnt; /* count dup hashvals seen */
int bufsize; /* total buffer size */
int firstu; /* first used byte in buffer */
int flags; /* from VOP call */
int resynch; /* T/F: resynch with cursor */
put_listent_func_t put_listent; /* list output fmt function */
int index; /* index into output buffer */
} xfs_attr_list_context_t;
/*========================================================================
* Function prototypes for the kernel.
*========================================================================*/
/*
* Overall external interface routines.
*/
int xfs_attr_inactive(struct xfs_inode *dp);
int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *);
int xfs_attr_list_int(struct xfs_attr_list_context *);
int xfs_inode_hasattr(struct xfs_inode *ip);
int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args);
int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
unsigned char **value, int *valuelenp, int flags);
int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
unsigned char *value, int valuelen, int flags);
int xfs_attr_set_args(struct xfs_da_args *args);
int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
int xfs_attr_remove_args(struct xfs_da_args *args);
int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
int flags, struct attrlist_cursor_kern *cursor);
bool xfs_attr_namecheck(const void *name, size_t length);
#endif /* __XFS_ATTR_H__ */