diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst index eb9c2d9a4f5f..17779a2772e5 100644 --- a/Documentation/fault-injection/fault-injection.rst +++ b/Documentation/fault-injection/fault-injection.rst @@ -169,6 +169,13 @@ configuration of fault-injection capabilities. default is 'N', setting it to 'Y' will disable disconnect injection on the RPC server. +- /sys/kernel/debug/fail_sunrpc/ignore-cache-wait: + + Format: { 'Y' | 'N' } + + default is 'N', setting it to 'Y' will disable cache wait + injection on the RPC server. + - /sys/kernel/debug/fail_function/inject: Format: { 'function-name' | '!function-name' | '' } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 176b468a61c7..bf274f23969b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -32,6 +32,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, if (!nlmsvc_ops) return nlm_lck_denied_nolocks; + if (lock->lock_start > OFFSET_MAX || + (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start)))) + return nlm4_fbig; + /* Obtain host handle */ if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) || (argp->monitor && nsm_monitor(host) < 0)) @@ -50,6 +54,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; + lock->fl.fl_start = (loff_t)lock->lock_start; + lock->fl.fl_end = lock->lock_len ? 
+ (loff_t)(lock->lock_start + lock->lock_len - 1) : + OFFSET_MAX; lock->fl.fl_lmops = &nlmsvc_lock_operations; nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); if (!lock->fl.fl_owner) { @@ -87,6 +95,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; + struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST4 called\n"); @@ -96,6 +105,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + test_owner = argp->lock.fl.fl_owner; /* Now check for conflicting locks */ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie); if (resp->status == nlm_drop_reply) @@ -103,7 +113,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); - nlmsvc_release_lockowner(&argp->lock); + nlmsvc_put_lockowner(test_owner); nlmsvc_release_host(host); nlm_release_file(file); return rc; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index cb3658ab9b7a..9c1aa75441e1 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -340,7 +340,7 @@ nlmsvc_get_lockowner(struct nlm_lockowner *lockowner) return lockowner; } -static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) +void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) { if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) return; @@ -590,7 +590,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, int error; int mode; __be32 ret; - struct nlm_lockowner *test_owner; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", nlmsvc_file_inode(file)->i_sb->s_id, @@ -604,9 +603,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - /* If there's a 
conflicting lock, remember to clean up the test lock */ - test_owner = (struct nlm_lockowner *)lock->fl.fl_owner; - mode = lock_to_openmode(&lock->fl); error = vfs_test_lock(file->f_file[mode], &lock->fl); if (error) { @@ -635,10 +631,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->fl.fl_end = lock->fl.fl_end; locks_release_private(&lock->fl); - /* Clean up the test lock */ - lock->fl.fl_owner = NULL; - nlmsvc_put_lockowner(test_owner); - ret = nlm_lck_denied; out: return ret; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 4dc1b40a489a..b09ca35b527c 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -116,6 +116,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; + struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST called\n"); @@ -125,6 +126,8 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? 
rpc_drop_reply :rpc_success; + test_owner = argp->lock.fl.fl_owner; + /* Now check for conflicting locks */ resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie)); if (resp->status == nlm_drop_reply) @@ -133,7 +136,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) dprintk("lockd: TEST status %d vers %d\n", ntohl(resp->status), rqstp->rq_vers); - nlmsvc_release_lockowner(&argp->lock); + nlmsvc_put_lockowner(test_owner); nlmsvc_release_host(host); nlm_release_file(file); return rc; diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 856267c0864b..712fdfeb8ef0 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -20,13 +20,6 @@ #include "svcxdr.h" -static inline loff_t -s64_to_loff_t(__s64 offset) -{ - return (loff_t)offset; -} - - static inline s64 loff_t_to_s64(loff_t offset) { @@ -70,8 +63,6 @@ static bool svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; - u64 len, start; - s64 end; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) return false; @@ -81,20 +72,14 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) return false; if (xdr_stream_decode_u32(xdr, &lock->svid) < 0) return false; - if (xdr_stream_decode_u64(xdr, &start) < 0) + if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0) return false; - if (xdr_stream_decode_u64(xdr, &len) < 0) + if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0) return false; locks_init_lock(fl); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; - end = start + len - 1; - fl->fl_start = s64_to_loff_t(start); - if (len == 0 || end < 0) - fl->fl_end = OFFSET_MAX; - else - fl->fl_end = s64_to_loff_t(end); return true; } diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index ba14d2f4b64f..4b7324458a94 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -38,6 +38,8 @@ struct nfs4_acl; struct svc_fh; struct svc_rqst; +struct nfsd_attrs; +enum nfs_ftype4; int nfs4_acl_bytes(int entries); int 
nfs4_acl_get_whotype(char *, u32); @@ -45,7 +47,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl); -__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl); +__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, + struct nfsd_attrs *attr); #endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a605c0e39b09..eeed4ae5b4ad 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "vfs.h" #include "nfsd.h" @@ -21,28 +22,19 @@ #include "filecache.h" #include "trace.h" -#define NFSDDBG_FACILITY NFSDDBG_FH - -/* FIXME: dynamically size this for the machine somehow? */ -#define NFSD_FILE_HASH_BITS 12 -#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) -#define NFSD_FILE_SHUTDOWN (1) -#define NFSD_FILE_LRU_THRESHOLD (4096UL) -#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) +#define NFSD_FILE_CACHE_UP (0) /* We only care about NFSD_MAY_READ/WRITE for this cache */ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) -struct nfsd_fcache_bucket { - struct hlist_head nfb_head; - spinlock_t nfb_lock; - unsigned int nfb_count; - unsigned int nfb_maxcount; -}; - static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); +static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); +static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); +static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); +static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed); +static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { struct work_struct work; @@ -54,21 +46,146 @@ static struct workqueue_struct *nfsd_filecache_wq __read_mostly; static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; -static struct 
nfsd_fcache_bucket *nfsd_file_hashtbl; static struct list_lru nfsd_file_lru; -static long nfsd_file_lru_flags; +static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; -static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; +static struct rhashtable nfsd_file_rhash_tbl + ____cacheline_aligned_in_smp; -static void nfsd_file_gc(void); +enum nfsd_file_lookup_type { + NFSD_FILE_KEY_INODE, + NFSD_FILE_KEY_FULL, +}; + +struct nfsd_file_lookup_key { + struct inode *inode; + struct net *net; + const struct cred *cred; + unsigned char need; + enum nfsd_file_lookup_type type; +}; + +/* + * The returned hash value is based solely on the address of an in-core + * inode, a pointer to a slab-allocated object. The entropy in such a + * pointer is concentrated in its middle bits. + */ +static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) +{ + unsigned long ptr = (unsigned long)inode; + u32 k; + + k = ptr >> L1_CACHE_SHIFT; + k &= 0x00ffffff; + return jhash2(&k, 1, seed); +} + +/** + * nfsd_file_key_hashfn - Compute the hash value of a lookup key + * @data: key on which to compute the hash value + * @len: rhash table's key_len parameter (unused) + * @seed: rhash table's random seed of the day + * + * Return value: + * Computed 32-bit hash value + */ +static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfsd_file_lookup_key *key = data; + + return nfsd_file_inode_hash(key->inode, seed); +} + +/** + * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file + * @data: object on which to compute the hash value + * @len: rhash table's key_len parameter (unused) + * @seed: rhash table's random seed of the day + * + * Return value: + * Computed 32-bit hash value + */ +static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfsd_file *nf = data; + + return nfsd_file_inode_hash(nf->nf_inode, seed); +} + +static bool 
+nfsd_match_cred(const struct cred *c1, const struct cred *c2) +{ + int i; + + if (!uid_eq(c1->fsuid, c2->fsuid)) + return false; + if (!gid_eq(c1->fsgid, c2->fsgid)) + return false; + if (c1->group_info == NULL || c2->group_info == NULL) + return c1->group_info == c2->group_info; + if (c1->group_info->ngroups != c2->group_info->ngroups) + return false; + for (i = 0; i < c1->group_info->ngroups; i++) { + if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) + return false; + } + return true; +} + +/** + * nfsd_file_obj_cmpfn - Match a cache item against search criteria + * @arg: search criteria + * @ptr: cache item to check + * + * Return values: + * %0 - Item matches search criteria + * %1 - Item does not match search criteria + */ +static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nfsd_file_lookup_key *key = arg->key; + const struct nfsd_file *nf = ptr; + + switch (key->type) { + case NFSD_FILE_KEY_INODE: + if (nf->nf_inode != key->inode) + return 1; + break; + case NFSD_FILE_KEY_FULL: + if (nf->nf_inode != key->inode) + return 1; + if (nf->nf_may != key->need) + return 1; + if (nf->nf_net != key->net) + return 1; + if (!nfsd_match_cred(nf->nf_cred, key->cred)) + return 1; + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) + return 1; + break; + } + return 0; +} + +static const struct rhashtable_params nfsd_file_rhash_params = { + .key_len = sizeof_field(struct nfsd_file, nf_inode), + .key_offset = offsetof(struct nfsd_file, nf_inode), + .head_offset = offsetof(struct nfsd_file, nf_rhash), + .hashfn = nfsd_file_key_hashfn, + .obj_hashfn = nfsd_file_obj_hashfn, + .obj_cmpfn = nfsd_file_obj_cmpfn, + /* Reduce resizing churn on light workloads */ + .min_size = 512, /* buckets */ + .automatic_shrinking = true, +}; static void nfsd_file_schedule_laundrette(void) { - long count = atomic_long_read(&nfsd_filecache_count); - - if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) + if 
((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || + test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) return; queue_delayed_work(system_wq, &nfsd_filecache_laundrette, @@ -111,12 +228,11 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm) } static struct nfsd_file_mark * -nfsd_file_mark_find_or_create(struct nfsd_file *nf) +nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) { int err; struct fsnotify_mark *mark; struct nfsd_file_mark *nfm = NULL, *new; - struct inode *inode = nf->nf_inode; do { fsnotify_group_lock(nfsd_file_fsnotify_group); @@ -167,31 +283,25 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) } static struct nfsd_file * -nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, - struct net *net) +nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) { struct nfsd_file *nf; nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); if (nf) { - INIT_HLIST_NODE(&nf->nf_node); INIT_LIST_HEAD(&nf->nf_lru); + nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); - nf->nf_net = net; + nf->nf_net = key->net; nf->nf_flags = 0; - nf->nf_inode = inode; - nf->nf_hashval = hashval; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = may & NFSD_FILE_MAY_MASK; - if (may & NFSD_MAY_NOT_BREAK_LEASE) { - if (may & NFSD_MAY_WRITE) - __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); - if (may & NFSD_MAY_READ) - __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); - } + __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); + __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + nf->nf_inode = key->inode; + /* nf_ref is pre-incremented for hash table */ + refcount_set(&nf->nf_ref, 2); + nf->nf_may = key->need; nf->nf_mark = NULL; - trace_nfsd_file_alloc(nf); } return nf; } @@ -199,8 +309,12 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, static bool nfsd_file_free(struct nfsd_file *nf) { + s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); bool flush = false; + 
this_cpu_inc(nfsd_file_releases); + this_cpu_add(nfsd_file_total_age, age); + trace_nfsd_file_put_final(nf); if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); @@ -210,6 +324,14 @@ nfsd_file_free(struct nfsd_file *nf) fput(nf->nf_file); flush = true; } + + /* + * If this item is still linked via nf_lru, that's a bug. + * WARN and leak it to preserve system stability. + */ + if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) + return flush; + call_rcu(&nf->nf_rcu, nfsd_file_slab_free); return flush; } @@ -240,31 +362,44 @@ nfsd_file_check_write_error(struct nfsd_file *nf) static void nfsd_file_flush(struct nfsd_file *nf) { - if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0) + struct file *file = nf->nf_file; + + if (!file || !(file->f_mode & FMODE_WRITE)) + return; + this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages); + if (vfs_fsync(file, 1) != 0) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } -static void -nfsd_file_do_unhash(struct nfsd_file *nf) +static void nfsd_file_lru_add(struct nfsd_file *nf) { - lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + trace_nfsd_file_lru_add(nf); +} +static void nfsd_file_lru_remove(struct nfsd_file *nf) +{ + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + trace_nfsd_file_lru_del(nf); +} + +static void +nfsd_file_hash_remove(struct nfsd_file *nf) +{ trace_nfsd_file_unhash(nf); if (nfsd_file_check_write_error(nf)) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); - --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; - hlist_del_rcu(&nf->nf_node); - atomic_long_dec(&nfsd_filecache_count); + rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, + nfsd_file_rhash_params); } static bool nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_do_unhash(nf); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, 
&nf->nf_lru); + nfsd_file_hash_remove(nf); return true; } return false; @@ -274,17 +409,16 @@ nfsd_file_unhash(struct nfsd_file *nf) * Return true if the file was unhashed. */ static bool -nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) +nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose) { - lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - - trace_nfsd_file_unhash_and_release_locked(nf); + trace_nfsd_file_unhash_and_dispose(nf); if (!nfsd_file_unhash(nf)) return false; /* keep final reference for nfsd_file_lru_dispose */ if (refcount_dec_not_one(&nf->nf_ref)) return true; + nfsd_file_lru_remove(nf); list_add(&nf->nf_lru, dispose); return true; } @@ -296,6 +430,7 @@ nfsd_file_put_noref(struct nfsd_file *nf) if (refcount_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); + nfsd_file_lru_remove(nf); nfsd_file_free(nf); } } @@ -305,7 +440,7 @@ nfsd_file_put(struct nfsd_file *nf) { might_sleep(); - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + nfsd_file_lru_add(nf); if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); @@ -314,9 +449,24 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_schedule_laundrette(); } else nfsd_file_put_noref(nf); +} - if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) - nfsd_file_gc(); +/** + * nfsd_file_close - Close an nfsd_file + * @nf: nfsd_file to close + * + * If this is the final reference for @nf, free it immediately. + * This reflects an on-the-wire CLOSE or DELEGRETURN into the + * VFS and exported filesystem. 
+ */ +void nfsd_file_close(struct nfsd_file *nf) +{ + nfsd_file_put(nf); + if (refcount_dec_if_one(&nf->nf_ref)) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + nfsd_file_free(nf); + } } struct nfsd_file * @@ -334,7 +484,7 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del(&nf->nf_lru); + list_del_init(&nf->nf_lru); nfsd_file_flush(nf); nfsd_file_put_noref(nf); } @@ -348,7 +498,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del(&nf->nf_lru); + list_del_init(&nf->nf_lru); nfsd_file_flush(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; @@ -405,8 +555,19 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) } } -/* +/** + * nfsd_file_lru_cb - Examine an entry on the LRU list + * @item: LRU entry to examine + * @lru: controlling LRU + * @lock: LRU list lock (unused) + * @arg: dispose list + * * Note this can deadlock with nfsd_file_cache_purge. + * + * Return values: + * %LRU_REMOVED: @item was removed from the LRU + * %LRU_ROTATE: @item is to be moved to the LRU tail + * %LRU_SKIP: @item cannot be evicted */ static enum lru_status nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, @@ -427,55 +588,65 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * counter. Here we check the counter and then test and clear the flag. * That order is deliberate to ensure that we can do this locklessly. */ - if (refcount_read(&nf->nf_ref) > 1) - goto out_skip; + if (refcount_read(&nf->nf_ref) > 1) { + list_lru_isolate(lru, &nf->nf_lru); + trace_nfsd_file_gc_in_use(nf); + return LRU_REMOVED; + } /* * Don't throw out files that are still undergoing I/O or * that have uncleared errors pending. 
*/ - if (nfsd_file_check_writeback(nf)) - goto out_skip; + if (nfsd_file_check_writeback(nf)) { + trace_nfsd_file_gc_writeback(nf); + return LRU_SKIP; + } - if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) - goto out_skip; + if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { + trace_nfsd_file_gc_referenced(nf); + return LRU_ROTATE; + } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) - goto out_skip; + if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + trace_nfsd_file_gc_hashed(nf); + return LRU_SKIP; + } list_lru_isolate_move(lru, &nf->nf_lru, head); + this_cpu_inc(nfsd_file_evictions); + trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; -out_skip: - return LRU_SKIP; } -static unsigned long -nfsd_file_lru_walk_list(struct shrink_control *sc) +/* + * Unhash items on @dispose immediately, then queue them on the + * disposal workqueue to finish releasing them in the background. + * + * cel: Note that between the time list_lru_shrink_walk runs and + * now, these items are in the hash table but marked unhashed. + * Why release these outside of lru_cb ? There's no lock ordering + * problem since lru_cb currently takes no lock. 
+ */ +static void nfsd_file_gc_dispose_list(struct list_head *dispose) { - LIST_HEAD(head); struct nfsd_file *nf; - unsigned long ret; - if (sc) - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, - nfsd_file_lru_cb, &head); - else - ret = list_lru_walk(&nfsd_file_lru, - nfsd_file_lru_cb, - &head, LONG_MAX); - list_for_each_entry(nf, &head, nf_lru) { - spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_do_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - } - nfsd_file_dispose_list_delayed(&head); - return ret; + list_for_each_entry(nf, dispose, nf_lru) + nfsd_file_hash_remove(nf); + nfsd_file_dispose_list_delayed(dispose); } static void nfsd_file_gc(void) { - nfsd_file_lru_walk_list(NULL); + LIST_HEAD(dispose); + unsigned long ret; + + ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, + &dispose, list_lru_count(&nfsd_file_lru)); + trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); + nfsd_file_gc_dispose_list(&dispose); } static void @@ -494,7 +665,14 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - return nfsd_file_lru_walk_list(sc); + LIST_HEAD(dispose); + unsigned long ret; + + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &dispose); + trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); + nfsd_file_gc_dispose_list(&dispose); + return ret; } static struct shrinker nfsd_file_shrinker = { @@ -503,39 +681,47 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -static void -__nfsd_file_close_inode(struct inode *inode, unsigned int hashval, - struct list_head *dispose) +/* + * Find all cache items across all net namespaces that match @inode and + * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). 
+ */ +static unsigned int +__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) { - struct nfsd_file *nf; - struct hlist_node *tmp; + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, + }; + unsigned int count = 0; + struct nfsd_file *nf; - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { - if (inode == nf->nf_inode) - nfsd_file_unhash_and_release_locked(nf, dispose); - } - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + rcu_read_lock(); + do { + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (!nf) + break; + nfsd_file_unhash_and_dispose(nf, dispose); + count++; + } while (1); + rcu_read_unlock(); + return count; } /** * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. Also ensure that any of the - * fputs also have their final __fput done as well. + * Unhash and put, then flush and fput all cache items associated with @inode. 
*/ void nfsd_file_close_inode_sync(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); + unsigned int count; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); + count = __nfsd_file_close_inode(inode, &dispose); + trace_nfsd_file_close_inode_sync(inode, count); nfsd_file_dispose_list_sync(&dispose); } @@ -543,19 +729,16 @@ nfsd_file_close_inode_sync(struct inode *inode) * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. + * Unhash and put all cache items associated with @inode. */ static void nfsd_file_close_inode(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); + unsigned int count; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); + count = __nfsd_file_close_inode(inode, &dispose); + trace_nfsd_file_close_inode(inode, count); nfsd_file_dispose_list_delayed(&dispose); } @@ -630,25 +813,21 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = { int nfsd_file_cache_init(void) { - int ret = -ENOMEM; - unsigned int i; + int ret; - clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); - - if (nfsd_file_hashtbl) + lockdep_assert_held(&nfsd_mutex); + if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) return 0; + ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); + if (ret) + return ret; + + ret = -ENOMEM; nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); if (!nfsd_filecache_wq) goto out; - nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, - 
sizeof(*nfsd_file_hashtbl), GFP_KERNEL); - if (!nfsd_file_hashtbl) { - pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); - goto out_err; - } - nfsd_file_slab = kmem_cache_create("nfsd_file", sizeof(struct nfsd_file), 0, 0, NULL); if (!nfsd_file_slab) { @@ -692,11 +871,6 @@ nfsd_file_cache_init(void) goto out_notifier; } - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); - spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); - } - INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; @@ -711,46 +885,47 @@ out_err: nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kvfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; + rhashtable_destroy(&nfsd_file_rhash_tbl); goto out; } /* * Note this can deadlock with nfsd_file_lru_cb. */ -void -nfsd_file_cache_purge(struct net *net) +static void +__nfsd_file_cache_purge(struct net *net) { - unsigned int i; - struct nfsd_file *nf; - struct hlist_node *next; + struct rhashtable_iter iter; + struct nfsd_file *nf; LIST_HEAD(dispose); bool del; - if (!nfsd_file_hashtbl) - return; + rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); + do { + rhashtable_walk_start(&iter); - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; - - spin_lock(&nfb->nfb_lock); - hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { + nf = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(nf)) { if (net && nf->nf_net != net) continue; - del = nfsd_file_unhash_and_release_locked(nf, &dispose); + del = nfsd_file_unhash_and_dispose(nf, &dispose); /* * Deadlock detected! Something marked this entry as * unhased, but hasn't removed it from the hash list. 
*/ WARN_ON_ONCE(!del); + + nf = rhashtable_walk_next(&iter); } - spin_unlock(&nfb->nfb_lock); - nfsd_file_dispose_list(&dispose); - } + + rhashtable_walk_stop(&iter); + } while (nf == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + + nfsd_file_dispose_list(&dispose); } static struct nfsd_fcache_disposal * @@ -793,6 +968,19 @@ nfsd_file_cache_start_net(struct net *net) return nn->fcache_disposal ? 0 : -ENOMEM; } +/** + * nfsd_file_cache_purge - Remove all cache items associated with @net + * @net: target net namespace + * + */ +void +nfsd_file_cache_purge(struct net *net) +{ + lockdep_assert_held(&nfsd_mutex); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) + __nfsd_file_cache_purge(net); +} + void nfsd_file_cache_shutdown_net(struct net *net) { @@ -803,7 +991,11 @@ nfsd_file_cache_shutdown_net(struct net *net) void nfsd_file_cache_shutdown(void) { - set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); + int i; + + lockdep_assert_held(&nfsd_mutex); + if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) + return; lease_unregister_notifier(&nfsd_file_lease_notifier); unregister_shrinker(&nfsd_file_shrinker); @@ -812,7 +1004,7 @@ nfsd_file_cache_shutdown(void) * calling nfsd_file_cache_purge */ cancel_delayed_work_sync(&nfsd_filecache_laundrette); - nfsd_file_cache_purge(NULL); + __nfsd_file_cache_purge(NULL); list_lru_destroy(&nfsd_file_lru); rcu_barrier(); fsnotify_put_group(nfsd_file_fsnotify_group); @@ -822,124 +1014,96 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kvfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; -} + rhashtable_destroy(&nfsd_file_rhash_tbl); -static bool -nfsd_match_cred(const struct cred *c1, const struct cred *c2) -{ - int i; - - if (!uid_eq(c1->fsuid, c2->fsuid)) - return false; - if (!gid_eq(c1->fsgid, c2->fsgid)) - return false; - if (c1->group_info == NULL 
|| c2->group_info == NULL) - return c1->group_info == c2->group_info; - if (c1->group_info->ngroups != c2->group_info->ngroups) - return false; - for (i = 0; i < c1->group_info->ngroups; i++) { - if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) - return false; + for_each_possible_cpu(i) { + per_cpu(nfsd_file_cache_hits, i) = 0; + per_cpu(nfsd_file_acquisitions, i) = 0; + per_cpu(nfsd_file_releases, i) = 0; + per_cpu(nfsd_file_total_age, i) = 0; + per_cpu(nfsd_file_pages_flushed, i) = 0; + per_cpu(nfsd_file_evictions, i) = 0; } - return true; -} - -static struct nfsd_file * -nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, - unsigned int hashval, struct net *net) -{ - struct nfsd_file *nf; - unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { - if (nf->nf_may != need) - continue; - if (nf->nf_inode != inode) - continue; - if (nf->nf_net != net) - continue; - if (!nfsd_match_cred(nf->nf_cred, current_cred())) - continue; - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) - continue; - if (nfsd_file_get(nf) != NULL) - return nf; - } - return NULL; } /** - * nfsd_file_is_cached - are there any cached open files for this fh? - * @inode: inode of the file to check + * nfsd_file_is_cached - are there any cached open files for this inode? + * @inode: inode to check * - * Scan the hashtable for open files that match this fh. Returns true if there - * are any, and false if not. + * The lookup matches inodes in all net namespaces and is atomic wrt + * nfsd_file_acquire(). 
+ * + * Return values: + * %true: filecache contains at least one file matching this inode + * %false: filecache contains no files matching this inode */ bool nfsd_file_is_cached(struct inode *inode) { - bool ret = false; - struct nfsd_file *nf; - unsigned int hashval; + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, + }; + bool ret = false; - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); - - rcu_read_lock(); - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node) { - if (inode == nf->nf_inode) { - ret = true; - break; - } - } - rcu_read_unlock(); - trace_nfsd_file_is_cached(inode, hashval, (int)ret); + if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params) != NULL) + ret = true; + trace_nfsd_file_is_cached(inode, (int)ret); return ret; } static __be32 -nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, +nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf, bool open) { - __be32 status; - struct net *net = SVC_NET(rqstp); + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_FULL, + .need = may_flags & NFSD_FILE_MAY_MASK, + .net = SVC_NET(rqstp), + }; struct nfsd_file *nf, *new; - struct inode *inode; - unsigned int hashval; bool retry = true; + __be32 status; - /* FIXME: skip this if fh_dentry is already set? 
*/ status = fh_verify(rqstp, fhp, S_IFREG, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; + key.inode = d_inode(fhp->fh_dentry); + key.cred = get_current_cred(); - inode = d_inode(fhp->fh_dentry); - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); retry: - rcu_read_lock(); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); - rcu_read_unlock(); + /* Avoid allocation if the item is already in cache */ + nf = rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (nf) + nf = nfsd_file_get(nf); if (nf) goto wait_for_construction; - new = nfsd_file_alloc(inode, may_flags, hashval, net); + new = nfsd_file_alloc(&key, may_flags); if (!new) { - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, - NULL, nfserr_jukebox); - return nfserr_jukebox; + status = nfserr_jukebox; + goto out_status; } - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); - if (nf == NULL) + nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl, + &key, &new->nf_rhash, + nfsd_file_rhash_params); + if (!nf) { + nf = new; goto open_file; - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + } + if (IS_ERR(nf)) + goto insert_err; + nf = nfsd_file_get(nf); + if (nf == NULL) { + nf = new; + goto open_file; + } nfsd_file_slab_free(&new->nf_rcu); wait_for_construction: @@ -947,6 +1111,7 @@ wait_for_construction: /* Did construction of this file fail? 
*/ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); if (!retry) { status = nfserr_jukebox; goto out; @@ -956,49 +1121,29 @@ wait_for_construction: goto retry; } + nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); - if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { - bool write = (may_flags & NFSD_MAY_WRITE); - - if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || - (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { - status = nfserrno(nfsd_open_break_lease( - file_inode(nf->nf_file), may_flags)); - if (status == nfs_ok) { - clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); - if (write) - clear_bit(NFSD_FILE_BREAK_WRITE, - &nf->nf_flags); - } - } - } + status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { + if (open) + this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { nfsd_file_put(nf); nf = NULL; } - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); +out_status: + put_cred(key.cred); + if (open) + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; -open_file: - nf = new; - /* Take reference for the hashtable */ - refcount_inc(&nf->nf_ref); - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - list_lru_add(&nfsd_file_lru, &nf->nf_lru); - hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); - ++nfsd_file_hashtbl[hashval].nfb_count; - nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, - nfsd_file_hashtbl[hashval].nfb_count); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) - nfsd_file_gc(); - nf->nf_mark = nfsd_file_mark_find_or_create(nf); +open_file: + trace_nfsd_file_alloc(nf); + nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { if (open) { status = 
nfsd_open_verified(rqstp, fhp, may_flags, @@ -1012,18 +1157,20 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || inode->i_nlink == 0) { - bool do_free; - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - do_free = nfsd_file_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (do_free) + if (status != nfs_ok || key.inode->i_nlink == 0) + if (nfsd_file_unhash(nf)) nfsd_file_put_noref(nf); - } clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); goto out; + +insert_err: + nfsd_file_slab_free(&new->nf_rcu); + trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, PTR_ERR(nf)); + nf = NULL; + status = nfserr_jukebox; + goto out_status; } /** @@ -1040,7 +1187,7 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); } /** @@ -1057,7 +1204,7 @@ __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); } /* @@ -1067,29 +1214,49 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned int i, count = 0, longest = 0; - unsigned long hits = 0; + unsigned long releases = 0, pages_flushed = 0, evictions = 0; + unsigned long hits = 0, acquisitions = 0; + unsigned int i, count = 0, buckets = 0; + unsigned long lru = 0, total_age = 0; - /* - * No need for spinlocks here since we're not terribly interested in - * accuracy. 
We do take the nfsd_mutex simply to ensure that we - * don't end up racing with server shutdown - */ + /* Serialize with server shutdown */ mutex_lock(&nfsd_mutex); - if (nfsd_file_hashtbl) { - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - count += nfsd_file_hashtbl[i].nfb_count; - longest = max(longest, nfsd_file_hashtbl[i].nfb_count); - } + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { + struct bucket_table *tbl; + struct rhashtable *ht; + + lru = list_lru_count(&nfsd_file_lru); + + rcu_read_lock(); + ht = &nfsd_file_rhash_tbl; + count = atomic_read(&ht->nelems); + tbl = rht_dereference_rcu(ht->tbl, ht); + buckets = tbl->size; + rcu_read_unlock(); } mutex_unlock(&nfsd_mutex); - for_each_possible_cpu(i) + for_each_possible_cpu(i) { hits += per_cpu(nfsd_file_cache_hits, i); + acquisitions += per_cpu(nfsd_file_acquisitions, i); + releases += per_cpu(nfsd_file_releases, i); + total_age += per_cpu(nfsd_file_total_age, i); + evictions += per_cpu(nfsd_file_evictions, i); + pages_flushed += per_cpu(nfsd_file_pages_flushed, i); + } seq_printf(m, "total entries: %u\n", count); - seq_printf(m, "longest chain: %u\n", longest); + seq_printf(m, "hash buckets: %u\n", buckets); + seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); + seq_printf(m, "acquisitions: %lu\n", acquisitions); + seq_printf(m, "releases: %lu\n", releases); + seq_printf(m, "evictions: %lu\n", evictions); + if (releases) + seq_printf(m, "mean age (ms): %ld\n", total_age / releases); + else + seq_printf(m, "mean age (ms): -\n"); + seq_printf(m, "pages flushed: %lu\n", pages_flushed); return 0; } diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 1da0c79a5580..8e8c0c47d67d 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -29,7 +29,7 @@ struct nfsd_file_mark { * never be dereferenced, only used for comparison. 
*/ struct nfsd_file { - struct hlist_node nf_node; + struct rhash_head nf_rhash; struct list_head nf_lru; struct rcu_head nf_rcu; struct file *nf_file; @@ -37,15 +37,13 @@ struct nfsd_file { struct net *nf_net; #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) -#define NFSD_FILE_BREAK_READ (2) -#define NFSD_FILE_BREAK_WRITE (3) -#define NFSD_FILE_REFERENCED (4) +#define NFSD_FILE_REFERENCED (2) unsigned long nf_flags; - struct inode *nf_inode; - unsigned int nf_hashval; + struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; + ktime_t nf_birthtime; }; int nfsd_file_cache_init(void); @@ -54,6 +52,7 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); +void nfsd_file_close(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 1b1a962a1804..ffe17743cc74 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -189,6 +189,9 @@ struct nfsd_net { struct nfsd_fcache_disposal *fcache_disposal; siphash_key_t siphash_key; + + atomic_t nfs4_client_count; + int nfs4_max_clients; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index b5760801d377..9edd3c1a30fb 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -111,7 +111,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - fh_lock(fh); + inode_lock(inode); error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, argp->acl_access); @@ -122,7 +122,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_drop_lock; - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); @@ -136,7 +136,7 @@ out: return 
rpc_success; out_drop_lock: - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); out_errno: resp->status = nfserrno(error); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 35b2ebda14da..9446c6743664 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -101,7 +101,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - fh_lock(fh); + inode_lock(inode); error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, argp->acl_access); @@ -111,7 +111,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) argp->acl_default); out_drop_lock: - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); out_errno: resp->status = nfserrno(error); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 981a3a7a6e16..a41cca619338 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -67,12 +67,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp) { struct nfsd3_sattrargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: SETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, + resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, argp->check_guard, argp->guardtime); return rpc_success; } @@ -233,6 +236,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, { struct iattr *iap = &argp->attrs; struct dentry *parent, *child; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; __u32 v_mtime, v_atime; struct inode *inode; __be32 status; @@ -254,7 +260,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - fh_lock_nested(fhp, I_MUTEX_PARENT); + inode_lock_nested(inode, I_MUTEX_PARENT); child = lookup_one_len(argp->name, parent, argp->len); if (IS_ERR(child)) { @@ -312,11 +318,13 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!IS_POSIXACL(inode)) iap->ia_mode &= 
~current_umask(); + fh_fill_pre_attrs(fhp); host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true); if (host_err < 0) { status = nfserrno(host_err); goto out; } + fh_fill_post_attrs(fhp); /* A newly created file already has a file size of zero. */ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) @@ -331,10 +339,10 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, } set_attr: - status = nfsd_create_setattr(rqstp, fhp, resfhp, iap); + status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); out: - fh_unlock(fhp); + inode_unlock(inode); if (child && !IS_ERR(child)) dput(child); fh_drop_write(fhp); @@ -368,6 +376,9 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd3_createargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: MKDIR(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -378,8 +389,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - &argp->attrs, S_IFDIR, 0, &resp->fh); - fh_unlock(&resp->dirfh); + &attrs, S_IFDIR, 0, &resp->fh); return rpc_success; } @@ -388,6 +398,9 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) { struct nfsd3_symlinkargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; if (argp->tlen == 0) { resp->status = nfserr_inval; @@ -414,7 +427,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, - argp->flen, argp->tname, &resp->fh); + argp->flen, argp->tname, &attrs, &resp->fh); kfree(argp->tname); out: return rpc_success; @@ -428,6 +441,9 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) { struct nfsd3_mknodargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + 
struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; int type; dev_t rdev = 0; @@ -453,8 +469,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) type = nfs3_ftypes[argp->ftype]; resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - &argp->attrs, type, rdev, &resp->fh); - fh_unlock(&resp->dirfh); + &attrs, type, rdev, &resp->fh); out: return rpc_success; } @@ -477,7 +492,6 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); - fh_unlock(&resp->fh); return rpc_success; } @@ -498,7 +512,6 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); - fh_unlock(&resp->fh); return rpc_success; } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index eaa3a0cf38f1..bb8e2f6d7d03 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -751,58 +751,26 @@ out_estate: return ret; } -__be32 -nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl) +__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, + struct nfsd_attrs *attr) { - __be32 error; int host_error; - struct dentry *dentry; - struct inode *inode; - struct posix_acl *pacl = NULL, *dpacl = NULL; unsigned int flags = 0; - /* Get inode */ - error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); - if (error) - return error; + if (!acl) + return nfs_ok; - dentry = fhp->fh_dentry; - inode = d_inode(dentry); - - if (S_ISDIR(inode->i_mode)) + if (type == NF4DIR) flags = NFS4_ACL_DIR; - host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + host_error = nfs4_acl_nfsv4_to_posix(acl, &attr->na_pacl, + &attr->na_dpacl, flags); if (host_error == -EINVAL) return nfserr_attrnotsupp; - if (host_error < 0) - goto out_nfserr; - - fh_lock(fhp); - - host_error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, pacl); - if (host_error < 0) - goto out_drop_lock; - - 
if (S_ISDIR(inode->i_mode)) { - host_error = set_posix_acl(&init_user_ns, inode, - ACL_TYPE_DEFAULT, dpacl); - } - -out_drop_lock: - fh_unlock(fhp); - - posix_acl_release(pacl); - posix_acl_release(dpacl); -out_nfserr: - if (host_error == -EOPNOTSUPP) - return nfserr_attrnotsupp; else return nfserrno(host_error); } - static short ace2type(struct nfs4_ace *ace) { diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 11f8715d92d6..4ce328209f61 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -679,7 +679,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * case NFS4_OK: * write_response4 coa_resok4; * default: - * length4 coa_bytes_copied; + * length4 coa_bytes_copied; * }; * struct CB_OFFLOAD4args { * nfs_fh4 coa_fh; @@ -688,21 +688,22 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * }; */ static void encode_offload_info4(struct xdr_stream *xdr, - __be32 nfserr, - const struct nfsd4_copy *cp) + const struct nfsd4_cb_offload *cbo) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p++ = nfserr; - if (!nfserr) { + *p = cbo->co_nfserr; + switch (cbo->co_nfserr) { + case nfs_ok: p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); p = xdr_encode_empty_array(p); - p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written); - *p++ = cpu_to_be32(cp->cp_res.wr_stable_how); - p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data, + p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written); + *p++ = cpu_to_be32(cbo->co_res.wr_stable_how); + p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data, NFS4_VERIFIER_SIZE); - } else { + break; + default: p = xdr_reserve_space(xdr, 8); /* We always return success if bytes were written */ p = xdr_encode_hyper(p, 0); @@ -710,18 +711,16 @@ static void encode_offload_info4(struct xdr_stream *xdr, } static void encode_cb_offload4args(struct xdr_stream *xdr, - __be32 nfserr, - const struct knfsd_fh *fh, - const struct nfsd4_copy *cp, + const struct nfsd4_cb_offload *cbo, 
struct nfs4_cb_compound_hdr *hdr) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p++ = cpu_to_be32(OP_CB_OFFLOAD); - encode_nfs_fh4(xdr, fh); - encode_stateid4(xdr, &cp->cp_res.cb_stateid); - encode_offload_info4(xdr, nfserr, cp); + *p = cpu_to_be32(OP_CB_OFFLOAD); + encode_nfs_fh4(xdr, &cbo->co_fh); + encode_stateid4(xdr, &cbo->co_res.cb_stateid); + encode_offload_info4(xdr, cbo); hdr->nops++; } @@ -731,8 +730,8 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, const void *data) { const struct nfsd4_callback *cb = data; - const struct nfsd4_copy *cp = - container_of(cb, struct nfsd4_copy, cp_cb); + const struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); struct nfs4_cb_compound_hdr hdr = { .ident = 0, .minorversion = cb->cb_clp->cl_minorversion, @@ -740,7 +739,7 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, encode_cb_compound4args(xdr, &hdr); encode_cb_sequence4args(xdr, cb, &hdr); - encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr); + encode_cb_offload4args(xdr, cbo, &hdr); encode_cb_nops(&hdr); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3895eb52d2b1..a72ab97f77ef 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -64,36 +64,6 @@ MODULE_PARM_DESC(nfsd4_ssc_umount_timeout, "idle msecs before unmount export from source server"); #endif -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#include - -static inline void -nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) -{ - struct inode *inode = d_inode(resfh->fh_dentry); - int status; - - inode_lock(inode); - status = security_inode_setsecctx(resfh->fh_dentry, - label->data, label->len); - inode_unlock(inode); - - if (status) - /* - * XXX: We should really fail the whole open, but we may - * already have created a new file, so it may be too - * late. 
For now this seems the least of evils: - */ - bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - - return; -} -#else -static inline void -nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) -{ } -#endif - #define NFSDDBG_FACILITY NFSDDBG_PROC static u32 nfsd_attrmask[] = { @@ -158,26 +128,6 @@ is_create_with_attrs(struct nfsd4_open *open) || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); } -/* - * if error occurs when setting the acl, just clear the acl bit - * in the returned attr bitmap. - */ -static void -do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl, u32 *bmval) -{ - __be32 status; - - status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); - if (status) - /* - * We should probably fail the whole open at this point, - * but we've already created the file, so it's too late; - * So this seems the least of evils: - */ - bmval[0] &= ~FATTR4_WORD0_ACL; -} - static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -286,6 +236,10 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_fh *resfhp, struct nfsd4_open *open) { struct iattr *iap = &open->op_iattr; + struct nfsd_attrs attrs = { + .na_iattr = iap, + .na_seclabel = &open->op_label, + }; struct dentry *parent, *child; __u32 v_mtime, v_atime; struct inode *inode; @@ -307,7 +261,10 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - fh_lock_nested(fhp, I_MUTEX_PARENT); + if (is_create_with_attrs(open)) + nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs); + + inode_lock_nested(inode, I_MUTEX_PARENT); child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); if (IS_ERR(child)) { @@ -345,6 +302,11 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (d_really_is_positive(child)) { status = nfs_ok; + /* NFSv4 protocol requires change attributes even though + * no change happened. 
+ */ + fh_fill_both_attrs(fhp); + switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: if (!d_is_reg(child)) @@ -386,10 +348,12 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!IS_POSIXACL(inode)) iap->ia_mode &= ~current_umask(); + fh_fill_pre_attrs(fhp); status = nfsd4_vfs_create(fhp, child, open); if (status != nfs_ok) goto out; open->op_created = true; + fh_fill_post_attrs(fhp); /* A newly created file already has a file size of zero. */ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) @@ -404,10 +368,15 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, } set_attr: - status = nfsd_create_setattr(rqstp, fhp, resfhp, iap); + status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); + if (attrs.na_labelerr) + open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (attrs.na_aclerr) + open->op_bmval[0] &= ~FATTR4_WORD0_ACL; out: - fh_unlock(fhp); + inode_unlock(inode); + nfsd_attrs_free(&attrs); if (child && !IS_ERR(child)) dput(child); fh_drop_write(fhp); @@ -447,9 +416,6 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru status = nfsd4_create_file(rqstp, current_fh, *resfh, open); current->fs->umask = 0; - if (!status && open->op_label.len) - nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); - /* * Following rfc 3530 14.2.16, and rfc 5661 18.16.4 * use the returned bitmask to indicate which attributes @@ -458,24 +424,21 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0) open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_MODIFY); - } else - /* - * Note this may exit with the parent still locked. - * We will hold the lock until nfsd4_open's final - * lookup, to prevent renames or unlinks until we've had - * a chance to an acquire a delegation if appropriate. 
- */ + } else { status = nfsd_lookup(rqstp, current_fh, open->op_fname, open->op_fnamelen, *resfh); + if (!status) + /* NFSv4 protocol requires change attributes even though + * no change happened. + */ + fh_fill_both_attrs(current_fh); + } if (status) goto out; status = nfsd_check_obj_isreg(*resfh); if (status) goto out; - if (is_create_with_attrs(open) && open->op_acl != NULL) - do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); - nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); accmode = NFSD_MAY_NOP; if (open->op_created || @@ -547,6 +510,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, open->op_openowner); open->op_filp = NULL; + open->op_rqstp = rqstp; /* This check required by spec. */ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) @@ -630,9 +594,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } status = nfsd4_process_open2(rqstp, resfh, open); - WARN(status && open->op_created, - "nfsd4_process_open2 failed to open newly-created file! 
status=%u\n", - be32_to_cpu(status)); + if (status && open->op_created) + pr_warn("nfsd4_process_open2 failed to open newly-created file: status=%u\n", + be32_to_cpu(status)); if (reclaim && !status) nn->somebody_reclaimed = true; out: @@ -786,6 +750,10 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_create *create = &u->create; + struct nfsd_attrs attrs = { + .na_iattr = &create->cr_iattr, + .na_seclabel = &create->cr_label, + }; struct svc_fh resfh; __be32 status; dev_t rdev; @@ -801,12 +769,13 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + status = nfsd4_acl_to_attr(create->cr_type, create->cr_acl, &attrs); current->fs->umask = create->cr_umask; switch (create->cr_type) { case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_data, &resfh); + create->cr_data, &attrs, &resfh); break; case NF4BLK: @@ -817,7 +786,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFBLK, rdev, &resfh); + &attrs, S_IFBLK, rdev, &resfh); break; case NF4CHR: @@ -828,26 +797,26 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr,S_IFCHR, rdev, &resfh); + &attrs, S_IFCHR, rdev, &resfh); break; case NF4SOCK: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFSOCK, 0, &resfh); + &attrs, S_IFSOCK, 0, &resfh); break; case NF4FIFO: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFIFO, 0, &resfh); + &attrs, S_IFIFO, 0, &resfh); break; case NF4DIR: create->cr_iattr.ia_valid &= ~ATTR_SIZE; status = 
nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFDIR, 0, &resfh); + &attrs, S_IFDIR, 0, &resfh); break; default: @@ -857,20 +826,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (create->cr_label.len) - nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); - - if (create->cr_acl != NULL) - do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, - create->cr_bmval); - - fh_unlock(&cstate->current_fh); + if (attrs.na_labelerr) + create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (attrs.na_aclerr) + create->cr_bmval[0] &= ~FATTR4_WORD0_ACL; set_change_info(&create->cr_cinfo, &cstate->current_fh); fh_dup2(&cstate->current_fh, &resfh); out: fh_put(&resfh); out_umask: current->fs->umask = 0; + nfsd_attrs_free(&attrs); return status; } @@ -1043,10 +1009,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); - if (!status) { - fh_unlock(&cstate->current_fh); + if (!status) set_change_info(&remove->rm_cinfo, &cstate->current_fh); - } return status; } @@ -1086,7 +1050,6 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &exp, &dentry); if (err) return err; - fh_unlock(&cstate->current_fh); if (d_really_is_negative(dentry)) { exp_put(exp); err = nfserr_noent; @@ -1141,6 +1104,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_setattr *setattr = &u->setattr; + struct nfsd_attrs attrs = { + .na_iattr = &setattr->sa_iattr, + .na_seclabel = &setattr->sa_label, + }; + struct inode *inode; __be32 status = nfs_ok; int err; @@ -1163,19 +1131,18 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (setattr->sa_acl != NULL) - status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, 
- setattr->sa_acl); + inode = cstate->current_fh.fh_dentry->d_inode; + status = nfsd4_acl_to_attr(S_ISDIR(inode->i_mode) ? NF4DIR : NF4REG, + setattr->sa_acl, &attrs); + if (status) goto out; - if (setattr->sa_label.len) - status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, - &setattr->sa_label); - if (status) - goto out; - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, + status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, 0, (time64_t)0); + if (!status) + status = nfserrno(attrs.na_labelerr); out: + nfsd_attrs_free(&attrs); fh_drop_write(&cstate->current_fh); return status; } @@ -1285,30 +1252,17 @@ out: return status; } -void nfs4_put_copy(struct nfsd4_copy *copy) +static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(&copy->refcount)) return; + kfree(copy->cp_src); kfree(copy); } -static bool -check_and_set_stop_copy(struct nfsd4_copy *copy) -{ - bool value; - - spin_lock(&copy->cp_clp->async_lock); - value = copy->stopped; - if (!copy->stopped) - copy->stopped = true; - spin_unlock(&copy->cp_clp->async_lock); - return value; -} - static void nfsd4_stop_copy(struct nfsd4_copy *copy) { - /* only 1 thread should stop the copy */ - if (!check_and_set_stop_copy(copy)) + if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags)) kthread_stop(copy->copy_task); nfs4_put_copy(copy); } @@ -1389,7 +1343,7 @@ try_again: return 0; } if (work) { - strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr)); + strlcpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1); refcount_set(&work->nsui_refcnt, 2); work->nsui_busy = true; list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); @@ -1549,7 +1503,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, if (status) goto out; - status = nfsd4_interssc_connect(&copy->cp_src, rqstp, mount); + status = nfsd4_interssc_connect(copy->cp_src, rqstp, mount); if (status) goto out; @@ -1567,7 +1521,7 @@ out: } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct 
nfsd_file *src, +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, struct nfsd_file *dst) { bool found = false; @@ -1576,9 +1530,9 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id); - nfs42_ssc_close(src->nf_file); + nfs42_ssc_close(filp); nfsd_file_put(dst); - fput(src->nf_file); + fput(filp); if (!nn) { mntput(ss_mnt); @@ -1621,7 +1575,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, struct nfsd_file *dst) { } @@ -1658,9 +1612,10 @@ nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { - struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); + struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); - nfs4_put_copy(copy); + kfree(cbo); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, @@ -1677,15 +1632,16 @@ static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = { static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) { copy->cp_res.wr_stable_how = - copy->committed ? NFS_FILE_SYNC : NFS_UNSTABLE; - copy->cp_synchronous = sync; + test_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags) ? 
+ NFS_FILE_SYNC : NFS_UNSTABLE; + nfsd4_copy_set_sync(copy, sync); gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net); } -static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) +static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, + struct file *dst, + struct file *src) { - struct file *dst = copy->nf_dst->nf_file; - struct file *src = copy->nf_src->nf_file; errseq_t since; ssize_t bytes_copied = 0; u64 bytes_total = copy->cp_count; @@ -1707,26 +1663,29 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) copy->cp_res.wr_bytes_written += bytes_copied; src_pos += bytes_copied; dst_pos += bytes_copied; - } while (bytes_total > 0 && !copy->cp_synchronous); + } while (bytes_total > 0 && nfsd4_copy_is_async(copy)); /* for a non-zero asynchronous copy do a commit of data */ - if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) { + if (nfsd4_copy_is_async(copy) && copy->cp_res.wr_bytes_written > 0) { since = READ_ONCE(dst->f_wb_err); status = vfs_fsync_range(dst, copy->cp_dst_pos, copy->cp_res.wr_bytes_written, 0); if (!status) status = filemap_check_wb_err(dst->f_mapping, since); if (!status) - copy->committed = true; + set_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags); } return bytes_copied; } -static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) +static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, + struct file *src, struct file *dst, + bool sync) { __be32 status; ssize_t bytes; - bytes = _nfsd_copy_file_range(copy); + bytes = _nfsd_copy_file_range(copy, dst, src); + /* for async copy, we ignore the error, client can always retry * to get the error */ @@ -1736,13 +1695,6 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) nfsd4_init_copy_res(copy, sync); status = nfs_ok; } - - if (!copy->cp_intra) /* Inter server SSC */ - nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, - copy->nf_dst); - else - nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); - return status; } @@ -1751,17 +1703,17 @@ static 
void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; dst->cp_count = src->cp_count; - dst->cp_synchronous = src->cp_synchronous; + dst->cp_flags = src->cp_flags; memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res)); memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->nf_dst = nfsd_file_get(src->nf_dst); - dst->cp_intra = src->cp_intra; - if (src->cp_intra) /* for inter, file_src doesn't exist yet */ + /* for inter, nf_src doesn't exist yet */ + if (!nfsd4_ssc_is_inter(src)) dst->nf_src = nfsd_file_get(src->nf_src); memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); - memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server)); + memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); dst->ss_mnt = src->ss_mnt; @@ -1771,7 +1723,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) { nfs4_free_copy_state(copy); nfsd_file_put(copy->nf_dst); - if (copy->cp_intra) + if (!nfsd4_ssc_is_inter(copy)) nfsd_file_put(copy->nf_src); spin_lock(©->cp_clp->async_lock); list_del(©->copies); @@ -1779,45 +1731,58 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } +static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) +{ + struct nfsd4_cb_offload *cbo; + + cbo = kzalloc(sizeof(*cbo), GFP_KERNEL); + if (!cbo) + return; + + memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); + memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); + cbo->co_nfserr = nfserr; + + nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, + NFSPROC4_CLNT_CB_OFFLOAD); + trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, + &cbo->co_fh, copy->cp_count, nfserr); + nfsd4_run_cb(&cbo->co_cb); +} + +/** + * nfsd4_do_async_copy - kthread function for background server-side COPY + * @data: arguments for 
COPY operation + * + * Return values: + * %0: Copy operation is done. + */ static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; - struct nfsd4_copy *cb_copy; + __be32 nfserr; - if (!copy->cp_intra) { /* Inter server SSC */ - copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); - if (!copy->nf_src) { - copy->nfserr = nfserr_serverfault; - nfsd4_interssc_disconnect(copy->ss_mnt); - goto do_callback; - } - copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, - ©->stateid); - if (IS_ERR(copy->nf_src->nf_file)) { - copy->nfserr = nfserr_offload_denied; + if (nfsd4_ssc_is_inter(copy)) { + struct file *filp; + + filp = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, + ©->stateid); + if (IS_ERR(filp)) { + nfserr = nfserr_offload_denied; nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; } + nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, + false); + nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst); + } else { + nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, false); + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } - copy->nfserr = nfsd4_do_copy(copy, 0); do_callback: - cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); - if (!cb_copy) - goto out; - refcount_set(&cb_copy->refcount, 1); - memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res)); - cb_copy->cp_clp = copy->cp_clp; - cb_copy->nfserr = copy->nfserr; - memcpy(&cb_copy->fh, ©->fh, sizeof(copy->fh)); - nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, - &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); - trace_nfsd_cb_offload(copy->cp_clp, ©->cp_res.cb_stateid, - ©->fh, copy->cp_count, copy->nfserr); - nfsd4_run_cb(&cb_copy->cp_cb); -out: - if (!copy->cp_intra) - kfree(copy->nf_src); + nfsd4_send_cb_offload(copy, nfserr); cleanup_async_copy(copy); return 0; } @@ -1830,8 +1795,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_copy 
*async_copy = NULL; - if (!copy->cp_intra) { /* Inter server SSC */ - if (!inter_copy_offload_enable || copy->cp_synchronous) { + if (nfsd4_ssc_is_inter(copy)) { + if (!inter_copy_offload_enable || nfsd4_copy_is_sync(copy)) { status = nfserr_notsupp; goto out; } @@ -1848,13 +1813,16 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy->cp_clp = cstate->clp; memcpy(©->fh, &cstate->current_fh.fh_handle, sizeof(struct knfsd_fh)); - if (!copy->cp_synchronous) { + if (nfsd4_copy_is_async(copy)) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out_err; + async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); + if (!async_copy->cp_src) + goto out_err; if (!nfs4_init_copy_state(nn, copy)) goto out_err; refcount_set(&async_copy->refcount, 1); @@ -1872,7 +1840,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, wake_up_process(async_copy->copy_task); status = nfs_ok; } else { - status = nfsd4_do_copy(copy, 1); + status = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, true); + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } out: return status; @@ -1880,7 +1850,7 @@ out_err: if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); - if (!copy->cp_intra) + if (nfsd4_ssc_is_inter(copy)) nfsd4_interssc_disconnect(copy->ss_mnt); goto out; } @@ -1953,9 +1923,9 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* For now, only return one server address in cpn_src, the * address used by the client to connect to this server. 
*/ - cn->cpn_src.nl4_type = NL4_NETADDR; + cn->cpn_src->nl4_type = NL4_NETADDR; status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, - &cn->cpn_src.u.nl4_addr); + &cn->cpn_src->u.nl4_addr); WARN_ON_ONCE(status); if (status) { nfs4_put_cpntf_state(nn, cps); @@ -2609,7 +2579,7 @@ check_if_stalefh_allowed(struct nfsd4_compoundargs *args) return; } putfh = (struct nfsd4_putfh *)&saved_op->u; - if (!copy->cp_intra) + if (nfsd4_ssc_is_inter(copy)) putfh->no_verify = true; } } @@ -2711,7 +2681,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { /* * Don't execute this op if we couldn't encode a - * succesful reply: + * successful reply: */ u32 plen = op->opdesc->op_rsize_bop(rqstp, op); /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9409a0dc1b76..c5d199d7e6b4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -820,9 +820,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) swap(f2, fp->fi_fds[O_RDWR]); spin_unlock(&fp->fi_lock); if (f1) - nfsd_file_put(f1); + nfsd_file_close(f1); if (f2) - nfsd_file_put(f2); + nfsd_file_close(f2); } } @@ -1131,7 +1131,6 @@ static void block_delegations(struct knfsd_fh *fh) static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, - struct svc_fh *current_fh, struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; @@ -1141,7 +1140,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, n = atomic_long_inc_return(&num_delegations); if (n < 0 || n > max_delegations) goto out_dec; - if (delegation_blocked(¤t_fh->fh_handle)) + if (delegation_blocked(&fp->fi_fhandle)) goto out_dec; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) @@ -2053,11 +2052,16 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) * This type of memory management is somewhat inefficient, but we use it * anyway since SETCLIENTID is not a common operation. 
*/ -static struct nfs4_client *alloc_client(struct xdr_netobj name) +static struct nfs4_client *alloc_client(struct xdr_netobj name, + struct nfsd_net *nn) { struct nfs4_client *clp; int i; + if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) { + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + return NULL; + } clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; @@ -2076,6 +2080,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; clp->cl_state = NFSD4_ACTIVE; + atomic_inc(&nn->nfs4_client_count); atomic_set(&clp->cl_delegs_in_recall, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -2183,6 +2188,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) static void __destroy_client(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; @@ -2226,6 +2232,7 @@ __destroy_client(struct nfs4_client *clp) nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); + atomic_add_unless(&nn->nfs4_client_count, -1, 0); free_client(clp); wake_up_all(&expiry_wq); } @@ -2564,7 +2571,7 @@ static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f) static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) { - struct inode *inode = f->nf_inode; + struct inode *inode = file_inode(f->nf_file); seq_printf(s, "superblock: \"%02x:%02x:%ld\"", MAJOR(inode->i_sb->s_dev), @@ -2848,7 +2855,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct dentry *dentries[ARRAY_SIZE(client_files)]; - clp = alloc_client(name); + clp = alloc_client(name, nn); if (clp == NULL) return NULL; @@ -4330,6 +4337,27 @@ out: return -ENOMEM; } +void nfsd4_init_leases_net(struct nfsd_net *nn) +{ + struct sysinfo 
si; + u64 max_clients; + + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; + nn->somebody_reclaimed = false; + nn->track_reclaim_completes = false; + nn->clverifier_counter = prandom_u32(); + nn->clientid_base = prandom_u32(); + nn->clientid_counter = nn->clientid_base + 1; + nn->s2s_cp_cl_id = nn->clientid_counter++; + + atomic_set(&nn->nfs4_client_count, 0); + si_meminfo(&si); + max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024); + max_clients *= NFS4_CLIENTS_PER_GB; + nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); +} + static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; @@ -5032,11 +5060,14 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, .ia_valid = ATTR_SIZE, .ia_size = 0, }; + struct nfsd_attrs attrs = { + .na_iattr = &iattr, + }; if (!open->op_truncate) return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0); + return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, @@ -5104,6 +5135,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, goto out_put_access; nf->nf_file = open->op_filp; open->op_filp = NULL; + trace_nfsd_file_create(rqstp, access, nf); } spin_lock(&fp->fi_lock); @@ -5259,11 +5291,41 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, return 0; } +/* + * It's possible that between opening the dentry and setting the delegation, + * that it has been renamed or unlinked. Redo the lookup to verify that this + * hasn't happened. 
+ */ +static int +nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, + struct svc_fh *parent) +{ + struct svc_export *exp; + struct dentry *child; + __be32 err; + + err = nfsd_lookup_dentry(open->op_rqstp, parent, + open->op_fname, open->op_fnamelen, + &exp, &child); + + if (err) + return -EAGAIN; + + dput(child); + if (child != file_dentry(fp->fi_deleg_file->nf_file)) + return -EAGAIN; + + return 0; +} + static struct nfs4_delegation * -nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, - struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) +nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, + struct svc_fh *parent) { int status = 0; + struct nfs4_client *clp = stp->st_stid.sc_client; + struct nfs4_file *fp = stp->st_stid.sc_file; + struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf; struct file_lock *fl; @@ -5305,7 +5367,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, return ERR_PTR(status); status = -ENOMEM; - dp = alloc_init_deleg(clp, fp, fh, odstate); + dp = alloc_init_deleg(clp, fp, odstate); if (!dp) goto out_delegees; @@ -5318,6 +5380,13 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, locks_free_lock(fl); if (status) goto out_clnt_odstate; + + if (parent) { + status = nfsd4_verify_deleg_dentry(open, fp, parent); + if (status) + goto out_unlock; + } + status = nfsd4_check_conflicting_opens(clp, fp); if (status) goto out_unlock; @@ -5373,12 +5442,13 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. 
*/ static void -nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, - struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, + struct svc_fh *currentfh) { struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); struct nfs4_client *clp = stp->st_stid.sc_client; + struct svc_fh *parent = NULL; int cb_up; int status = 0; @@ -5392,6 +5462,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: + parent = currentfh; + fallthrough; case NFS4_OPEN_CLAIM_FH: /* * Let's not give out any delegations till everyone's @@ -5406,7 +5478,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, default: goto out_no_deleg; } - dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate); + dp = nfs4_set_delegation(open, stp, parent); if (IS_ERR(dp)) goto out_no_deleg; @@ -5538,7 +5610,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(current_fh, open, stp); + nfs4_open_delegation(open, stp, &resp->cstate.current_fh); nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); @@ -5792,9 +5864,12 @@ static void nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, struct laundry_time *lt) { + unsigned int maxreap, reapcnt = 0; struct list_head *pos, *next; struct nfs4_client *clp; + maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ? 
+ NFSD_CLIENT_MAX_TRIM_PER_RUN : 0; INIT_LIST_HEAD(reaplist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { @@ -5805,14 +5880,15 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, break; if (!atomic_read(&clp->cl_rpc_users)) clp->cl_state = NFSD4_COURTESY; - if (!client_has_state(clp) || - ktime_get_boottime_seconds() >= - (clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT)) + if (!client_has_state(clp)) goto exp_client; - if (nfs4_anylock_blockers(clp)) { + if (!nfs4_anylock_blockers(clp)) + if (reapcnt >= maxreap) + continue; exp_client: - if (!mark_client_expired_locked(clp)) - list_add(&clp->cl_lru, reaplist); + if (!mark_client_expired_locked(clp)) { + list_add(&clp->cl_lru, reaplist); + reapcnt++; } } spin_unlock(&nn->client_lock); @@ -7321,21 +7397,22 @@ out: static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct nfsd_file *nf; + struct inode *inode; __be32 err; err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; - fh_lock(fhp); /* to block new leases till after test_lock: */ - err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode, - NFSD_MAY_READ)); + inode = fhp->fh_dentry->d_inode; + inode_lock(inode); /* to block new leases till after test_lock: */ + err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (err) goto out; lock->fl_file = nf->nf_file; err = nfserrno(vfs_test_lock(nf->nf_file, lock)); lock->fl_file = NULL; out: - fh_unlock(fhp); + inode_unlock(inode); nfsd_file_put(nf); return err; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2acea7792bb2..1e9690a061ec 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1810,7 +1810,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta for (i = 0; i < test_stateid->ts_num_ids; i++) { stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); if (!stateid) - return nfserrno(-ENOMEM); /* XXX: not jukebox? 
*/ + return nfserr_jukebox; INIT_LIST_HEAD(&stateid->ts_id_list); list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid); @@ -1896,8 +1896,8 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) { + u32 consecutive, i, count, sync; struct nl4_server *ns_dummy; - u32 consecutive, i, count; __be32 status; status = nfsd4_decode_stateid4(argp, ©->cp_src_stateid); @@ -1915,25 +1915,28 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) /* ca_consecutive: we always do consecutive copies */ if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0) return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, ©->cp_synchronous) < 0) + if (xdr_stream_decode_bool(argp->xdr, &sync) < 0) return nfserr_bad_xdr; + nfsd4_copy_set_sync(copy, sync); if (xdr_stream_decode_u32(argp->xdr, &count) < 0) return nfserr_bad_xdr; - copy->cp_intra = false; + copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src)); + if (copy->cp_src == NULL) + return nfserr_jukebox; if (count == 0) { /* intra-server copy */ - copy->cp_intra = true; + __set_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); return nfs_ok; } /* decode all the supplied server addresses but use only the first */ - status = nfsd4_decode_nl4_server(argp, ©->cp_src); + status = nfsd4_decode_nl4_server(argp, copy->cp_src); if (status) return status; ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); if (ns_dummy == NULL) - return nfserrno(-ENOMEM); /* XXX: jukebox? 
*/ + return nfserr_jukebox; for (i = 0; i < count - 1; i++) { status = nfsd4_decode_nl4_server(argp, ns_dummy); if (status) { @@ -1952,10 +1955,17 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, { __be32 status; + cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src)); + if (cn->cpn_src == NULL) + return nfserr_jukebox; + cn->cpn_dst = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_dst)); + if (cn->cpn_dst == NULL) + return nfserr_jukebox; + status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid); if (status) return status; - return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); + return nfsd4_decode_nl4_server(argp, cn->cpn_dst); } static __be32 @@ -2828,10 +2838,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - __be32 *p; + __be32 *p, *attrlen_p; int starting_len = xdr->buf->len; int attrlen_offset; - __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; @@ -2919,10 +2928,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; attrlen_offset = xdr->buf->len; - p = xdr_reserve_space(xdr, 4); - if (!p) + attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); + if (!attrlen_p) goto out_resource; - p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 supp[3]; @@ -3344,8 +3352,7 @@ out_acl: *p++ = cpu_to_be32(err == 0); } - attrlen = htonl(xdr->buf->len - attrlen_offset - 4); - write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); + *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); status = nfs_ok; out: @@ -3882,16 +3889,15 @@ static __be32 nfsd4_encode_splice_read( struct xdr_stream *xdr = resp->xdr; struct xdr_buf *buf = xdr->buf; int status, space_left; - u32 eof; __be32 nfserr; - __be32 *p = xdr->p - 2; /* Make sure there will be room for padding if needed */ if (xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, - file, read->rd_offset, 
&maxcount, &eof); + file, read->rd_offset, &maxcount, + &read->rd_eof); read->rd_length = maxcount; if (nfserr) goto out_err; @@ -3902,9 +3908,6 @@ static __be32 nfsd4_encode_splice_read( goto out_err; } - *(p++) = htonl(eof); - *(p++) = htonl(maxcount); - buf->page_len = maxcount; buf->len += maxcount; xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) @@ -3946,11 +3949,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct file *file, unsigned long maxcount) { struct xdr_stream *xdr = resp->xdr; - u32 eof; - int starting_len = xdr->buf->len - 8; + unsigned int starting_len = xdr->buf->len; + __be32 zero = xdr_zero; __be32 nfserr; - __be32 tmp; - int pad; read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); if (read->rd_vlen < 0) @@ -3958,31 +3959,24 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount, - &eof); + &read->rd_eof); read->rd_length = maxcount; if (nfserr) return nfserr; - if (svc_encode_result_payload(resp->rqstp, starting_len + 8, maxcount)) + if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount)) return nfserr_io; - xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); - - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); - tmp = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - - tmp = xdr_zero; - pad = (maxcount&3) ? 
4 - (maxcount&3) : 0; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, - &tmp, pad); - return 0; + xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount)); + write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero, + xdr_pad_size(maxcount)); + return nfs_ok; } static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; struct xdr_stream *xdr = resp->xdr; struct file *file; @@ -3995,11 +3989,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { - WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); + WARN_ON_ONCE(splice_ok); return nfserr_resource; } - if (resp->xdr->buf->page_len && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { + if (resp->xdr->buf->page_len && splice_ok) { WARN_ON_ONCE(1); return nfserr_resource; } @@ -4008,31 +4001,30 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); - if (file->f_op->splice_read && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) + if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); - - if (nfserr) + if (nfserr) { xdr_truncate_encode(xdr, starting_len); + return nfserr; + } - return nfserr; + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(read->rd_length); + return nfs_ok; } static __be32 nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) { - int maxcount; - __be32 wire_count; - int zero = 0; + __be32 *p, *maxcount_p, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; - int status; - __be32 *p; + int maxcount, status; - p = xdr_reserve_space(xdr, 
4); - if (!p) + maxcount_p = xdr_reserve_space(xdr, XDR_UNIT); + if (!maxcount_p) return nfserr_resource; maxcount = PAGE_SIZE; @@ -4057,14 +4049,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd nfserr = nfserrno(status); goto out_err; } - - wire_count = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); - xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); - if (maxcount & 3) - write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, - &zero, 4 - (maxcount&3)); - return 0; + *maxcount_p = cpu_to_be32(maxcount); + xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount)); + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero, + xdr_pad_size(maxcount)); + return nfs_ok; out_err: xdr_truncate_encode(xdr, length_offset); @@ -4715,13 +4704,13 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, __be32 *p; nfserr = nfsd42_encode_write_res(resp, ©->cp_res, - !!copy->cp_synchronous); + nfsd4_copy_is_sync(copy)); if (nfserr) return nfserr; p = xdr_reserve_space(resp->xdr, 4 + 4); *p++ = xdr_one; /* cr_consecutive */ - *p++ = cpu_to_be32(copy->cp_synchronous); + *p = nfsd4_copy_is_sync(copy) ? 
xdr_one : xdr_zero; return 0; } @@ -4919,7 +4908,8 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(1); - return nfsd42_encode_nl4_server(resp, &cn->cpn_src); + nfserr = nfsd42_encode_nl4_server(resp, cn->cpn_src); + return nfserr; } static __be32 @@ -5373,8 +5363,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) so->so_replay.rp_buf, len); } status: - /* Note that op->status is already in network byte order: */ - write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); + *p = op->status; } /* diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 0621c2faf242..917fa1892fd2 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,6 +25,7 @@ #include "state.h" #include "netns.h" #include "pnfs.h" +#include "filecache.h" /* * We have a single directory with several nodes in it. @@ -45,6 +46,7 @@ enum { NFSD_Ports, NFSD_MaxBlkSize, NFSD_MaxConnections, + NFSD_Filecache, NFSD_SupportedEnctypes, /* * The below MUST come last. 
Otherwise we leave a hole in nfsd_files[] @@ -229,6 +231,13 @@ static const struct file_operations reply_cache_stats_operations = { .release = single_release, }; +static const struct file_operations filecache_ops = { + .open = nfsd_file_cache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /*----------------------------------------------------------------------------*/ /* * payload - write methods @@ -633,7 +642,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } /* Now write current state into reply buffer */ - len = 0; sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { @@ -1371,6 +1379,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_Filecache] = {"filecache", &filecache_ops, S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ @@ -1475,14 +1484,7 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_reply_cache_init(nn); if (retval) goto out_drc_error; - nn->nfsd4_lease = 90; /* default lease time */ - nn->nfsd4_grace = 90; - nn->somebody_reclaimed = false; - nn->track_reclaim_completes = false; - nn->clverifier_counter = prandom_u32(); - nn->clientid_base = prandom_u32(); - nn->clientid_counter = nn->clientid_base + 1; - nn->s2s_cp_cl_id = nn->clientid_counter++; + nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); @@ -1517,7 +1519,6 @@ static struct pernet_operations nfsd_net_ops = { static int __init init_nfsd(void) { int retval; - printk(KERN_INFO 
"Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); retval = nfsd4_init_slabs(); if (retval) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 9a8b09afc173..57a468ed85c3 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -341,6 +341,8 @@ void nfsd_lockd_shutdown(void); #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ +#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 +#define NFS4_CLIENTS_PER_GB 1024 /* * The following attributes are currently not supported by the NFSv4 server: @@ -496,12 +498,16 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif +extern void nfsd4_init_leases_net(struct nfsd_net *nn); + #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) { return 0; } +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) {}; + #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c29baa03dfaf..a5b71526cee0 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -331,8 +331,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) struct dentry *dentry; __be32 error; - dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); - if (!fhp->fh_dentry) { error = nfsd_set_fh_dentry(rqstp, fhp); if (error) @@ -340,6 +338,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) } dentry = fhp->fh_dentry; exp = fhp->fh_export; + + trace_nfsd_fh_verify(rqstp, fhp, type, access); + /* * We still have to do all these permission checks, even when * fh_dentry is already set: @@ -548,7 +549,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (ref_fh == fhp) fh_put(ref_fh); - if (fhp->fh_locked || fhp->fh_dentry) { + if (fhp->fh_dentry) { printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", dentry); } @@ -671,6 +672,25 @@ void 
fh_fill_post_attrs(struct svc_fh *fhp) nfsd4_change_attribute(&fhp->fh_post_attr, inode); } +/** + * fh_fill_both_attrs - Fill pre-op and post-op attributes + * @fhp: file handle to be updated + * + * This is used when the directory wasn't changed, but wcc attributes + * are needed anyway. + */ +void fh_fill_both_attrs(struct svc_fh *fhp) +{ + fh_fill_post_attrs(fhp); + if (!fhp->fh_post_saved) + return; + fhp->fh_pre_change = fhp->fh_post_change; + fhp->fh_pre_mtime = fhp->fh_post_attr.mtime; + fhp->fh_pre_ctime = fhp->fh_post_attr.ctime; + fhp->fh_pre_size = fhp->fh_post_attr.size; + fhp->fh_pre_saved = true; +} + /* * Release a file handle. */ @@ -680,7 +700,6 @@ fh_put(struct svc_fh *fhp) struct dentry * dentry = fhp->fh_dentry; struct svc_export * exp = fhp->fh_export; if (dentry) { - fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); fh_clear_pre_post_attrs(fhp); diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index fb9d358a267e..c3ae6414fc5c 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -81,7 +81,6 @@ typedef struct svc_fh { struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ - bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ bool fh_no_wcc; /* no wcc data needed */ bool fh_no_atomic_attr; @@ -93,7 +92,7 @@ typedef struct svc_fh { bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ - /* Pre-op attributes saved during fh_lock */ + /* Pre-op attributes saved when inode is locked */ __u64 fh_pre_size; /* size before operation */ struct timespec64 fh_pre_mtime; /* mtime before oper */ struct timespec64 fh_pre_ctime; /* ctime before oper */ @@ -103,7 +102,7 @@ typedef struct svc_fh { */ u64 fh_pre_change; - /* Post-op attributes saved in fh_unlock */ + /* Post-op attributes saved in fh_fill_post_attrs() */ struct kstat fh_post_attr; /* full attrs after operation */ u64 fh_post_change; /* nfsv4 change; see above */ } svc_fh; 
@@ -223,8 +222,8 @@ void fh_put(struct svc_fh *); static __inline__ struct svc_fh * fh_copy(struct svc_fh *dst, struct svc_fh *src) { - WARN_ON(src->fh_dentry || src->fh_locked); - + WARN_ON(src->fh_dentry); + *dst = *src; return dst; } @@ -322,52 +321,5 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, extern void fh_fill_pre_attrs(struct svc_fh *fhp); extern void fh_fill_post_attrs(struct svc_fh *fhp); - - -/* - * Lock a file handle/inode - * NOTE: both fh_lock and fh_unlock are done "by hand" in - * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once - * so, any changes here should be reflected there. - */ - -static inline void -fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) -{ - struct dentry *dentry = fhp->fh_dentry; - struct inode *inode; - - BUG_ON(!dentry); - - if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", - dentry); - return; - } - - inode = d_inode(dentry); - inode_lock_nested(inode, subclass); - fh_fill_pre_attrs(fhp); - fhp->fh_locked = true; -} - -static inline void -fh_lock(struct svc_fh *fhp) -{ - fh_lock_nested(fhp, I_MUTEX_NORMAL); -} - -/* - * Unlock a file handle/inode - */ -static inline void -fh_unlock(struct svc_fh *fhp) -{ - if (fhp->fh_locked) { - fh_fill_post_attrs(fhp); - inode_unlock(d_inode(fhp->fh_dentry)); - fhp->fh_locked = false; - } -} - +extern void fh_fill_both_attrs(struct svc_fh *fhp); #endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index fcdab8a8a41f..7381972f1677 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -51,6 +51,9 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) struct nfsd_sattrargs *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; struct iattr *iap = &argp->attrs; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; struct svc_fh *fhp; dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", @@ -100,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - resp->status = nfsd_setattr(rqstp, fhp, 
iap, 0, (time64_t)0); + resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0); if (resp->status != nfs_ok) goto out; @@ -260,6 +263,9 @@ nfsd_proc_create(struct svc_rqst *rqstp) svc_fh *dirfhp = &argp->fh; svc_fh *newfhp = &resp->fh; struct iattr *attr = &argp->attrs; + struct nfsd_attrs attrs = { + .na_iattr = attr, + }; struct inode *inode; struct dentry *dchild; int type, mode; @@ -285,7 +291,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) goto done; } - fh_lock_nested(dirfhp, I_MUTEX_PARENT); + inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { resp->status = nfserrno(PTR_ERR(dchild)); @@ -385,7 +391,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) if (!inode) { /* File doesn't exist. Create it and set attrs */ resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name, - argp->len, attr, type, rdev, + argp->len, &attrs, type, rdev, newfhp); } else if (type == S_IFREG) { dprintk("nfsd: existing %s, valid=%x, size=%ld\n", @@ -396,13 +402,12 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - resp->status = nfsd_setattr(rqstp, newfhp, attr, 0, + resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0, (time64_t)0); } out_unlock: - /* We don't really need to unlock, as fh_put does it. 
*/ - fh_unlock(dirfhp); + inode_unlock(dirfhp->fh_dentry->d_inode); fh_drop_write(dirfhp); done: fh_put(dirfhp); @@ -472,6 +477,9 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) { struct nfsd_symlinkargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; struct svc_fh newfh; if (argp->tlen > NFS_MAXPATHLEN) { @@ -493,7 +501,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) fh_init(&newfh, NFS_FHSIZE); resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, &newfh); + argp->tname, &attrs, &newfh); kfree(argp->tname); fh_put(&argp->ffh); @@ -511,6 +519,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd_createargs *argp = rqstp->rq_argp; struct nfsd_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); @@ -522,7 +533,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) argp->attrs.ia_valid &= ~ATTR_SIZE; fh_init(&resp->fh, NFS_FHSIZE); resp->status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, - &argp->attrs, S_IFDIR, 0, &resp->fh); + &attrs, S_IFDIR, 0, &resp->fh); fh_put(&argp->fh); if (resp->status != nfs_ok) goto out; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index f3d6313914ed..ae596dbf8667 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -703,7 +703,6 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); void put_nfs4_file(struct nfs4_file *fi); -extern void nfs4_put_copy(struct nfsd4_copy *copy); extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); extern void nfs4_put_cpntf_state(struct nfsd_net *nn, diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index a60ead3b227a..9ebd67d461f9 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -171,6 +171,52 @@ 
TRACE_EVENT(nfsd_compound_encode_err, __entry->opnum, __entry->status) ); +#define show_fs_file_type(x) \ + __print_symbolic(x, \ + { S_IFLNK, "LNK" }, \ + { S_IFREG, "REG" }, \ + { S_IFDIR, "DIR" }, \ + { S_IFCHR, "CHR" }, \ + { S_IFBLK, "BLK" }, \ + { S_IFIFO, "FIFO" }, \ + { S_IFSOCK, "SOCK" }) + +TRACE_EVENT(nfsd_fh_verify, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + int access + ), + TP_ARGS(rqstp, fhp, type, access), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + __field(u32, xid) + __field(u32, fh_hash) + __field(void *, inode) + __field(unsigned long, type) + __field(unsigned long, access) + ), + TP_fast_assign( + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local, + rqstp->rq_xprt->xpt_locallen); + __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->inode = d_inode(fhp->fh_dentry); + __entry->type = type; + __entry->access = access; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x inode=%p type=%s access=%s", + __entry->xid, __entry->fh_hash, __entry->inode, + show_fs_file_type(__entry->type), + show_nfsd_may_flags(__entry->access) + ) +); DECLARE_EVENT_CLASS(nfsd_fh_err_class, TP_PROTO(struct svc_rqst *rqstp, @@ -696,15 +742,12 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ - { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), TP_ARGS(nf), TP_STRUCT__entry( - __field(unsigned int, nf_hashval) __field(void *, nf_inode) __field(int, nf_ref) __field(unsigned long, 
nf_flags) @@ -712,15 +755,13 @@ DECLARE_EVENT_CLASS(nfsd_file_class, __field(struct file *, nf_file) ), TP_fast_assign( - __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p", - __entry->nf_hashval, + TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p", __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), @@ -733,34 +774,59 @@ DEFINE_EVENT(nfsd_file_class, name, \ TP_PROTO(struct nfsd_file *nf), \ TP_ARGS(nf)) -DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); + +TRACE_EVENT(nfsd_file_alloc, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_flags = nf->nf_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_may = nf->nf_may; + ), + TP_printk("inode=%p ref=%u flags=%s may=%s", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may) + ) +); TRACE_EVENT(nfsd_file_acquire, - TP_PROTO(struct svc_rqst *rqstp, unsigned int hash, - struct inode *inode, unsigned int may_flags, - struct nfsd_file *nf, __be32 status), + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + const struct nfsd_file *nf, + __be32 status + ), - TP_ARGS(rqstp, hash, inode, may_flags, nf, status), + TP_ARGS(rqstp, inode, may_flags, nf, status), TP_STRUCT__entry( __field(u32, 
xid) - __field(unsigned int, hash) - __field(void *, inode) + __field(const void *, inode) __field(unsigned long, may_flags) - __field(int, nf_ref) + __field(unsigned int, nf_ref) __field(unsigned long, nf_flags) __field(unsigned long, nf_may) - __field(struct file *, nf_file) + __field(const void *, nf_file) __field(u32, status) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; @@ -770,19 +836,117 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x hash=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", - __entry->xid, __entry->hash, __entry->inode, + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u", + __entry->xid, __entry->inode, show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nfsd_may_flags(__entry->nf_may), - __entry->nf_file, __entry->status) + __entry->nf_file, __entry->status + ) +); + +TRACE_EVENT(nfsd_file_create, + TP_PROTO( + const struct svc_rqst *rqstp, + unsigned int may_flags, + const struct nfsd_file *nf + ), + + TP_ARGS(rqstp, may_flags, nf), + + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(const void *, nf_file) + __field(unsigned long, may_flags) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->may_flags = may_flags; + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + ), + + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->nf_inode, + show_nfsd_may_flags(__entry->may_flags), + 
__entry->nf_ref, show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) +); + +TRACE_EVENT(nfsd_file_insert_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + long error + ), + TP_ARGS(rqstp, inode, may_flags, error), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(long, error) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->error = error; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->error + ) +); + +TRACE_EVENT(nfsd_file_cons_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + const struct nfsd_file *nf + ), + TP_ARGS(rqstp, inode, may_flags, nf), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(unsigned int, nf_ref) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(const void *, nf_file) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) ); TRACE_EVENT(nfsd_file_open, TP_PROTO(struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( - __field(unsigned int, nf_hashval) __field(void *, nf_inode) /* cannot be dereferenced */ __field(int, nf_ref) 
__field(unsigned long, nf_flags) @@ -790,15 +954,13 @@ TRACE_EVENT(nfsd_file_open, __field(void *, nf_file) /* cannot be dereferenced */ ), TP_fast_assign( - __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p", - __entry->nf_hashval, + TP_printk("inode=%p ref=%d flags=%s may=%s file=%p", __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), @@ -807,30 +969,53 @@ TRACE_EVENT(nfsd_file_open, ) DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO(struct inode *inode, unsigned int hash, int found), - TP_ARGS(inode, hash, found), + TP_PROTO( + const struct inode *inode, + unsigned int count + ), + TP_ARGS(inode, count), TP_STRUCT__entry( - __field(struct inode *, inode) - __field(unsigned int, hash) - __field(int, found) + __field(const struct inode *, inode) + __field(unsigned int, count) ), TP_fast_assign( __entry->inode = inode; - __entry->hash = hash; - __entry->found = found; + __entry->count = count; ), - TP_printk("hash=0x%x inode=%p found=%d", __entry->hash, - __entry->inode, __entry->found) + TP_printk("inode=%p count=%u", + __entry->inode, __entry->count) ); #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO(struct inode *inode, unsigned int hash, int found), \ - TP_ARGS(inode, hash, found)) + TP_PROTO( \ + const struct inode *inode, \ + unsigned int count \ + ), \ + TP_ARGS(inode, count)) DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); + +TRACE_EVENT(nfsd_file_is_cached, + TP_PROTO( + const struct inode *inode, + int found + ), + TP_ARGS(inode, found), + TP_STRUCT__entry( + __field(const struct inode *, inode) + __field(int, found) + ), + 
TP_fast_assign( + __entry->inode = inode; + __entry->found = found; + ), + TP_printk("inode=%p is %scached", + __entry->inode, + __entry->found ? "" : "not " + ) +); TRACE_EVENT(nfsd_file_fsnotify_handle_event, TP_PROTO(struct inode *inode, u32 mask), @@ -851,6 +1036,76 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->nlink, __entry->mode, __entry->mask) ); +DECLARE_EVENT_CLASS(nfsd_file_gc_class, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(void *, nf_inode) + __field(void *, nf_file) + __field(int, nf_ref) + __field(unsigned long, nf_flags) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + ), + TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + __entry->nf_file + ) +); + +#define DEFINE_NFSD_FILE_GC_EVENT(name) \ +DEFINE_EVENT(nfsd_file_gc_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf \ + ), \ + TP_ARGS(nf)) + +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); + +DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, + TP_PROTO( + unsigned long removed, + unsigned long remaining + ), + TP_ARGS(removed, remaining), + TP_STRUCT__entry( + __field(unsigned long, removed) + __field(unsigned long, remaining) + ), + TP_fast_assign( + __entry->removed = removed; + __entry->remaining = remaining; + ), + TP_printk("%lu entries removed, %lu remaining", + __entry->removed, __entry->remaining) +); + +#define 
DEFINE_NFSD_FILE_LRUWALK_EVENT(name) \ +DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ + TP_PROTO( \ + unsigned long removed, \ + unsigned long remaining \ + ), \ + TP_ARGS(removed, remaining)) + +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); + #include "cache.h" TRACE_DEFINE_ENUM(RC_DROPIT); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d79db56475d4..9f486b788ed0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -199,27 +199,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - /* - * In the nfsd4_open() case, this may be held across - * subsequent open and delegation acquisition which may - * need to take the child's i_mutex: - */ - fh_lock_nested(fhp, I_MUTEX_PARENT); - dentry = lookup_one_len(name, dparent, len); + dentry = lookup_one_len_unlocked(name, dparent, len); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; if (nfsd_mountpoint(dentry, exp)) { - /* - * We don't need the i_mutex after all. It's - * still possible we could open this (regular - * files can be mountpoints too), but the - * i_mutex is just there to prevent renames of - * something that we might be about to delegate, - * and a mountpoint won't be renamed: - */ - fh_unlock(fhp); - if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { + host_err = nfsd_cross_mnt(rqstp, &dentry, &exp); + if (host_err) { dput(dentry); goto out_nfserr; } @@ -234,7 +220,15 @@ out_nfserr: return nfserrno(host_err); } -/* +/** + * nfsd_lookup - look up a single path component for nfsd + * + * @rqstp: the request context + * @fhp: the file handle of the directory + * @name: the component name, or %NULL to look up parent + * @len: length of name to examine + * @resfh: pointer to pre-initialised filehandle to hold result. + * * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put * @@ -244,11 +238,11 @@ out_nfserr: * returned. 
Otherwise the covered directory is returned. * NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 - * NeilBrown + * */ __be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, - unsigned int len, struct svc_fh *resfh) + unsigned int len, struct svc_fh *resfh) { struct svc_export *exp; struct dentry *dentry; @@ -349,11 +343,13 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, * Set various file attributes. After this call fhp needs an fh_put. */ __be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_attrs *attr, int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; + struct iattr *iap = attr->na_iattr; int accmode = NFSD_MAY_SATTR; umode_t ftype = 0; __be32 err; @@ -420,7 +416,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, return err; } - fh_lock(fhp); + inode_lock(inode); if (size_change) { /* * RFC5661, Section 18.30.4: @@ -456,7 +452,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, host_err = notify_change(&init_user_ns, dentry, iap, NULL); out_unlock: - fh_unlock(fhp); + if (attr->na_seclabel && attr->na_seclabel->len) + attr->na_labelerr = security_inode_setsecctx(dentry, + attr->na_seclabel->data, attr->na_seclabel->len); + if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl) + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_ACCESS, + attr->na_pacl); + if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && + !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode)) + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_DEFAULT, + attr->na_dpacl); + inode_unlock(inode); if (size_change) put_write_access(inode); out: @@ -494,32 +502,6 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL -__be32 
nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct xdr_netobj *label) -{ - __be32 error; - int host_error; - struct dentry *dentry; - - error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); - if (error) - return error; - - dentry = fhp->fh_dentry; - - inode_lock(d_inode(dentry)); - host_error = security_inode_setsecctx(dentry, label->data, label->len); - inode_unlock(d_inode(dentry)); - return nfserrno(host_error); -} -#else -__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct xdr_netobj *label) -{ - return nfserr_notsupp; -} -#endif static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp) { @@ -1202,14 +1184,15 @@ out: * @rqstp: RPC transaction being executed * @fhp: NFS filehandle of parent directory * @resfhp: NFS filehandle of new object - * @iap: requested attributes of new object + * @attrs: requested attributes of new object * * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct svc_fh *resfhp, struct iattr *iap) + struct svc_fh *resfhp, struct nfsd_attrs *attrs) { + struct iattr *iap = attrs->na_iattr; __be32 status; /* @@ -1230,7 +1213,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, * if the attributes have not changed. 
*/ if (iap->ia_valid) - status = nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); + status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0); else status = nfserrno(commit_metadata(resfhp)); @@ -1269,11 +1252,12 @@ nfsd_check_ignore_resizing(struct iattr *iap) /* The parent directory should already be locked: */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - int type, dev_t rdev, struct svc_fh *resfhp) + char *fname, int flen, struct nfsd_attrs *attrs, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild; struct inode *dirp; + struct iattr *iap = attrs->na_iattr; __be32 err; int host_err; @@ -1281,13 +1265,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, dirp = d_inode(dentry); dchild = dget(resfhp->fh_dentry); - if (!fhp->fh_locked) { - WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n", - dentry); - err = nfserr_io; - goto out; - } - err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE); if (err) goto out; @@ -1347,7 +1324,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err < 0) goto out_nfserr; - err = nfsd_create_setattr(rqstp, fhp, resfhp, iap); + err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); out: dput(dchild); @@ -1366,8 +1343,8 @@ out_nfserr: */ __be32 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - int type, dev_t rdev, struct svc_fh *resfhp) + char *fname, int flen, struct nfsd_attrs *attrs, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; __be32 err; @@ -1386,11 +1363,13 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - fh_lock_nested(fhp, I_MUTEX_PARENT); + inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - return nfserrno(host_err); + if 
(IS_ERR(dchild)) { + err = nfserrno(host_err); + goto out_unlock; + } err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); /* * We unconditionally drop our ref to dchild as fh_compose will have @@ -1398,9 +1377,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ dput(dchild); if (err) - return err; - return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, - rdev, resfhp); + goto out_unlock; + fh_fill_pre_attrs(fhp); + err = nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type, + rdev, resfhp); + fh_fill_post_attrs(fhp); +out_unlock: + inode_unlock(dentry->d_inode); + return err; } /* @@ -1441,15 +1425,25 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) return 0; } -/* - * Create a symlink and look up its inode +/** + * nfsd_symlink - Create a symlink and look up its inode + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of parent directory + * @fname: filename of the new symlink + * @flen: length of @fname + * @path: content of the new symlink (NUL-terminated) + * @attrs: requested attributes of new object + * @resfhp: NFS filehandle of new object + * * N.B. After this call _both_ fhp and resfhp need an fh_put + * + * Returns nfs_ok on success, or an nfsstat in network byte order. 
*/ __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, - char *path, - struct svc_fh *resfhp) + char *fname, int flen, + char *path, struct nfsd_attrs *attrs, + struct svc_fh *resfhp) { struct dentry *dentry, *dnew; __be32 err, cerr; @@ -1467,33 +1461,35 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; + if (host_err) { + err = nfserrno(host_err); + goto out; + } - fh_lock(fhp); dentry = fhp->fh_dentry; + inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dnew = lookup_one_len(fname, dentry, flen); - host_err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - + if (IS_ERR(dnew)) { + err = nfserrno(PTR_ERR(dnew)); + inode_unlock(dentry->d_inode); + goto out_drop_write; + } + fh_fill_pre_attrs(fhp); host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); err = nfserrno(host_err); - fh_unlock(fhp); + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); + if (!err) + nfsd_create_setattr(rqstp, fhp, resfhp, attrs); + fh_fill_post_attrs(fhp); + inode_unlock(dentry->d_inode); if (!err) err = nfserrno(commit_metadata(fhp)); - - fh_drop_write(fhp); - - cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); dput(dnew); if (err==0) err = cerr; +out_drop_write: + fh_drop_write(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } /* @@ -1531,22 +1527,25 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, goto out; } - fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = d_inode(ddir); + inode_lock_nested(dirp, I_MUTEX_PARENT); dnew = lookup_one_len(name, ddir, len); - host_err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; + if (IS_ERR(dnew)) { + err = nfserrno(PTR_ERR(dnew)); + goto out_unlock; + } dold = tfhp->fh_dentry; err = nfserr_noent; if (d_really_is_negative(dold)) goto out_dput; + fh_fill_pre_attrs(ffhp); host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL); - 
fh_unlock(ffhp); + fh_fill_post_attrs(ffhp); + inode_unlock(dirp); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1557,17 +1556,17 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } -out_dput: dput(dnew); -out_unlock: - fh_unlock(ffhp); +out_drop_write: fh_drop_write(tfhp); out: return err; -out_nfserr: - err = nfserrno(host_err); - goto out_unlock; +out_dput: + dput(dnew); +out_unlock: + inode_unlock(dirp); + goto out_drop_write; } static void @@ -1628,10 +1627,7 @@ retry: goto out; } - /* cannot use fh_lock as we need deadlock protective ordering - * so do it by hand */ trap = lock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = true; fh_fill_pre_attrs(ffhp); fh_fill_pre_attrs(tfhp); @@ -1687,17 +1683,12 @@ retry: dput(odentry); out_nfserr: err = nfserrno(host_err); - /* - * We cannot rely on fh_unlock on the two filehandles, - * as that would do the wrong thing if the two directories - * were the same, so again we do it by hand. 
- */ + if (!close_cached) { fh_fill_post_attrs(ffhp); fh_fill_post_attrs(tfhp); } unlock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); /* @@ -1741,19 +1732,19 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_nfserr; - fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = d_inode(dentry); + inode_lock_nested(dirp, I_MUTEX_PARENT); rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_drop_write; + goto out_unlock; if (d_really_is_negative(rdentry)) { dput(rdentry); host_err = -ENOENT; - goto out_drop_write; + goto out_unlock; } rinode = d_inode(rdentry); ihold(rinode); @@ -1761,6 +1752,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; + fh_fill_pre_attrs(fhp); if (type != S_IFDIR) { if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) nfsd_close_cached_files(rdentry); @@ -1768,8 +1760,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } else { host_err = vfs_rmdir(&init_user_ns, dirp, rdentry); } + fh_fill_post_attrs(fhp); - fh_unlock(fhp); + inode_unlock(dirp); if (!host_err) host_err = commit_metadata(fhp); dput(rdentry); @@ -1791,6 +1784,9 @@ out_nfserr: } out: return err; +out_unlock: + inode_unlock(dirp); + goto out_drop_write; } /* @@ -2144,13 +2140,16 @@ out: return err; } -/* - * Removexattr and setxattr need to call fh_lock to both lock the inode - * and set the change attribute. Since the top-level vfs_removexattr - * and vfs_setxattr calls already do their own inode_lock calls, call - * the _locked variant. Pass in a NULL pointer for delegated_inode, - * and let the client deal with NFS4ERR_DELAY (same as with e.g. - * setattr and remove). 
+/** + * nfsd_removexattr - Remove an extended attribute + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of object with xattr to remove + * @name: name of xattr to remove (NUL-terminate) + * + * Pass in a NULL pointer for delegated_inode, and let the client deal + * with NFS4ERR_DELAY (same as with e.g. setattr and remove). + * + * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) @@ -2166,12 +2165,14 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) if (ret) return nfserrno(ret); - fh_lock(fhp); + inode_lock(fhp->fh_dentry->d_inode); + fh_fill_pre_attrs(fhp); ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry, name, NULL); - fh_unlock(fhp); + fh_fill_post_attrs(fhp); + inode_unlock(fhp->fh_dentry->d_inode); fh_drop_write(fhp); return nfsd_xattr_errno(ret); @@ -2191,12 +2192,13 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, ret = fh_want_write(fhp); if (ret) return nfserrno(ret); - fh_lock(fhp); + inode_lock(fhp->fh_dentry->d_inode); + fh_fill_pre_attrs(fhp); ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf, len, flags, NULL); - - fh_unlock(fhp); + fh_fill_post_attrs(fhp); + inode_unlock(fhp->fh_dentry->d_inode); fh_drop_write(fhp); return nfsd_xattr_errno(ret); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 26347d76f44a..c95cd414b4bb 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -6,6 +6,8 @@ #ifndef LINUX_NFSD_VFS_H #define LINUX_NFSD_VFS_H +#include +#include #include "nfsfh.h" #include "nfsd.h" @@ -42,6 +44,22 @@ struct nfsd_file; typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); /* nfsd/vfs.c */ +struct nfsd_attrs { + struct iattr *na_iattr; /* input */ + struct xdr_netobj *na_seclabel; /* input */ + struct posix_acl *na_pacl; /* input */ + struct posix_acl *na_dpacl; /* input */ + + int na_labelerr; /* output */ + int na_aclerr; 
/* output */ +}; + +static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) +{ + posix_acl_release(attrs->na_pacl); + posix_acl_release(attrs->na_dpacl); +} + int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, @@ -50,11 +68,9 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct iattr *, int, time64_t); + struct nfsd_attrs *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 -__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, - struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, @@ -63,14 +79,14 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, + char *name, int len, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *res); __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, + char *name, int len, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *res); __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct svc_fh *resfhp, struct iattr *iap); + struct svc_fh *resfhp, struct nfsd_attrs *iap); __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, u64 offset, u32 count, __be32 *verf); #ifdef CONFIG_NFSD_V4 @@ -110,8 +126,9 @@ __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, 
- char *name, int len, char *path, - struct svc_fh *res); + char *name, int len, char *path, + struct nfsd_attrs *attrs, + struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); ssize_t nfsd_copy_file_range(struct file *, u64, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 7b744011f2d3..96267258e629 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -279,6 +279,7 @@ struct nfsd4_open { struct nfs4_clnt_odstate *op_odstate; /* used during processing */ struct nfs4_acl *op_acl; struct xdr_netobj op_label; + struct svc_rqst *op_rqstp; }; struct nfsd4_open_confirm { @@ -302,9 +303,10 @@ struct nfsd4_read { u32 rd_length; /* request */ int rd_vlen; struct nfsd_file *rd_nf; - + struct svc_rqst *rd_rqstp; /* response */ - struct svc_fh *rd_fhp; /* response */ + struct svc_fh *rd_fhp; /* response */ + u32 rd_eof; /* response */ }; struct nfsd4_readdir { @@ -532,6 +534,13 @@ struct nfsd42_write_res { stateid_t cb_stateid; }; +struct nfsd4_cb_offload { + struct nfsd4_callback co_cb; + struct nfsd42_write_res co_res; + __be32 co_nfserr; + struct knfsd_fh co_fh; +}; + struct nfsd4_copy { /* request */ stateid_t cp_src_stateid; @@ -539,18 +548,16 @@ struct nfsd4_copy { u64 cp_src_pos; u64 cp_dst_pos; u64 cp_count; - struct nl4_server cp_src; - bool cp_intra; + struct nl4_server *cp_src; - /* both */ - u32 cp_synchronous; + unsigned long cp_flags; +#define NFSD4_COPY_F_STOPPED (0) +#define NFSD4_COPY_F_INTRA (1) +#define NFSD4_COPY_F_SYNCHRONOUS (2) +#define NFSD4_COPY_F_COMMITTED (3) /* response */ struct nfsd42_write_res cp_res; - - /* for cb_offload */ - struct nfsd4_callback cp_cb; - __be32 nfserr; struct knfsd_fh fh; struct nfs4_client *cp_clp; @@ -563,14 +570,35 @@ struct nfsd4_copy { struct list_head copies; struct task_struct *copy_task; refcount_t refcount; - bool stopped; struct vfsmount *ss_mnt; struct nfs_fh c_fh; nfs4_stateid stateid; - bool committed; }; +static inline void nfsd4_copy_set_sync(struct nfsd4_copy 
*copy, bool sync) +{ + if (sync) + set_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags); + else + clear_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags); +} + +static inline bool nfsd4_copy_is_sync(const struct nfsd4_copy *copy) +{ + return test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags); +} + +static inline bool nfsd4_copy_is_async(const struct nfsd4_copy *copy) +{ + return !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags); +} + +static inline bool nfsd4_ssc_is_inter(const struct nfsd4_copy *copy) +{ + return !test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags); +} + struct nfsd4_seek { /* request */ stateid_t seek_stateid; @@ -594,19 +622,20 @@ struct nfsd4_offload_status { struct nfsd4_copy_notify { /* request */ stateid_t cpn_src_stateid; - struct nl4_server cpn_dst; + struct nl4_server *cpn_dst; /* response */ stateid_t cpn_cnr_stateid; u64 cpn_sec; u32 cpn_nsec; - struct nl4_server cpn_src; + struct nl4_server *cpn_src; }; struct nfsd4_op { u32 opnum; - const struct nfsd4_operation * opdesc; __be32 status; + const struct nfsd4_operation *opdesc; + struct nfs4_replay *replay; union nfsd4_op_u { struct nfsd4_access access; struct nfsd4_close close; @@ -670,7 +699,6 @@ struct nfsd4_op { struct nfsd4_listxattrs listxattrs; struct nfsd4_removexattr removexattr; } u; - struct nfs4_replay * replay; }; bool nfsd4_cache_this_op(struct nfsd4_op *); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index fcef192e5e45..70ce419e2709 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -292,6 +292,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, struct nlm_lock *); void nlm_release_file(struct nlm_file *); +void nlmsvc_put_lockowner(struct nlm_lockowner *); void nlmsvc_release_lockowner(struct nlm_lock *); void nlmsvc_mark_resources(struct net *); void nlmsvc_free_host_resources(struct nlm_host *); diff --git a/include/linux/lockd/xdr.h
b/include/linux/lockd/xdr.h index 398f70093cd3..67e4a2c5500b 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -41,6 +41,8 @@ struct nlm_lock { struct nfs_fh fh; struct xdr_netobj oh; u32 svid; + u64 lock_start; + u64 lock_len; struct file_lock fl; }; diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h index 222ae8883e85..75843c00f326 100644 --- a/include/linux/nfs_ssc.h +++ b/include/linux/nfs_ssc.h @@ -64,7 +64,7 @@ struct nfsd4_ssc_umount_item { refcount_t nsui_refcnt; unsigned long nsui_expire; struct vfsmount *nsui_vfsmount; - char nsui_ipaddr[RPC_MAX_ADDRBUFLEN]; + char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1]; }; #endif diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5860f32e3958..986c8a17ca5e 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -419,8 +419,8 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) */ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) { - *p = n ? xdr_one : xdr_zero; - return p++; + *p++ = n ? 
xdr_one : xdr_zero; + return p; } /** diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index b61d9c90fa26..5c48be033cc7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1989,20 +1989,24 @@ TRACE_EVENT(svc_wake_up, TRACE_EVENT(svc_alloc_arg_err, TP_PROTO( - unsigned int pages + unsigned int requested, + unsigned int allocated ), - TP_ARGS(pages), + TP_ARGS(requested, allocated), TP_STRUCT__entry( - __field(unsigned int, pages) + __field(unsigned int, requested) + __field(unsigned int, allocated) ), TP_fast_assign( - __entry->pages = pages; + __entry->requested = requested; + __entry->allocated = allocated; ), - TP_printk("pages=%u", __entry->pages) + TP_printk("requested=%u allocated=%u", + __entry->requested, __entry->allocated) ); DECLARE_EVENT_CLASS(svc_deferred_event, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2c4dd7ca95b0..2106003645a7 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -691,7 +691,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) set_current_state(TASK_RUNNING); return -EINTR; } - trace_svc_alloc_arg_err(pages); + trace_svc_alloc_arg_err(pages, ret); memalloc_retry_wait(GFP_KERNEL); } rqstp->rq_page_end = &rqstp->rq_pages[pages];