diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index dac435575384..204dd3ea36bb 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -361,8 +361,6 @@ so fl_release_private called on a lease should not block. ----------------------- lock_manager_operations --------------------------- prototypes: - int (*lm_compare_owner)(struct file_lock *, struct file_lock *); - unsigned long (*lm_owner_key)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, struct file_lock *, int); void (*lm_break)(struct file_lock *); /* break_lease callback */ @@ -371,23 +369,11 @@ prototypes: locking rules: inode->i_lock blocked_lock_lock may block -lm_compare_owner: yes[1] maybe no -lm_owner_key yes[1] yes no lm_notify: yes yes no lm_grant: no no no lm_break: yes no no lm_change yes no no -[1]: ->lm_compare_owner and ->lm_owner_key are generally called with -*an* inode->i_lock held. It may not be the i_lock of the inode -associated with either file_lock argument! This is the case with deadlock -detection, since the code has to chase down the owners of locks that may -be entirely unrelated to the one on which the lock is being acquired. -For deadlock detection however, the blocked_lock_lock is also held. The -fact that these locks are held ensures that the file_locks do not -disappear out from under you while doing the comparison or generating an -owner key. - --------------------------- buffer_head ----------------------------------- prototypes: void (*b_end_io)(struct buffer_head *bh, int uptodate); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 62f98225abb3..b11f2afa84f1 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -47,13 +47,14 @@ void nlmclnt_next_cookie(struct nlm_cookie *c) c->len=4; } -static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) +static struct nlm_lockowner * +nlmclnt_get_lockowner(struct nlm_lockowner *lockowner) { refcount_inc(&lockowner->count); return lockowner; } -static void nlm_put_lockowner(struct nlm_lockowner *lockowner) +static void nlmclnt_put_lockowner(struct nlm_lockowner *lockowner) { if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) return; @@ -82,28 +83,28 @@ static inline uint32_t __nlm_alloc_pid(struct nlm_host *host) return res; } -static struct nlm_lockowner *__nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner) +static struct nlm_lockowner *__nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner) { struct nlm_lockowner *lockowner; list_for_each_entry(lockowner, &host->h_lockowners, list) { if (lockowner->owner != owner) continue; - return nlm_get_lockowner(lockowner); + return nlmclnt_get_lockowner(lockowner); } return NULL; } -static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner) +static struct nlm_lockowner *nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner) { struct nlm_lockowner *res, *new = NULL; spin_lock(&host->h_lock); - res = __nlm_find_lockowner(host, owner); + res = __nlmclnt_find_lockowner(host, owner); if (res == NULL) { spin_unlock(&host->h_lock); new = kmalloc(sizeof(*new), GFP_KERNEL); spin_lock(&host->h_lock); - res = __nlm_find_lockowner(host, owner); + res = __nlmclnt_find_lockowner(host, owner); if (res == NULL && new != NULL) { res = new; refcount_set(&new->count, 1); @@ -457,7 +458,7 @@ static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) { spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock); new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state; - new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner); + new->fl_u.nfs_fl.owner = nlmclnt_get_lockowner(fl->fl_u.nfs_fl.owner); list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted); spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock); } @@ -467,7 +468,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl) spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock); list_del(&fl->fl_u.nfs_fl.list); spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock); - nlm_put_lockowner(fl->fl_u.nfs_fl.owner); + nlmclnt_put_lockowner(fl->fl_u.nfs_fl.owner); } static const struct file_lock_operations nlmclnt_lock_ops = { @@ -478,7 +479,7 @@ static const struct file_lock_operations nlmclnt_lock_ops = { static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host) { fl->fl_u.nfs_fl.state = 0; - fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner); + fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host, fl->fl_owner); INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list); fl->fl_ops = &nlmclnt_lock_ops; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1bddf70d9656..e4d3f783e06a 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -46,8 +46,14 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file; - lock->fl.fl_owner = (fl_owner_t) host; + lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; + nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); + if (!lock->fl.fl_owner) { + /* lockowner allocation has failed */ + nlmsvc_release_host(host); + return nlm_lck_denied_nolocks; + } } return 0; @@ -94,6 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -142,6 +149,7 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -178,6 +186,7 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock); dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -217,6 +226,7 @@ __nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock); dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -365,6 +375,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp) resp->status = nlmsvc_share_file(host, file, argp); dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -399,6 +410,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp) resp->status = nlmsvc_unshare_file(host, file, argp); dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index ea719cdd6a36..61d3cc2283dc 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -332,6 +332,93 @@ void nlmsvc_traverse_blocks(struct nlm_host *host, mutex_unlock(&file->f_mutex); } +static struct nlm_lockowner * +nlmsvc_get_lockowner(struct nlm_lockowner *lockowner) +{ + refcount_inc(&lockowner->count); + return lockowner; +} + +static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) +{ + if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) + return; + list_del(&lockowner->list); + spin_unlock(&lockowner->host->h_lock); + nlmsvc_release_host(lockowner->host); + kfree(lockowner); +} + +static struct nlm_lockowner *__nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid) +{ + struct nlm_lockowner *lockowner; + list_for_each_entry(lockowner, &host->h_lockowners, list) { + if (lockowner->pid != pid) + continue; + return nlmsvc_get_lockowner(lockowner); + } + return NULL; +} + +static struct nlm_lockowner *nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid) +{ + struct nlm_lockowner *res, *new = NULL; + + spin_lock(&host->h_lock); + res = __nlmsvc_find_lockowner(host, pid); + + if (res == NULL) { + spin_unlock(&host->h_lock); + new = kmalloc(sizeof(*res), GFP_KERNEL); + spin_lock(&host->h_lock); + res = __nlmsvc_find_lockowner(host, pid); + if (res == NULL && new != NULL) { + res = new; + /* fs/locks.c will manage the refcount through lock_ops */ + refcount_set(&new->count, 1); + new->pid = pid; + new->host = nlm_get_host(host); + list_add(&new->list, &host->h_lockowners); + new = NULL; + } + } + + spin_unlock(&host->h_lock); + kfree(new); + return res; +} + +void +nlmsvc_release_lockowner(struct nlm_lock *lock) +{ + if (lock->fl.fl_owner) + nlmsvc_put_lockowner(lock->fl.fl_owner); +} + +static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner; + new->fl_owner = nlmsvc_get_lockowner(nlm_lo); +} + +static void nlmsvc_locks_release_private(struct file_lock *fl) +{ + nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner); +} + +static const struct file_lock_operations nlmsvc_lock_ops = { + .fl_copy_lock = nlmsvc_locks_copy_lock, + .fl_release_private = nlmsvc_locks_release_private, +}; + +void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host, + pid_t pid) +{ + fl->fl_owner = nlmsvc_find_lockowner(host, pid); + if (fl->fl_owner != NULL) + fl->fl_ops = &nlmsvc_lock_ops; +} + /* * Initialize arguments for GRANTED call. The nlm_rqst structure * has been cleared already. @@ -345,7 +432,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) /* set default data area */ call->a_args.lock.oh.data = call->a_owner; - call->a_args.lock.svid = lock->fl.fl_pid; + call->a_args.lock.svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid; if (lock->oh.len > NLMCLNT_OHSIZE) { void *data = kmalloc(lock->oh.len, GFP_KERNEL); @@ -509,6 +596,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, { int error; __be32 ret; + struct nlm_lockowner *test_owner; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", locks_inode(file->f_file)->i_sb->s_id, @@ -522,6 +610,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } + /* If there's a conflicting lock, remember to clean up the test lock */ + test_owner = (struct nlm_lockowner *)lock->fl.fl_owner; + error = vfs_test_lock(file->f_file, &lock->fl); if (error) { /* We can't currently deal with deferred test requests */ @@ -543,11 +634,16 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->caller = "somehost"; /* FIXME */ conflock->len = strlen(conflock->caller); conflock->oh.len = 0; /* don't return OH info */ - conflock->svid = lock->fl.fl_pid; + conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid; conflock->fl.fl_type = lock->fl.fl_type; conflock->fl.fl_start = lock->fl.fl_start; conflock->fl.fl_end = lock->fl.fl_end; locks_release_private(&lock->fl); + + /* Clean up the test lock */ + lock->fl.fl_owner = NULL; + nlmsvc_put_lockowner(test_owner); + ret = nlm_lck_denied; out: return ret; @@ -692,25 +788,7 @@ nlmsvc_notify_blocked(struct file_lock *fl) printk(KERN_WARNING "lockd: notification for unknown block!\n"); } -static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) -{ - return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; -} - -/* - * Since NLM uses two "keys" for tracking locks, we need to hash them down - * to one for the blocked_hash. Here, we're just xor'ing the host address - * with the pid in order to create a key value for picking a hash bucket. - */ -static unsigned long -nlmsvc_owner_key(struct file_lock *fl) -{ - return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid; -} - const struct lock_manager_operations nlmsvc_lock_operations = { - .lm_compare_owner = nlmsvc_same_owner, - .lm_owner_key = nlmsvc_owner_key, .lm_notify = nlmsvc_notify_blocked, .lm_grant = nlmsvc_grant_deferred, }; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index ea77c66d3cc3..d0bb7a6bf005 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -76,8 +76,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file; - lock->fl.fl_owner = (fl_owner_t) host; + lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; + nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); + if (!lock->fl.fl_owner) { + /* lockowner allocation has failed */ + nlmsvc_release_host(host); + return nlm_lck_denied_nolocks; + } } return 0; @@ -125,6 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) dprintk("lockd: TEST status %d vers %d\n", ntohl(resp->status), rqstp->rq_vers); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -173,6 +180,7 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -210,6 +218,7 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock)); dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -250,6 +259,7 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock)); dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -408,6 +418,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp) resp->status = cast_status(nlmsvc_share_file(host, file, argp)); dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -442,6 +453,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp) resp->status = cast_status(nlmsvc_unshare_file(host, file, argp)); dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 0e610f422406..028fc152da22 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -180,7 +180,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, /* update current lock count */ file->f_locks++; - lockhost = (struct nlm_host *) fl->fl_owner; + lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host; if (match(lockhost, host)) { struct file_lock lock = *fl; diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 7147e4aebecc..982629f7b120 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -126,8 +126,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock) lock->svid = ntohl(*p++); locks_init_lock(fl); - fl->fl_owner = current->files; - fl->fl_pid = (pid_t)lock->svid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ start = ntohl(*p++); @@ -269,7 +267,6 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = (pid_t)lock->svid; if (!(p = nlm_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 7ed9edf9aed4..5fa9f48a9dba 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -118,8 +118,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) lock->svid = ntohl(*p++); locks_init_lock(fl); - fl->fl_owner = current->files; - fl->fl_pid = (pid_t)lock->svid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ p = xdr_decode_hyper(p, &start); @@ -266,7 +264,6 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = (pid_t)lock->svid; if (!(p = nlm4_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, diff --git a/fs/locks.c b/fs/locks.c index de87a3231789..686eae21daf6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -658,9 +658,6 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) */ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) { - if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner) - return fl2->fl_lmops == fl1->fl_lmops && - fl1->fl_lmops->lm_compare_owner(fl1, fl2); return fl1->fl_owner == fl2->fl_owner; } @@ -701,8 +698,6 @@ static void locks_delete_global_locks(struct file_lock *fl) static unsigned long posix_owner_key(struct file_lock *fl) { - if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) - return fl->fl_lmops->lm_owner_key(fl); return (unsigned long)fl->fl_owner; } diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 4fb1f72a25fb..66d4c55eb48e 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -121,15 +121,13 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, { loff_t new_size = lcp->lc_last_wr + 1; struct iattr iattr = { .ia_valid = 0 }; - struct timespec ts; int error; - ts = timespec64_to_timespec(inode->i_mtime); if (lcp->lc_mtime.tv_nsec == UTIME_NOW || - timespec_compare(&lcp->lc_mtime, &ts) < 0) - lcp->lc_mtime = timespec64_to_timespec(current_time(inode)); + timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) + lcp->lc_mtime = current_time(inode); iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; - iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime); + iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; if (new_size > i_size_read(inode)) { iattr.ia_valid |= ATTR_SIZE; diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 4a98537efb0f..10ec5ecdf117 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -10,6 +10,7 @@ #define NFSCACHE_H #include +#include "netns.h" /* * Representation of a reply cache entry. @@ -77,8 +78,8 @@ enum { /* Checksum this amount of the request */ #define RC_CSUMLEN (256U) -int nfsd_reply_cache_init(void); -void nfsd_reply_cache_shutdown(void); +int nfsd_reply_cache_init(struct nfsd_net *); +void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); int nfsd_reply_cache_stats_open(struct inode *, struct file *); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 7c686a270d60..bdfe5bcb3dcd 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -42,6 +42,11 @@ struct nfsd_net { bool grace_ended; time_t boot_time; + /* internal mount of the "nfsd" pseudofilesystem: */ + struct vfsmount *nfsd_mnt; + + struct dentry *nfsd_client_dir; + /* * reclaim_str_hashtbl[] holds known client info from previous reset/reboot * used in reboot/reset lease grace period processing @@ -106,6 +111,7 @@ struct nfsd_net { */ unsigned int max_connections; + u32 clientid_base; u32 clientid_counter; u32 clverifier_counter; @@ -127,6 +133,44 @@ struct nfsd_net { */ bool *nfsd_versions; bool *nfsd4_minorversions; + + /* + * Duplicate reply cache + */ + struct nfsd_drc_bucket *drc_hashtbl; + struct kmem_cache *drc_slab; + + /* max number of entries allowed in the cache */ + unsigned int max_drc_entries; + + /* number of significant bits in the hash value */ + unsigned int maskbits; + unsigned int drc_hashsize; + + /* + * Stats and other tracking of on the duplicate reply cache. + * These fields and the "rc" fields in nfsdstats are modified + * with only the per-bucket cache lock, which isn't really safe + * and should be fixed if we want the statistics to be + * completely accurate. + */ + + /* total number of entries */ + atomic_t num_drc_entries; + + /* cache misses due only to checksum comparison failures */ + unsigned int payload_misses; + + /* amount of memory (in bytes) currently consumed by the DRC */ + unsigned int drc_mem_usage; + + /* longest hash chain seen */ + unsigned int longest_chain; + + /* size of cache when we saw the longest hash chain */ + unsigned int longest_chain_cachesize; + + struct shrinker nfsd_reply_cache_shrinker; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 2961016097ac..d1f285245af8 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -83,7 +83,7 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) new->type = itm->type; strlcpy(new->name, itm->name, sizeof(new->name)); - strlcpy(new->authname, itm->authname, sizeof(new->name)); + strlcpy(new->authname, itm->authname, sizeof(new->authname)); } static void diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1a0cdeb3b875..7857942c5ca6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -99,6 +100,13 @@ enum nfsd4_st_mutex_lock_subclass { */ static DECLARE_WAIT_QUEUE_HEAD(close_wq); +/* + * A waitqueue where a writer to clients/#/ctl destroying a client can + * wait for cl_rpc_users to drop to 0 and then for the client to be + * unhashed. + */ +static DECLARE_WAIT_QUEUE_HEAD(expiry_wq); + static struct kmem_cache *client_slab; static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; @@ -138,7 +146,7 @@ static __be32 get_client_locked(struct nfs4_client *clp) if (is_client_expired(clp)) return nfserr_expired; - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); return nfs_ok; } @@ -170,20 +178,24 @@ static void put_client_renew_locked(struct nfs4_client *clp) lockdep_assert_held(&nn->client_lock); - if (!atomic_dec_and_test(&clp->cl_refcount)) + if (!atomic_dec_and_test(&clp->cl_rpc_users)) return; if (!is_client_expired(clp)) renew_client_locked(clp); + else + wake_up_all(&expiry_wq); } static void put_client_renew(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock)) return; if (!is_client_expired(clp)) renew_client_locked(clp); + else + wake_up_all(&expiry_wq); spin_unlock(&nn->client_lock); } @@ -694,7 +706,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla idr_preload(GFP_KERNEL); spin_lock(&cl->cl_lock); - new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT); + /* Reserving 0 for start of file in nfsdfs "states" file: */ + new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT); spin_unlock(&cl->cl_lock); idr_preload_end(); if (new_id < 0) @@ -1844,7 +1857,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; - clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); + xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL); if (clp->cl_name.data == NULL) goto err_no_name; clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE, @@ -1854,10 +1867,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) goto err_no_hashtbl; for (i = 0; i < OWNER_HASH_SIZE; i++) INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); - clp->cl_name.len = name.len; INIT_LIST_HEAD(&clp->cl_sessions); idr_init(&clp->cl_stateids); - atomic_set(&clp->cl_refcount, 0); + atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -1879,6 +1891,25 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return NULL; } +static void __free_client(struct kref *k) +{ + struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref); + struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs); + + free_svc_cred(&clp->cl_cred); + kfree(clp->cl_ownerstr_hashtbl); + kfree(clp->cl_name.data); + kfree(clp->cl_nii_domain.data); + kfree(clp->cl_nii_name.data); + idr_destroy(&clp->cl_stateids); + kmem_cache_free(client_slab, clp); +} + +static void drop_client(struct nfs4_client *clp) +{ + kref_put(&clp->cl_nfsdfs.cl_ref, __free_client); +} + static void free_client(struct nfs4_client *clp) { @@ -1891,11 +1922,12 @@ free_client(struct nfs4_client *clp) free_session(ses); } rpc_destroy_wait_queue(&clp->cl_cb_waitq); - free_svc_cred(&clp->cl_cred); - kfree(clp->cl_ownerstr_hashtbl); - kfree(clp->cl_name.data); - idr_destroy(&clp->cl_stateids); - kmem_cache_free(client_slab, clp); + if (clp->cl_nfsd_dentry) { + nfsd_client_rmdir(clp->cl_nfsd_dentry); + clp->cl_nfsd_dentry = NULL; + wake_up_all(&expiry_wq); + } + drop_client(clp); } /* must be called under the client_lock */ @@ -1936,7 +1968,7 @@ unhash_client(struct nfs4_client *clp) static __be32 mark_client_expired_locked(struct nfs4_client *clp) { - if (atomic_read(&clp->cl_refcount)) + if (atomic_read(&clp->cl_rpc_users)) return nfserr_jukebox; unhash_client_locked(clp); return nfs_ok; @@ -1989,6 +2021,7 @@ __destroy_client(struct nfs4_client *clp) if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); free_client(clp); + wake_up_all(&expiry_wq); } static void @@ -2199,6 +2232,342 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) return s; } +static struct nfs4_client *get_nfsdfs_clp(struct inode *inode) +{ + struct nfsdfs_client *nc; + nc = get_nfsdfs_client(inode); + if (!nc) + return NULL; + return container_of(nc, struct nfs4_client, cl_nfsdfs); +} + +static void seq_quote_mem(struct seq_file *m, char *data, int len) +{ + seq_printf(m, "\""); + seq_escape_mem_ascii(m, data, len); + seq_printf(m, "\""); +} + +static int client_info_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct nfs4_client *clp; + u64 clid; + + clp = get_nfsdfs_clp(inode); + if (!clp) + return -ENXIO; + memcpy(&clid, &clp->cl_clientid, sizeof(clid)); + seq_printf(m, "clientid: 0x%llx\n", clid); + seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); + seq_printf(m, "name: "); + seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); + seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); + if (clp->cl_nii_domain.data) { + seq_printf(m, "Implementation domain: "); + seq_quote_mem(m, clp->cl_nii_domain.data, + clp->cl_nii_domain.len); + seq_printf(m, "\nImplementation name: "); + seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len); + seq_printf(m, "\nImplementation time: [%ld, %ld]\n", + clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); + } + drop_client(clp); + + return 0; +} + +static int client_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, client_info_show, inode); +} + +static const struct file_operations client_info_fops = { + .open = client_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void *states_start(struct seq_file *s, loff_t *pos) + __acquires(&clp->cl_lock) +{ + struct nfs4_client *clp = s->private; + unsigned long id = *pos; + void *ret; + + spin_lock(&clp->cl_lock); + ret = idr_get_next_ul(&clp->cl_stateids, &id); + *pos = id; + return ret; +} + +static void *states_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct nfs4_client *clp = s->private; + unsigned long id = *pos; + void *ret; + + id = *pos; + id++; + ret = idr_get_next_ul(&clp->cl_stateids, &id); + *pos = id; + return ret; +} + +static void states_stop(struct seq_file *s, void *v) + __releases(&clp->cl_lock) +{ + struct nfs4_client *clp = s->private; + + spin_unlock(&clp->cl_lock); +} + +static void nfs4_show_superblock(struct seq_file *s, struct file *f) +{ + struct inode *inode = file_inode(f); + + seq_printf(s, "superblock: \"%02x:%02x:%ld\"", + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), + inode->i_ino); +} + +static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo) +{ + seq_printf(s, "owner: "); + seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len); +} + +static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_ol_stateid *ols; + struct nfs4_file *nf; + struct file *file; + struct nfs4_stateowner *oo; + unsigned int access, deny; + + if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID) + return 0; /* XXX: or SEQ_SKIP? */ + ols = openlockstateid(st); + oo = ols->st_stateowner; + nf = st->sc_file; + file = find_any_file(nf); + + seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid); + + access = bmap_to_share_mode(ols->st_access_bmap); + deny = bmap_to_share_mode(ols->st_deny_bmap); + + seq_printf(s, "access: \%s\%s, ", + access & NFS4_SHARE_ACCESS_READ ? "r" : "-", + access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); + seq_printf(s, "deny: \%s\%s, ", + deny & NFS4_SHARE_ACCESS_READ ? "r" : "-", + deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); + + nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_owner(s, oo); + seq_printf(s, " }\n"); + fput(file); + + return 0; +} + +static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_ol_stateid *ols; + struct nfs4_file *nf; + struct file *file; + struct nfs4_stateowner *oo; + + ols = openlockstateid(st); + oo = ols->st_stateowner; + nf = st->sc_file; + file = find_any_file(nf); + + seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid); + + /* + * Note: a lock stateid isn't really the same thing as a lock, + * it's the locking state held by one owner on a file, and there + * may be multiple (or no) lock ranges associated with it. + * (Same for the matter is true of open stateids.) + */ + + nfs4_show_superblock(s, file); + /* XXX: open stateid? */ + seq_printf(s, ", "); + nfs4_show_owner(s, oo); + seq_printf(s, " }\n"); + fput(file); + + return 0; +} + +static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_delegation *ds; + struct nfs4_file *nf; + struct file *file; + + ds = delegstateid(st); + nf = st->sc_file; + file = nf->fi_deleg_file; + + seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid); + + /* Kinda dead code as long as we only support read delegs: */ + seq_printf(s, "access: %s, ", + ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w"); + + /* XXX: lease time, whether it's being recalled. */ + + nfs4_show_superblock(s, file); + seq_printf(s, " }\n"); + + return 0; +} + +static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_layout_stateid *ls; + struct file *file; + + ls = container_of(st, struct nfs4_layout_stateid, ls_stid); + file = ls->ls_file; + + seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid); + + /* XXX: What else would be useful? */ + + nfs4_show_superblock(s, file); + seq_printf(s, " }\n"); + + return 0; +} + +static int states_show(struct seq_file *s, void *v) +{ + struct nfs4_stid *st = v; + + switch (st->sc_type) { + case NFS4_OPEN_STID: + return nfs4_show_open(s, st); + case NFS4_LOCK_STID: + return nfs4_show_lock(s, st); + case NFS4_DELEG_STID: + return nfs4_show_deleg(s, st); + case NFS4_LAYOUT_STID: + return nfs4_show_layout(s, st); + default: + return 0; /* XXX: or SEQ_SKIP? */ + } + /* XXX: copy stateids? */ +} + +static struct seq_operations states_seq_ops = { + .start = states_start, + .next = states_next, + .stop = states_stop, + .show = states_show +}; + +static int client_states_open(struct inode *inode, struct file *file) +{ + struct seq_file *s; + struct nfs4_client *clp; + int ret; + + clp = get_nfsdfs_clp(inode); + if (!clp) + return -ENXIO; + + ret = seq_open(file, &states_seq_ops); + if (ret) + return ret; + s = file->private_data; + s->private = clp; + return 0; +} + +static int client_opens_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct nfs4_client *clp = m->private; + + /* XXX: alternatively, we could get/drop in seq start/stop */ + drop_client(clp); + return 0; +} + +static const struct file_operations client_states_fops = { + .open = client_states_open, + .read = seq_read, + .llseek = seq_lseek, + .release = client_opens_release, +}; + +/* + * Normally we refuse to destroy clients that are in use, but here the + * administrator is telling us to just do it. We also want to wait + * so the caller has a guarantee that the client's locks are gone by + * the time the write returns: + */ +static void force_expire_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + bool already_expired; + + spin_lock(&clp->cl_lock); + clp->cl_time = 0; + spin_unlock(&clp->cl_lock); + + wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0); + spin_lock(&nn->client_lock); + already_expired = list_empty(&clp->cl_lru); + if (!already_expired) + unhash_client_locked(clp); + spin_unlock(&nn->client_lock); + + if (!already_expired) + expire_client(clp); + else + wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL); +} + +static ssize_t client_ctl_write(struct file *file, const char __user *buf, + size_t size, loff_t *pos) +{ + char *data; + struct nfs4_client *clp; + + data = simple_transaction_get(file, buf, size); + if (IS_ERR(data)) + return PTR_ERR(data); + if (size != 7 || 0 != memcmp(data, "expire\n", 7)) + return -EINVAL; + clp = get_nfsdfs_clp(file_inode(file)); + if (!clp) + return -ENXIO; + force_expire_client(clp); + drop_client(clp); + return 7; +} + +static const struct file_operations client_ctl_fops = { + .write = client_ctl_write, + .release = simple_transaction_release, +}; + +static const struct tree_descr client_files[] = { + [0] = {"info", &client_info_fops, S_IRUSR}, + [1] = {"states", &client_states_fops, S_IRUSR}, + [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR}, + [3] = {""}, +}; + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2206,6 +2575,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct sockaddr *sa = svc_addr(rqstp); int ret; struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) @@ -2216,13 +2586,22 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } + gen_clid(clp, nn); + kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); - rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); + memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; clp->net = net; + clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs, + clp->cl_clientid.cl_id - nn->clientid_base, + client_files); + if (!clp->cl_nfsd_dentry) { + free_client(clp); + return NULL; + } return clp; } @@ -2533,6 +2912,22 @@ static bool client_has_state(struct nfs4_client *clp) || !list_empty(&clp->async_copies); } +static __be32 copy_impl_id(struct nfs4_client *clp, + struct nfsd4_exchange_id *exid) +{ + if (!exid->nii_domain.data) + return 0; + xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL); + if (!clp->cl_nii_domain.data) + return nfserr_jukebox; + xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL); + if (!clp->cl_nii_name.data) + return nfserr_jukebox; + clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec; + clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec; + return 0; +} + __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -2559,6 +2954,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, new = create_client(exid->clname, rqstp, &verf); if (new == NULL) return nfserr_jukebox; + status = copy_impl_id(new, exid); + if (status) + goto out_nolock; switch (exid->spa_how) { case SP4_MACH_CRED: @@ -2667,7 +3065,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0]; new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1]; - gen_clid(new, nn); add_to_unconfirmed(new); swap(new, conf); out_copy: @@ -3411,7 +3808,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy_clid(new, conf); gen_confirm(new, nn); } else /* case 4 (new client) or cases 2, 3 (client reboot): */ - gen_clid(new, nn); + ; new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); add_to_unconfirmed(new); @@ -3632,12 +4029,11 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj if (!sop) return NULL; - sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL); + xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL); if (!sop->so_owner.data) { kmem_cache_free(slab, sop); return NULL; } - sop->so_owner.len = owner->len; INIT_LIST_HEAD(&sop->so_stateids); sop->so_client = clp; @@ -4092,7 +4488,7 @@ static __be32 lookup_clientid(clientid_t *clid, spin_unlock(&nn->client_lock); return nfserr_expired; } - atomic_inc(&found->cl_refcount); + atomic_inc(&found->cl_rpc_users); spin_unlock(&nn->client_lock); /* Cache the nfs4_client in cstate! */ @@ -5725,12 +6121,11 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) if (fl->fl_lmops == &nfsd_posix_mng_ops) { lo = (struct nfs4_lockowner *) fl->fl_owner; - deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data, - lo->lo_owner.so_owner.len, GFP_KERNEL); + xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner, + GFP_KERNEL); if (!deny->ld_owner.data) /* We just don't care that much */ goto nevermind; - deny->ld_owner.len = lo->lo_owner.so_owner.len; deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; } else { nevermind: @@ -6584,7 +6979,7 @@ nfs4_check_open_reclaim(clientid_t *clid, static inline void put_client(struct nfs4_client *clp) { - atomic_dec(&clp->cl_refcount); + atomic_dec(&clp->cl_rpc_users); } static struct nfs4_client * @@ -6702,7 +7097,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, return; lockdep_assert_held(&nn->client_lock); - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); list_add(&lst->st_locks, collect); } @@ -6731,7 +7126,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, * Despite the fact that these functions deal * with 64-bit integers for "count", we must * ensure that it doesn't blow up the - * clp->cl_refcount. Throw a warning if we + * clp->cl_rpc_users. Throw a warning if we * start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); @@ -6855,7 +7250,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, if (func) { func(oop); if (collect) { - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); list_add(&oop->oo_perclient, collect); } } @@ -6863,7 +7258,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, /* * Despite the fact that these functions deal with * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_refcount. Throw a + * it doesn't blow up the clp->cl_rpc_users. Throw a * warning if we start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); @@ -6993,7 +7388,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, if (dp->dl_time != 0) continue; - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, victims); } @@ -7001,7 +7396,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, /* * Despite the fact that these functions deal with * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_refcount. Throw a + * it doesn't blow up the clp->cl_rpc_users. Throw a * warning if we start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 52c4f6daa649..442811809f3d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -269,19 +269,13 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) return ret; } -/* - * We require the high 32 bits of 'seconds' to be 0, and - * we ignore all 32 bits of 'nseconds'. - */ static __be32 -nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv) +nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv) { DECODE_HEAD; - u64 sec; READ_BUF(12); - p = xdr_decode_hyper(p, &sec); - tv->tv_sec = sec; + p = xdr_decode_hyper(p, &tv->tv_sec); tv->tv_nsec = be32_to_cpup(p++); if (tv->tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -320,7 +314,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl, struct xdr_netobj *label, int *umask) { - struct timespec ts; int expected_len, len = 0; u32 dummy32; char *buf; @@ -422,8 +415,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: len += 12; - status = nfsd4_decode_time(argp, &ts); - iattr->ia_atime = timespec_to_timespec64(ts); + status = nfsd4_decode_time(argp, &iattr->ia_atime); if (status) return status; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -442,8 +434,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: len += 12; - status = nfsd4_decode_time(argp, &ts); - iattr->ia_mtime = timespec_to_timespec64(ts); + status = nfsd4_decode_time(argp, &iattr->ia_mtime); if (status) return status; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -1398,7 +1389,6 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, goto xdr_error; } - /* Ignore Implementation ID */ READ_BUF(4); /* nfs_impl_id4 array length */ dummy = be32_to_cpup(p++); @@ -1406,21 +1396,19 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, goto xdr_error; if (dummy == 1) { - /* nii_domain */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); + status = nfsd4_decode_opaque(argp, &exid->nii_domain); + if (status) + goto xdr_error; /* nii_name */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); + status = nfsd4_decode_opaque(argp, &exid->nii_name); + if (status) + goto xdr_error; /* nii_date */ - READ_BUF(12); - p += 3; + status = nfsd4_decode_time(argp, &exid->nii_time); + if (status) + goto xdr_error; } DECODE_TAIL; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index da52b594362a..26ad75ae2be0 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,6 +9,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch */ +#include #include #include #include @@ -35,48 +36,12 @@ struct nfsd_drc_bucket { spinlock_t cache_lock; }; -static struct nfsd_drc_bucket *drc_hashtbl; -static struct kmem_cache *drc_slab; - -/* max number of entries allowed in the cache */ -static unsigned int max_drc_entries; - -/* number of significant bits in the hash value */ -static unsigned int maskbits; -static unsigned int drc_hashsize; - -/* - * Stats and other tracking of on the duplicate reply cache. All of these and - * the "rc" fields in nfsdstats are protected by the cache_lock - */ - -/* total number of entries */ -static atomic_t num_drc_entries; - -/* cache misses due only to checksum comparison failures */ -static unsigned int payload_misses; - -/* amount of memory (in bytes) currently consumed by the DRC */ -static unsigned int drc_mem_usage; - -/* longest hash chain seen */ -static unsigned int longest_chain; - -/* size of cache when we saw the longest hash chain */ -static unsigned int longest_chain_cachesize; - static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc); -static struct shrinker nfsd_reply_cache_shrinker = { - .scan_objects = nfsd_reply_cache_scan, - .count_objects = nfsd_reply_cache_count, - .seeks = 1, -}; - /* * Put a cap on the size of the DRC based on the amount of available * low memory in the machine. @@ -94,6 +59,9 @@ static struct shrinker nfsd_reply_cache_shrinker = { * ...with a hard cap of 256k entries. In the worst case, each entry will be * ~1k, so the above numbers should give a rough max of the amount of memory * used in k. + * + * XXX: these limits are per-container, so memory used will increase + * linearly with number of containers. Maybe that's OK. */ static unsigned int nfsd_cache_size_limit(void) @@ -116,17 +84,18 @@ nfsd_hashsize(unsigned int limit) } static u32 -nfsd_cache_hash(__be32 xid) +nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) { - return hash_32(be32_to_cpu(xid), maskbits); + return hash_32(be32_to_cpu(xid), nn->maskbits); } static struct svc_cacherep * -nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) +nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, + struct nfsd_net *nn) { struct svc_cacherep *rp; - rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL); if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; @@ -147,91 +116,101 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) } static void -nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) +nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, + struct nfsd_net *nn) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { - drc_mem_usage -= rp->c_replvec.iov_len; + nn->drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } if (rp->c_state != RC_UNUSED) { rb_erase(&rp->c_node, &b->rb_head); list_del(&rp->c_lru); - atomic_dec(&num_drc_entries); - drc_mem_usage -= sizeof(*rp); + atomic_dec(&nn->num_drc_entries); + nn->drc_mem_usage -= sizeof(*rp); } - kmem_cache_free(drc_slab, rp); + kmem_cache_free(nn->drc_slab, rp); } static void -nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) +nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, + struct nfsd_net *nn) { spin_lock(&b->cache_lock); - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); spin_unlock(&b->cache_lock); } -int nfsd_reply_cache_init(void) +int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; unsigned int i; int status = 0; - max_drc_entries = nfsd_cache_size_limit(); - atomic_set(&num_drc_entries, 0); - hashsize = nfsd_hashsize(max_drc_entries); - maskbits = ilog2(hashsize); + nn->max_drc_entries = nfsd_cache_size_limit(); + atomic_set(&nn->num_drc_entries, 0); + hashsize = nfsd_hashsize(nn->max_drc_entries); + nn->maskbits = ilog2(hashsize); - status = register_shrinker(&nfsd_reply_cache_shrinker); + nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; + nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; + nn->nfsd_reply_cache_shrinker.seeks = 1; + status = register_shrinker(&nn->nfsd_reply_cache_shrinker); if (status) - return status; - - drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), - 0, 0, NULL); - if (!drc_slab) goto out_nomem; - drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); - if (!drc_hashtbl) { - drc_hashtbl = vzalloc(array_size(hashsize, - sizeof(*drc_hashtbl))); - if (!drc_hashtbl) - goto out_nomem; + nn->drc_slab = kmem_cache_create("nfsd_drc", + sizeof(struct svc_cacherep), 0, 0, NULL); + if (!nn->drc_slab) + goto out_shrinker; + + nn->drc_hashtbl = kcalloc(hashsize, + sizeof(*nn->drc_hashtbl), GFP_KERNEL); + if (!nn->drc_hashtbl) { + nn->drc_hashtbl = vzalloc(array_size(hashsize, + sizeof(*nn->drc_hashtbl))); + if (!nn->drc_hashtbl) + goto out_slab; } for (i = 0; i < hashsize; i++) { - INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); - spin_lock_init(&drc_hashtbl[i].cache_lock); + INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); + spin_lock_init(&nn->drc_hashtbl[i].cache_lock); } - drc_hashsize = hashsize; + nn->drc_hashsize = hashsize; return 0; +out_slab: + kmem_cache_destroy(nn->drc_slab); +out_shrinker: + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); - nfsd_reply_cache_shutdown(); return -ENOMEM; } -void nfsd_reply_cache_shutdown(void) +void nfsd_reply_cache_shutdown(struct nfsd_net *nn) { struct svc_cacherep *rp; unsigned int i; - unregister_shrinker(&nfsd_reply_cache_shrinker); + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); - for (i = 0; i < drc_hashsize; i++) { - struct list_head *head = &drc_hashtbl[i].lru_head; + for (i = 0; i < nn->drc_hashsize; i++) { + struct list_head *head = &nn->drc_hashtbl[i].lru_head; while (!list_empty(head)) { rp = list_first_entry(head, struct svc_cacherep, c_lru); - nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp); + nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i], + rp, nn); } } - kvfree(drc_hashtbl); - drc_hashtbl = NULL; - drc_hashsize = 0; + kvfree(nn->drc_hashtbl); + nn->drc_hashtbl = NULL; + nn->drc_hashsize = 0; - kmem_cache_destroy(drc_slab); - drc_slab = NULL; + kmem_cache_destroy(nn->drc_slab); + nn->drc_slab = NULL; } /* @@ -246,7 +225,7 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) } static long -prune_bucket(struct nfsd_drc_bucket *b) +prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) { struct svc_cacherep *rp, *tmp; long freed = 0; @@ -258,10 +237,10 @@ prune_bucket(struct nfsd_drc_bucket *b) */ if (rp->c_state == RC_INPROG) continue; - if (atomic_read(&num_drc_entries) <= max_drc_entries && + if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries && time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); freed++; } return freed; @@ -272,18 +251,18 @@ prune_bucket(struct nfsd_drc_bucket *b) * Also prune the oldest ones when the total exceeds the max number of entries. */ static long -prune_cache_entries(void) +prune_cache_entries(struct nfsd_net *nn) { unsigned int i; long freed = 0; - for (i = 0; i < drc_hashsize; i++) { - struct nfsd_drc_bucket *b = &drc_hashtbl[i]; + for (i = 0; i < nn->drc_hashsize; i++) { + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i]; if (list_empty(&b->lru_head)) continue; spin_lock(&b->cache_lock); - freed += prune_bucket(b); + freed += prune_bucket(b, nn); spin_unlock(&b->cache_lock); } return freed; @@ -292,13 +271,19 @@ prune_cache_entries(void) static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - return atomic_read(&num_drc_entries); + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_reply_cache_shrinker); + + return atomic_read(&nn->num_drc_entries); } static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - return prune_cache_entries(); + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_reply_cache_shrinker); + + return prune_cache_entries(nn); } /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes @@ -334,11 +319,12 @@ nfsd_cache_csum(struct svc_rqst *rqstp) } static int -nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp) +nfsd_cache_key_cmp(const struct svc_cacherep *key, + const struct svc_cacherep *rp, struct nfsd_net *nn) { if (key->c_key.k_xid == rp->c_key.k_xid && key->c_key.k_csum != rp->c_key.k_csum) - ++payload_misses; + ++nn->payload_misses; return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key)); } @@ -349,7 +335,8 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp * inserts an empty key on failure. */ static struct svc_cacherep * -nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) +nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, + struct nfsd_net *nn) { struct svc_cacherep *rp, *ret = key; struct rb_node **p = &b->rb_head.rb_node, @@ -362,7 +349,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) parent = *p; rp = rb_entry(parent, struct svc_cacherep, c_node); - cmp = nfsd_cache_key_cmp(key, rp); + cmp = nfsd_cache_key_cmp(key, rp, nn); if (cmp < 0) p = &parent->rb_left; else if (cmp > 0) @@ -376,14 +363,14 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) rb_insert_color(&key->c_node, &b->rb_head); out: /* tally hash chain length stats */ - if (entries > longest_chain) { - longest_chain = entries; - longest_chain_cachesize = atomic_read(&num_drc_entries); - } else if (entries == longest_chain) { + if (entries > nn->longest_chain) { + nn->longest_chain = entries; + nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries); + } else if (entries == nn->longest_chain) { /* prefer to keep the smallest cachesize possible here */ - longest_chain_cachesize = min_t(unsigned int, - longest_chain_cachesize, - atomic_read(&num_drc_entries)); + nn->longest_chain_cachesize = min_t(unsigned int, + nn->longest_chain_cachesize, + atomic_read(&nn->num_drc_entries)); } lru_put_end(b, ret); @@ -400,11 +387,12 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) int nfsd_cache_lookup(struct svc_rqst *rqstp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; __wsum csum; - u32 hash = nfsd_cache_hash(xid); - struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; + u32 hash = nfsd_cache_hash(xid, nn); + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash]; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -420,16 +408,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * Since the common case is a cache miss followed by an insert, * preallocate an entry. */ - rp = nfsd_reply_cache_alloc(rqstp, csum); + rp = nfsd_reply_cache_alloc(rqstp, csum, nn); if (!rp) { dprintk("nfsd: unable to allocate DRC entry!\n"); return rtn; } spin_lock(&b->cache_lock); - found = nfsd_cache_insert(b, rp); + found = nfsd_cache_insert(b, rp, nn); if (found != rp) { - nfsd_reply_cache_free_locked(NULL, rp); + nfsd_reply_cache_free_locked(NULL, rp, nn); rp = found; goto found_entry; } @@ -438,11 +426,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; - atomic_inc(&num_drc_entries); - drc_mem_usage += sizeof(*rp); + atomic_inc(&nn->num_drc_entries); + nn->drc_mem_usage += sizeof(*rp); /* go ahead and prune the cache */ - prune_bucket(b); + prune_bucket(b, nn); out: spin_unlock(&b->cache_lock); return rtn; @@ -477,7 +465,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); } goto out; @@ -502,6 +490,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; u32 hash; @@ -512,15 +501,15 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) if (!rp) return; - hash = nfsd_cache_hash(rp->c_key.k_xid); - b = &drc_hashtbl[hash]; + hash = nfsd_cache_hash(rp->c_key.k_xid, nn); + b = &nn->drc_hashtbl[hash]; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - nfsd_reply_cache_free(b, rp); + nfsd_reply_cache_free(b, rp, nn); return; } @@ -535,18 +524,18 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) bufsize = len << 2; cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - nfsd_reply_cache_free(b, rp); + nfsd_reply_cache_free(b, rp, nn); return; } cachv->iov_len = bufsize; memcpy(cachv->iov_base, statp, bufsize); break; case RC_NOCACHE: - nfsd_reply_cache_free(b, rp); + nfsd_reply_cache_free(b, rp, nn); return; } spin_lock(&b->cache_lock); - drc_mem_usage += bufsize; + nn->drc_mem_usage += bufsize; lru_put_end(b, rp); rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); rp->c_type = cachetype; @@ -582,21 +571,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) */ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - seq_printf(m, "max entries: %u\n", max_drc_entries); + struct nfsd_net *nn = v; + + seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "num entries: %u\n", - atomic_read(&num_drc_entries)); - seq_printf(m, "hash buckets: %u\n", 1 << maskbits); - seq_printf(m, "mem usage: %u\n", drc_mem_usage); + atomic_read(&nn->num_drc_entries)); + seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); + seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage); seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); - seq_printf(m, "payload misses: %u\n", payload_misses); - seq_printf(m, "longest chain len: %u\n", longest_chain); - seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); + seq_printf(m, "payload misses: %u\n", nn->payload_misses); + seq_printf(m, "longest chain len: %u\n", nn->longest_chain); + seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); return 0; } int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) { - return single_open(file, nfsd_reply_cache_stats_show, NULL); + struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info, + nfsd_net_id); + + return single_open(file, nfsd_reply_cache_stats_show, nn); } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 62c58cfeb8d8..72fad54fc7e5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -53,6 +54,7 @@ enum { NFSD_RecoveryDir, NFSD_V4EndGrace, #endif + NFSD_MaxReserved }; /* @@ -1147,8 +1149,201 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) * populating the filesystem. */ +/* Basically copying rpc_get_inode. */ +static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) +{ + struct inode *inode = new_inode(sb); + if (!inode) + return NULL; + /* Following advice from simple_fill_super documentation: */ + inode->i_ino = iunique(sb, NFSD_MaxReserved); + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + switch (mode & S_IFMT) { + case S_IFDIR: + inode->i_fop = &simple_dir_operations; + inode->i_op = &simple_dir_inode_operations; + inc_nlink(inode); + default: + break; + } + return inode; +} + +static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode; + + inode = nfsd_get_inode(dir->i_sb, mode); + if (!inode) + return -ENOMEM; + d_add(dentry, inode); + inc_nlink(dir); + fsnotify_mkdir(dir, dentry); + return 0; +} + +static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name) +{ + struct inode *dir = parent->d_inode; + struct dentry *dentry; + int ret = -ENOMEM; + + inode_lock(dir); + dentry = d_alloc_name(parent, name); + if (!dentry) + goto out_err; + ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600); + if (ret) + goto out_err; + if (ncl) { + d_inode(dentry)->i_private = ncl; + kref_get(&ncl->cl_ref); + } +out: + inode_unlock(dir); + return dentry; +out_err: + dentry = ERR_PTR(ret); + goto out; +} + +static void clear_ncl(struct inode *inode) +{ + struct nfsdfs_client *ncl = inode->i_private; + + inode->i_private = NULL; + synchronize_rcu(); + kref_put(&ncl->cl_ref, ncl->cl_release); +} + + +static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode) +{ + struct nfsdfs_client *nc = inode->i_private; + + if (nc) + kref_get(&nc->cl_ref); + return nc; +} + +struct nfsdfs_client *get_nfsdfs_client(struct inode *inode) +{ + struct nfsdfs_client *nc; + + rcu_read_lock(); + nc = __get_nfsdfs_client(inode); + rcu_read_unlock(); + return nc; +} +/* from __rpc_unlink */ +static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) +{ + int ret; + + clear_ncl(d_inode(dentry)); + dget(dentry); + ret = simple_unlink(dir, dentry); + d_delete(dentry); + dput(dentry); + WARN_ON_ONCE(ret); +} + +static void nfsdfs_remove_files(struct dentry *root) +{ + struct dentry *dentry, *tmp; + + list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) { + if (!simple_positive(dentry)) { + WARN_ON_ONCE(1); /* I think this can't happen? */ + continue; + } + nfsdfs_remove_file(d_inode(root), dentry); + } +} + +/* XXX: cut'n'paste from simple_fill_super; figure out if we could share + * code instead. */ +static int nfsdfs_create_files(struct dentry *root, + const struct tree_descr *files) +{ + struct inode *dir = d_inode(root); + struct inode *inode; + struct dentry *dentry; + int i; + + inode_lock(dir); + for (i = 0; files->name && files->name[0]; i++, files++) { + if (!files->name) + continue; + dentry = d_alloc_name(root, files->name); + if (!dentry) + goto out; + inode = nfsd_get_inode(d_inode(root)->i_sb, + S_IFREG | files->mode); + if (!inode) { + dput(dentry); + goto out; + } + inode->i_fop = files->ops; + inode->i_private = __get_nfsdfs_client(dir); + d_add(dentry, inode); + fsnotify_create(dir, dentry); + } + inode_unlock(dir); + return 0; +out: + nfsdfs_remove_files(root); + inode_unlock(dir); + return -ENOMEM; +} + +/* on success, returns positive number unique to that client. */ +struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *files) +{ + struct dentry *dentry; + char name[11]; + int ret; + + sprintf(name, "%u", id); + + dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); + if (IS_ERR(dentry)) /* XXX: tossing errors? */ + return NULL; + ret = nfsdfs_create_files(dentry, files); + if (ret) { + nfsd_client_rmdir(dentry); + return NULL; + } + return dentry; +} + +/* Taken from __rpc_rmdir: */ +void nfsd_client_rmdir(struct dentry *dentry) +{ + struct inode *dir = d_inode(dentry->d_parent); + struct inode *inode = d_inode(dentry); + int ret; + + inode_lock(dir); + nfsdfs_remove_files(dentry); + clear_ncl(inode); + dget(dentry); + ret = simple_rmdir(dir, dentry); + WARN_ON_ONCE(ret); + d_delete(dentry); + inode_unlock(dir); +} + static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + struct dentry *dentry; + int ret; + static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", @@ -1178,7 +1373,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) /* last one */ {""} }; get_net(sb->s_fs_info); - return simple_fill_super(sb, 0x6e667364, nfsd_files); + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + dentry = nfsd_mkdir(sb->s_root, NULL, "clients"); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + nn->nfsd_client_dir = dentry; + return 0; + } static struct dentry *nfsd_mount(struct file_system_type *fs_type, @@ -1232,6 +1435,7 @@ unsigned int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { int retval; + struct vfsmount *mnt; struct nfsd_net *nn = net_generic(net, nfsd_net_id); retval = nfsd_export_init(net); @@ -1242,18 +1446,33 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; + retval = nfsd_reply_cache_init(nn); + if (retval) + goto out_drc_error; nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; nn->track_reclaim_completes = false; nn->clverifier_counter = prandom_u32(); - nn->clientid_counter = prandom_u32(); + nn->clientid_base = prandom_u32(); + nn->clientid_counter = nn->clientid_base + 1; nn->s2s_cp_cl_id = nn->clientid_counter++; atomic_set(&nn->ntf_refcnt, 0); init_waitqueue_head(&nn->ntf_wq); + + mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); + if (IS_ERR(mnt)) { + retval = PTR_ERR(mnt); + goto out_mount_err; + } + nn->nfsd_mnt = mnt; return 0; +out_mount_err: + nfsd_reply_cache_shutdown(nn); +out_drc_error: + nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); out_export_error: @@ -1262,6 +1481,10 @@ static __net_init int nfsd_init_net(struct net *net) static __net_exit void nfsd_exit_net(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + mntput(nn->nfsd_mnt); + nfsd_reply_cache_shutdown(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); @@ -1295,9 +1518,6 @@ static int __init init_nfsd(void) if (retval) goto out_exit_pnfs; nfsd_stat_init(); /* Statistics */ - retval = nfsd_reply_cache_init(); - if (retval) - goto out_free_stat; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) @@ -1311,8 +1531,6 @@ static int __init init_nfsd(void) remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); - nfsd_reply_cache_shutdown(); -out_free_stat: nfsd_stat_shutdown(); nfsd_fault_inject_cleanup(); out_exit_pnfs: @@ -1328,7 +1546,6 @@ static int __init init_nfsd(void) static void __exit exit_nfsd(void) { - nfsd_reply_cache_shutdown(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 24187b5dd638..af2947551e9c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -22,6 +22,7 @@ #include +#include "netns.h" #include "stats.h" #include "export.h" @@ -86,6 +87,16 @@ int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_destroy(struct net *net); +struct nfsdfs_client { + struct kref cl_ref; + void (*cl_release)(struct kref *kref); +}; + +struct nfsdfs_client *get_nfsdfs_client(struct inode *); +struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, + struct nfsdfs_client *ncl, u32 id, const struct tree_descr *); +void nfsd_client_rmdir(struct dentry *dentry); + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL extern const struct svc_version nfsd_acl_version2; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 0b74d371ed67..8cb20cab012b 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -39,6 +39,7 @@ #include #include #include "nfsfh.h" +#include "nfsd.h" typedef struct { u32 cl_boot; @@ -316,6 +317,10 @@ struct nfs4_client { clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ u32 cl_minorversion; + /* NFSv4.1 client implementation id: */ + struct xdr_netobj cl_nii_domain; + struct xdr_netobj cl_nii_name; + struct timespec cl_nii_time; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; @@ -347,9 +352,13 @@ struct nfs4_client { struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; /* number of rpc's in progress over an associated session: */ - atomic_t cl_refcount; + atomic_t cl_rpc_users; + struct nfsdfs_client cl_nfsdfs; struct nfs4_op_map cl_spo_must_allow; + /* debugging info directory under nfsd/clients/ : */ + struct dentry *cl_nfsd_dentry; + /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ unsigned long cl_cb_slot_busy; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fc24ee47eab5..c85783e536d5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -404,7 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* * If utimes(2) and friends are called with times not NULL, we should * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission - * will return EACCESS, when the caller's effective UID does not match + * will return EACCES, when the caller's effective UID does not match * the owner of the file, and the caller is not privileged. In this * situation, we should return EPERM(notify_change will return this). */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index feeb6d4bdffd..d64c870f998a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -410,6 +410,9 @@ struct nfsd4_exchange_id { int spa_how; u32 spo_must_enforce[3]; u32 spo_must_allow[3]; + struct xdr_netobj nii_domain; + struct xdr_netobj nii_name; + struct timespec64 nii_time; }; struct nfsd4_sequence { @@ -472,7 +475,7 @@ struct nfsd4_layoutcommit { u32 lc_reclaim; /* request */ u32 lc_newoffset; /* request */ u64 lc_last_wr; /* request */ - struct timespec lc_mtime; /* request */ + struct timespec64 lc_mtime; /* request */ u32 lc_layout_type; /* request */ u32 lc_up_len; /* layout length */ void *lc_up_layout; /* decoded by callback */ diff --git a/fs/seq_file.c b/fs/seq_file.c index abe27ec43176..04f09689cd6d 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -384,6 +384,17 @@ void seq_escape(struct seq_file *m, const char *s, const char *esc) } EXPORT_SYMBOL(seq_escape); +void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz) +{ + char *buf; + size_t size = seq_get_buf(m, &buf); + int ret; + + ret = string_escape_mem_ascii(src, isz, buf, size); + seq_commit(m, ret < size ? ret : -1); +} +EXPORT_SYMBOL(seq_escape_mem_ascii); + void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; diff --git a/include/linux/fs.h b/include/linux/fs.h index 291c50500ca2..9193f5f6b09d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1019,8 +1019,6 @@ struct file_lock_operations { }; struct lock_manager_operations { - int (*lm_compare_owner)(struct file_lock *, struct file_lock *); - unsigned long (*lm_owner_key)(struct file_lock *); fl_owner_t (*lm_get_owner)(fl_owner_t); void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c9b422dde542..d294dde9e546 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -282,6 +282,7 @@ void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t match); void nlmsvc_grant_reply(struct nlm_cookie *, __be32); void nlmsvc_release_call(struct nlm_rqst *); +void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); /* * File handling for the server personality @@ -289,6 +290,7 @@ void nlmsvc_release_call(struct nlm_rqst *); __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, struct nfs_fh *); void nlm_release_file(struct nlm_file *); +void nlmsvc_release_lockowner(struct nlm_lock *); void nlmsvc_mark_resources(struct net *); void nlmsvc_free_host_resources(struct nlm_host *); void nlmsvc_invalidate_all(void); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index a121982af0f5..5998e1f4ff06 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -127,6 +127,7 @@ void seq_put_hex_ll(struct seq_file *m, const char *delimiter, unsigned long long v, unsigned int width); void seq_escape(struct seq_file *m, const char *s, const char *esc); +void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index d23c5030901a..c28955132234 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -54,6 +54,9 @@ static inline int string_unescape_any_inplace(char *buf) int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *only); +int string_escape_mem_ascii(const char *src, size_t isz, char *dst, + size_t osz); + static inline int string_escape_mem_any_np(const char *src, size_t isz, char *dst, size_t osz, const char *only) { diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9ee3970ba59c..8a87d8bcb197 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -164,6 +164,13 @@ xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len) return p + XDR_QUADLEN(len); } +static inline void xdr_netobj_dup(struct xdr_netobj *dst, + struct xdr_netobj *src, gfp_t gfp_mask) +{ + dst->data = kmemdup(src->data, src->len, gfp_mask); + dst->len = src->len; +} + /* * Adjust kvec to reflect end of xdr'ed data (RPC client XDR) */ diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 4403e1924f73..3a90a9e2b94a 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -540,6 +540,25 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, } EXPORT_SYMBOL(string_escape_mem); +int string_escape_mem_ascii(const char *src, size_t isz, char *dst, + size_t osz) +{ + char *p = dst; + char *end = p + osz; + + while (isz--) { + unsigned char c = *src++; + + if (!isprint(c) || !isascii(c) || c == '"' || c == '\\') + escape_hex(c, &p, end); + else + escape_passthrough(c, &p, end); + } + + return p - dst; +} +EXPORT_SYMBOL(string_escape_mem_ascii); + /* * Return an allocated string that has been escaped of special characters * and double quotes, making it safe to log in quotes. diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 66fbb9d2fba7..6f1528f271ee 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1375,7 +1375,6 @@ static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } -EXPORT_SYMBOL_GPL(cache_seq_next); void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos) __acquires(RCU) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 869ce7737997..de3c077733a7 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -35,7 +35,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after * 6 minutes - * http://www.connectathon.org/talks96/nfstcp.pdf + * http://nfsv4bat.org/Documents/ConnectAThon/1996/nfstcp.pdf */ static int svc_conn_age_period = 6*60;