NFS: don't unhash dentry during unlink/rename

NFS unlink() (and rename over existing target) must determine if the
file is open, and must perform a "silly rename" instead of an unlink (or
before rename) if it is.  Otherwise the client might hold a file open
which has been removed on the server.

Consequently if it determines that the file isn't open, it must block
any subsequent opens until the unlink/rename has been completed on the
server.

This is currently achieved by unhashing the dentry.  This forces any
open attempt to the slow-path for lookup which will block on i_rwsem on
the directory until the unlink/rename completes.  A future patch will
change the VFS to only get a shared lock on i_rwsem for unlink, so this
will no longer work.

Instead we introduce an explicit interlock.  A special value is stored
in dentry->d_fsdata while the unlink/rename is running and
->d_revalidate blocks while that value is present.  When ->d_revalidate
unblocks, the dentry will be invalid.  This closes the race
without requiring exclusion on i_rwsem.

d_fsdata is already used in two different ways.
1/ an IS_ROOT directory dentry might have a "devname" stored in
   d_fsdata.  Such a dentry doesn't have a name and so cannot be the
   target of unlink or rename.  For safety we check if an old devname
   is still stored, and remove it if it is.
2/ a dentry with DCACHE_NFSFS_RENAMED set will have a 'struct
   nfs_unlinkdata' stored in d_fsdata.  While this is set maydelete()
   will fail, so an unlink or rename will never proceed on such
   a dentry.

Neither of these can be in effect when a dentry is the target of unlink
or rename.  So we can expect d_fsdata to be NULL, and store a special
value ((void*)1) which is given the name NFS_FSDATA_BLOCKED to indicate
that any lookup will be blocked.

The d_count() is incremented under d_lock() when a lookup finds the
dentry, so we check d_count() is low, and set NFS_FSDATA_BLOCKED under
the same lock to avoid any races.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
NeilBrown 2022-08-01 10:33:34 +10:00 committed by Trond Myklebust
parent 2135e5d562
commit 3c59366c20
2 changed files with 63 additions and 18 deletions

View file

@ -1782,6 +1782,8 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
int ret;
if (flags & LOOKUP_RCU) {
if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
return -ECHILD;
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
@ -1790,6 +1792,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
if (parent != READ_ONCE(dentry->d_parent))
return -ECHILD;
} else {
/* Wait for unlink to complete */
wait_var_event(&dentry->d_fsdata,
dentry->d_fsdata != NFS_FSDATA_BLOCKED);
parent = dget_parent(dentry);
ret = reval(d_inode(parent), dentry, flags);
dput(parent);
@ -2458,7 +2463,6 @@ static int nfs_safe_remove(struct dentry *dentry)
int nfs_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
int need_rehash = 0;
dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
dir->i_ino, dentry);
@ -2473,15 +2477,25 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
error = nfs_sillyrename(dir, dentry);
goto out;
}
if (!d_unhashed(dentry)) {
__d_drop(dentry);
need_rehash = 1;
}
/* We must prevent any concurrent open until the unlink
* completes. ->d_revalidate will wait for ->d_fsdata
* to clear. We set it here to ensure no lookup succeeds until
* the unlink is complete on the server.
*/
error = -ETXTBSY;
if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED))
goto out;
if (dentry->d_fsdata)
/* old devname */
kfree(dentry->d_fsdata);
dentry->d_fsdata = NFS_FSDATA_BLOCKED;
spin_unlock(&dentry->d_lock);
error = nfs_safe_remove(dentry);
nfs_dentry_remove_handle_error(dir, dentry, error);
if (need_rehash)
d_rehash(dentry);
dentry->d_fsdata = NULL;
wake_up_var(&dentry->d_fsdata);
out:
trace_nfs_unlink_exit(dir, dentry, error);
return error;
@ -2588,6 +2602,15 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(nfs_link);
static void
nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
{
struct dentry *new_dentry = data->new_dentry;
new_dentry->d_fsdata = NULL;
wake_up_var(&new_dentry->d_fsdata);
}
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@ -2618,8 +2641,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
struct dentry *dentry = NULL, *rehash = NULL;
struct dentry *dentry = NULL;
struct rpc_task *task;
bool must_unblock = false;
int error = -EBUSY;
if (flags)
@ -2637,18 +2661,27 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
* the new target.
*/
if (new_inode && !S_ISDIR(new_inode->i_mode)) {
/*
* To prevent any new references to the target during the
* rename, we unhash the dentry in advance.
/* We must prevent any concurrent open until the unlink
* completes. ->d_revalidate will wait for ->d_fsdata
* to clear. We set it here to ensure no lookup succeeds until
* the unlink is complete on the server.
*/
if (!d_unhashed(new_dentry)) {
d_drop(new_dentry);
rehash = new_dentry;
error = -ETXTBSY;
if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
goto out;
if (new_dentry->d_fsdata) {
/* old devname */
kfree(new_dentry->d_fsdata);
new_dentry->d_fsdata = NULL;
}
spin_lock(&new_dentry->d_lock);
if (d_count(new_dentry) > 2) {
int err;
spin_unlock(&new_dentry->d_lock);
/* copy the target dentry's name */
dentry = d_alloc(new_dentry->d_parent,
&new_dentry->d_name);
@ -2661,14 +2694,19 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
goto out;
new_dentry = dentry;
rehash = NULL;
new_inode = NULL;
} else {
new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
must_unblock = true;
spin_unlock(&new_dentry->d_lock);
}
}
if (S_ISREG(old_inode->i_mode))
nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
must_unblock ? nfs_unblock_rename : NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
goto out;
@ -2692,8 +2730,6 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
spin_unlock(&old_inode->i_lock);
}
out:
if (rehash)
d_rehash(rehash);
trace_nfs_rename_exit(old_dir, old_dentry,
new_dir, new_dentry, error);
if (!error) {

View file

@ -617,6 +617,15 @@ nfs_fileid_to_ino_t(u64 fileid)
#define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
/* We need to block new opens while a file is being unlinked.
* If it is opened *before* we decide to unlink, we will silly-rename
* instead. If it is opened *after*, then we need to create or will fail.
* If we allow the two to race, we could end up with a file that is open
* but deleted on the server resulting in ESTALE.
* So use ->d_fsdata to record when the unlink is happening
* and block dentry revalidation while it is set.
*/
#define NFS_FSDATA_BLOCKED ((void*)1)
# undef ifdebug
# ifdef NFS_DEBUG