For some reason, the wait_on_bit() in nfsd4_deleg_getattr_conflict()
is waiting forever, preventing a clean server shutdown. The
requesting client might also hang waiting for a reply to the
conflicting GETATTR.

Invoking wait_on_bit() in an nfsd thread context is a hazard. The
correct fix is to replace this wait_on_bit() call site with a
mechanism that defers the conflicting GETATTR until the CB_GETATTR
completes or is known to have failed.

That will require some surgery and extended testing and it's late
in the v6.7-rc cycle, so I'm reverting now in favor of trying again
in a subsequent kernel release.

This is my fault: I should have recognized the ramifications of
calling wait_on_bit() in here before accepting this patch.

Thanks to Dai Ngo <dai.ngo@oracle.com> for diagnosing the issue.

Reported-by: Wolfgang Walter <linux-nfs@stwm.de>
Closes: https://lore.kernel.org/linux-nfs/e3d43ecdad554fbdcaa7181833834f78@stwm.de/
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
This commit is contained in:
Chuck Lever 2023-12-16 11:57:43 -05:00
parent 1bd773b4f0
commit 862bee84d7
3 changed files with 15 additions and 119 deletions

View file

@ -127,7 +127,6 @@ static void free_session(struct nfsd4_session *);
static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
static struct workqueue_struct *laundry_wq;
@ -1190,10 +1189,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
&nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
dp->dl_cb_fattr.ncf_file_modified = false;
dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@ -2901,56 +2896,11 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
spin_unlock(&nn->client_lock);
}
static int
nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
ncf->ncf_cb_status = task->tk_status;
switch (task->tk_status) {
case -NFS4ERR_DELAY:
rpc_delay(task, 2 * HZ);
return 0;
default:
return 1;
}
}
static void
nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
{
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
struct nfs4_delegation *dp =
container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
nfs4_put_stid(&dp->dl_stid);
clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
}
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
.done = nfsd4_cb_recall_any_done,
.release = nfsd4_cb_recall_any_release,
};
static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
.done = nfsd4_cb_getattr_done,
.release = nfsd4_cb_getattr_release,
};
void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
{
struct nfs4_delegation *dp =
container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
return;
refcount_inc(&dp->dl_stid.sc_count);
nfsd4_run_cb(&ncf->ncf_getattr);
}
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@ -5686,8 +5636,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent = NULL;
int cb_up;
int status = 0;
struct kstat stat;
struct path path;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
@ -5725,18 +5673,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
path.mnt = currentfh->fh_export->ex_path.mnt;
path.dentry = currentfh->fh_dentry;
if (vfs_getattr(&path, &stat,
(STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
AT_STATX_SYNC_AS_STAT)) {
nfs4_put_stid(&dp->dl_stid);
destroy_delegation(dp);
goto out_no_deleg;
}
dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
dp->dl_cb_fattr.ncf_initial_cinfo =
nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
} else {
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
@ -8489,8 +8425,6 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
* @inode: file to be checked for a conflict
* @modified: return true if file was modified
* @size: new size of file if modified is true
*
* This function is called when there is a conflict between a write
* delegation and a change/size GETATTR from another client. The server
@ -8499,23 +8433,21 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* delegation before replying to the GETATTR. See RFC 8881 section
* 18.7.4.
*
* The current implementation does not support CB_GETATTR yet. However
* this can avoid recalling the delegation could be added in follow up
* work.
*
* Returns 0 if there is no conflict; otherwise an nfs_stat
* code is returned.
*/
__be32
nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
bool *modified, u64 *size)
nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
{
struct file_lock_context *ctx;
struct nfs4_delegation *dp;
struct nfs4_cb_fattr *ncf;
struct file_lock *fl;
struct iattr attrs;
__be32 status;
struct file_lock_context *ctx;
struct file_lock *fl;
struct nfs4_delegation *dp;
might_sleep();
*modified = false;
ctx = locks_inode_context(inode);
if (!ctx)
return 0;
@ -8542,34 +8474,10 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
break_lease:
spin_unlock(&ctx->flc_lock);
nfsd_stats_wdeleg_getattr_inc();
dp = fl->fl_owner;
ncf = &dp->dl_cb_fattr;
nfs4_cb_getattr(&dp->dl_cb_fattr);
wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
if (ncf->ncf_cb_status) {
status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
if (status != nfserr_jukebox ||
!nfsd_wait_for_delegreturn(rqstp, inode))
return status;
}
if (!ncf->ncf_file_modified &&
(ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
ncf->ncf_file_modified = true;
if (ncf->ncf_file_modified) {
/*
* The server would not update the file's metadata
* with the client's modified size.
*/
attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
setattr_copy(&nop_mnt_idmap, inode, &attrs);
mark_inode_dirty(inode);
ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
*size = ncf->ncf_cur_fsize;
*modified = true;
}
status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
if (status != nfserr_jukebox ||
!nfsd_wait_for_delegreturn(rqstp, inode))
return status;
return 0;
}
break;

View file

@ -3505,9 +3505,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
u32 attrmask[3];
unsigned long mask[2];
} u;
bool file_modified;
unsigned long bit;
u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@ -3534,8 +3532,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
}
args.size = 0;
if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
&file_modified, &size);
status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
if (status)
goto out;
}
@ -3545,7 +3542,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
args.size = file_modified ? size : args.stat.size;
args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */

View file

@ -125,16 +125,8 @@ struct nfs4_cb_fattr {
/* from CB_GETATTR reply */
u64 ncf_cb_change;
u64 ncf_cb_fsize;
unsigned long ncf_cb_flags;
bool ncf_file_modified;
u64 ncf_initial_cinfo;
u64 ncf_cur_fsize;
};
/* bits for ncf_cb_flags */
#define CB_GETATTR_BUSY 0
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@ -754,6 +746,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
struct inode *inode, bool *file_modified, u64 *size);
extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
struct inode *inode);
#endif /* NFSD4_STATE_H */