From 5f7b839d47dbc74cf4a07beeab5191f93678673e Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Mon, 28 Mar 2022 10:48:59 +0800 Subject: [PATCH 1/5] SUNRPC: Return true/false (not 1/0) from bool functions Return boolean values ("true" or "false") instead of 1 or 0 from bool functions. This fixes the following warnings from coccicheck: ./fs/nfsd/nfs2acl.c:289:9-10: WARNING: return of 0/1 in function 'nfsaclsvc_encode_accessres' with return type bool ./fs/nfsd/nfs2acl.c:252:9-10: WARNING: return of 0/1 in function 'nfsaclsvc_encode_getaclres' with return type bool Signed-off-by: Haowen Bai Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 367551bddfc6..b5760801d377 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -249,34 +249,34 @@ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) int w; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; if (dentry == NULL || d_really_is_negative(dentry)) - return 1; + return true; inode = d_inode(dentry); if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->mask) < 0) - return 0; + return false; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 1; + return true; w -= PAGE_SIZE; } if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, resp->mask & NFS_ACL, 0)) - return 0; + return false; if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, resp->mask & NFS_DFACL, NFS_ACL_DEFAULT)) - return 0; + return false; - return 1; + return true; } /* ACCESS */ @@ -286,17 +286,17 @@ nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->access) < 0) - return 0; + return false; break; } - return 1; + return true; } /* From 6b8a94332ee4f7d9a8ae0cbac7609f79c212f06c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Mar 2022 09:54:01 -0400 Subject: [PATCH 2/5] nfsd: Fix a write performance regression The call to filemap_flush() in nfsd_file_put() is there to ensure that we clear out any writes belonging to a NFSv3 client relatively quickly and avoid situations where the file can't be evicted by the garbage collector. It also ensures that we detect write errors quickly. The problem is this causes a regression in performance for some workloads. So try to improve matters by deferring writeback until we're ready to close the file, and need to detect errors so that we can force the client to resend. Tested-by: Jan Kara Fixes: b6669305d35a ("nfsd: Reduce the number of calls to nfsd_file_gc()") Signed-off-by: Trond Myklebust Link: https://lore.kernel.org/all/20220330103457.r4xrhy2d6nhtouzk@quack3.lan Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index cc2831cec669..496f7b3f7523 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -235,6 +235,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf) return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); } +static void +nfsd_file_flush(struct nfsd_file *nf) +{ + if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); +} + static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -302,11 +309,14 @@ nfsd_file_put(struct nfsd_file *nf) return; } - filemap_flush(nf->nf_file->f_mapping); is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - nfsd_file_put_noref(nf); - if (is_hashed) + if (!is_hashed) { + nfsd_file_flush(nf); + nfsd_file_put_noref(nf); + } else { + nfsd_file_put_noref(nf); nfsd_file_schedule_laundrette(); + } if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); } @@ -327,6 +337,7 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); + nfsd_file_flush(nf); nfsd_file_put_noref(nf); } } @@ -340,6 +351,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); + nfsd_file_flush(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) From 999397926ab3f78c7d1235cc4ca6e3c89d2769bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Mar 2022 09:54:02 -0400 Subject: [PATCH 3/5] nfsd: Clean up nfsd_file_put() Make it a little less racy, by removing the refcount_read() test. Then remove the redundant 'is_hashed' variable. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 496f7b3f7523..8f7ed5dbb003 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -301,21 +301,14 @@ nfsd_file_put_noref(struct nfsd_file *nf) void nfsd_file_put(struct nfsd_file *nf) { - bool is_hashed; - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { - nfsd_file_put_noref(nf); - return; - } - - is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - if (!is_hashed) { + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); } else { nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); + if (nf->nf_file) + nfsd_file_schedule_laundrette(); } if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); From 773f91b2cf3f52df0d7508fdbf60f37567cdaee4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 1 Apr 2022 17:08:21 -0400 Subject: [PATCH 4/5] SUNRPC: Fix NFSD's request deferral on RDMA transports Trond Myklebust reports an NFSD crash in svc_rdma_sendto(). Further investigation shows that the crash occurred while NFSD was handling a deferred request. This patch addresses two inter-related issues that prevent request deferral from working correctly for RPC/RDMA requests: 1. Prevent the crash by ensuring that the original svc_rqst::rq_xprt_ctxt value is available when the request is revisited. Otherwise svc_rdma_sendto() does not have a Receive context available with which to construct its reply. 2. Possibly since before commit 71641d99ce03 ("svcrdma: Properly compute .len and .buflen for received RPC Calls"), svc_rdma_recvfrom() did not include the transport header in the returned xdr_buf. There should have been no need for svc_defer() and friends to save and restore that header, as of that commit. This issue is addressed in a backport-friendly way by simply having svc_rdma_recvfrom() set rq_xprt_hlen to zero unconditionally, just as svc_tcp_recvfrom() does. This enables svc_deferred_recv() to correctly reconstruct an RPC message received via RPC/RDMA. Reported-by: Trond Myklebust Link: https://lore.kernel.org/linux-nfs/82662b7190f26fb304eb0ab1bb04279072439d4e.camel@hammerspace.com/ Signed-off-by: Chuck Lever Cc: --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc_xprt.c | 3 +++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a5dda4987e8b..217711fc9cac 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -395,6 +395,7 @@ struct svc_deferred_req { size_t addrlen; struct sockaddr_storage daddr; /* where reply must come from */ size_t daddrlen; + void *xprt_ctxt; struct cache_deferred_req handle; size_t xprt_hlen; int argslen; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0c117d3bfda8..b42cfffa7395 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1231,6 +1231,8 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) dr->daddr = rqstp->rq_daddr; dr->argslen = rqstp->rq_arg.len >> 2; dr->xprt_hlen = rqstp->rq_xprt_hlen; + dr->xprt_ctxt = rqstp->rq_xprt_ctxt; + rqstp->rq_xprt_ctxt = NULL; /* back up head to the start of the buffer and copy */ skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; @@ -1269,6 +1271,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_xprt_hlen = dr->xprt_hlen; rqstp->rq_daddr = dr->daddr; rqstp->rq_respages = rqstp->rq_pages; + rqstp->rq_xprt_ctxt = dr->xprt_ctxt; svc_xprt_received(rqstp->rq_xprt); return (dr->argslen<<2) - dr->xprt_hlen; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cf76a6ad127b..864131a9fc6e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -831,7 +831,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_err; if (ret == 0) goto out_drop; - rqstp->rq_xprt_hlen = ret; + rqstp->rq_xprt_hlen = 0; if (svc_rdma_is_reverse_direction_reply(xprt, ctxt)) goto out_backchannel; From 4d5004451ab2218eab94a30e1841462c9316ba19 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 6 Apr 2022 13:51:32 -0400 Subject: [PATCH 5/5] SUNRPC: Fix the svc_deferred_event trace class Fix a NULL deref crash that occurs when an svc_rqst is deferred while the sunrpc tracing subsystem is enabled. svc_revisit() sets dr->xprt to NULL, so it can't be relied upon in the tracepoint to provide the remote's address. Unfortunately we can't revert the "svc_deferred_class" hunk in commit ece200ddd54b ("sunrpc: Save remote presentation address in svc_xprt for trace events") because there is now a specific check of event format specifiers for unsafe dereferences. The warning that check emits is: event svc_defer_recv has unsafe dereference of argument 1 A "%pISpc" format specifier with a "struct sockaddr *" is indeed flagged by this check. Instead, take the brute-force approach used by the svcrdma_qp_error tracepoint. Convert the dr::addr field into a presentation address in the TP_fast_assign() arm of the trace event, and store that as a string. This fix can be backported to -stable kernels. In the meantime, commit c6ced22997ad ("tracing: Update print fmt check to handle new __get_sockaddr() macro") is now in v5.18, so this wonky fix can be replaced with __sockaddr() and friends properly during the v5.19 merge window. Fixes: ece200ddd54b ("sunrpc: Save remote presentation address in svc_xprt for trace events") Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ab8ae1f6ba84..4eb706fa5825 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2017,17 +2017,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_STRUCT__entry( __field(const void *, dr) __field(u32, xid) - __string(addr, dr->xprt->xpt_remotebuf) + __array(__u8, addr, INET6_ADDRSTRLEN + 10) ), TP_fast_assign( __entry->dr = dr; __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); - __assign_str(addr, dr->xprt->xpt_remotebuf); + snprintf(__entry->addr, sizeof(__entry->addr) - 1, + "%pISpc", (struct sockaddr *)&dr->addr); ), - TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, + TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr, __entry->xid) );