NFSD 6.9 Release Notes

The bulk of the patches for this release are optimizations, code
 clean-ups, and minor bug fixes.
 
 One new feature to mention is that NFSD administrators now have the
 ability to revoke NFSv4 open and lock state. NFSD's NFSv3 support
 has had this capability for some time.
 
 As always I am grateful to NFSD contributors, reviewers, and
 testers.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEKLLlsBKG3yQ88j7+M2qzM29mf5cFAmXwV4QACgkQM2qzM29m
 f5c7cg/8CRe0mGbeEMonoSycBjANDuiRolCM+DhVccUvSyWPqf4blF5yrNHcf5zN
 WmjQHVXIJUMVpLovcakj+4aBIuXGgdSmBJamFTy9fVfcFadiWYRceNgMMXpLMDDI
 fMAszRUyfL/r0Evj0Zajt86R5/gGn+W9X6HlDc1k7VV0Z+fzRw9WMxADy11cgHLp
 mh2bzyPmwu0EfBYlWNWLqzWVZm1C5UCGnlInyr0KXImCLOkpJqAVXTDvDkGFW2Qw
 1kJhodyabf6fRV2ZqPjLUuR4aRqABey83rB0N5z7MumO/dJUBW3CHR3uNMqvkmh3
 XevI8bPzS2Kypijcx7dONtkDWwU+fsvCdepNpmVDB73B19BFiLG+HDbMypJ0dmp+
 rvvfILRDCmIb+FA1DUeT3lIc6ac1f1+qAVc7hi3E7rGctEJWeHDsZg+E1PuTvpxM
 3XfRaFnucY5vwyiB2/uI4eblBHcVXoKho+pUqQMegLPRbgsEUyFUfg3+ZMtntagd
 OVUXvWYIARP97HNh0J5ChcGI72UpXtFWMlbbiTiCzYx4FeiCffeczIERXNJ4FYAg
 fKUaiBhdAN1PPFCRXJORZ5XlSIeZttUNSJUPfmuOpkscMdkpRUIhuEUYo9K8/1eL
 O+YZeGW/kTG+llxOERfEHJoekLf1TgGdU7oBmTIgQIK03hTUih8=
 =75G4
 -----END PGP SIGNATURE-----

Merge tag 'nfsd-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:
 "The bulk of the patches for this release are optimizations, code
  clean-ups, and minor bug fixes.

  One new feature to mention is that NFSD administrators now have the
  ability to revoke NFSv4 open and lock state. NFSD's NFSv3 support has
  had this capability for some time.

  As always I am grateful to NFSD contributors, reviewers, and testers"

* tag 'nfsd-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (75 commits)
  NFSD: Clean up nfsd4_encode_replay()
  NFSD: send OP_CB_RECALL_ANY to clients when number of delegations reaches its limit
  NFSD: Document nfsd_setattr() fill-attributes behavior
  nfsd: Fix NFSv3 atomicity bugs in nfsd_setattr()
  nfsd: Fix a regression in nfsd_setattr()
  NFSD: OP_CB_RECALL_ANY should recall both read and write delegations
  NFSD: handle GETATTR conflict with write delegation
  NFSD: add support for CB_GETATTR callback
  NFSD: Document the phases of CREATE_SESSION
  NFSD: Fix the NFSv4.1 CREATE_SESSION operation
  nfsd: clean up comments over nfs4_client definition
  svcrdma: Add Write chunk WRs to the RPC's Send WR chain
  svcrdma: Post WRs for Write chunks in svc_rdma_sendto()
  svcrdma: Post the Reply chunk and Send WR together
  svcrdma: Move write_info for Reply chunks into struct svc_rdma_send_ctxt
  svcrdma: Post Send WR chain
  svcrdma: Fix retry loop in svc_rdma_send()
  svcrdma: Prevent a UAF in svc_rdma_send()
  svcrdma: Fix SQ wake-ups
  svcrdma: Increase the per-transport rw_ctx count
  ...
This commit is contained in:
Linus Torvalds 2024-03-12 14:27:37 -07:00
commit a01c9fe323
44 changed files with 1765 additions and 755 deletions

View File

@ -8172,6 +8172,7 @@ F: include/uapi/scsi/fc/
FILE LOCKING (flock() and fcntl()/lockf()) FILE LOCKING (flock() and fcntl()/lockf())
M: Jeff Layton <jlayton@kernel.org> M: Jeff Layton <jlayton@kernel.org>
M: Chuck Lever <chuck.lever@oracle.com> M: Chuck Lever <chuck.lever@oracle.com>
R: Alexander Aring <alex.aring@gmail.com>
L: linux-fsdevel@vger.kernel.org L: linux-fsdevel@vger.kernel.org
S: Maintained S: Maintained
F: fs/fcntl.c F: fs/fcntl.c

View File

@ -710,8 +710,6 @@ static const struct svc_version *nlmsvc_version[] = {
#endif #endif
}; };
static struct svc_stat nlmsvc_stats;
#define NLM_NRVERS ARRAY_SIZE(nlmsvc_version) #define NLM_NRVERS ARRAY_SIZE(nlmsvc_version)
static struct svc_program nlmsvc_program = { static struct svc_program nlmsvc_program = {
.pg_prog = NLM_PROGRAM, /* program number */ .pg_prog = NLM_PROGRAM, /* program number */
@ -719,7 +717,6 @@ static struct svc_program nlmsvc_program = {
.pg_vers = nlmsvc_version, /* version table */ .pg_vers = nlmsvc_version, /* version table */
.pg_name = "lockd", /* service name */ .pg_name = "lockd", /* service name */
.pg_class = "nfsd", /* share authentication with nfsd */ .pg_class = "nfsd", /* share authentication with nfsd */
.pg_stats = &nlmsvc_stats, /* stats table */
.pg_authenticate = &lockd_authenticate, /* export authentication */ .pg_authenticate = &lockd_authenticate, /* export authentication */
.pg_init_request = svc_generic_init_request, .pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set, .pg_rpcbind_set = svc_generic_rpcbind_set,

View File

@ -356,15 +356,12 @@ static const struct svc_version *nfs4_callback_version[] = {
[4] = &nfs4_callback_version4, [4] = &nfs4_callback_version4,
}; };
static struct svc_stat nfs4_callback_stats;
static struct svc_program nfs4_callback_program = { static struct svc_program nfs4_callback_program = {
.pg_prog = NFS4_CALLBACK, /* RPC service number */ .pg_prog = NFS4_CALLBACK, /* RPC service number */
.pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */ .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
.pg_vers = nfs4_callback_version, /* version table */ .pg_vers = nfs4_callback_version, /* version table */
.pg_name = "NFSv4 callback", /* service name */ .pg_name = "NFSv4 callback", /* service name */
.pg_class = "nfs", /* authentication class */ .pg_class = "nfs", /* authentication class */
.pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate, .pg_authenticate = nfs_callback_authenticate,
.pg_init_request = svc_generic_init_request, .pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set, .pg_rpcbind_set = svc_generic_rpcbind_set,

View File

@ -328,10 +328,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
} }
static void static void
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls) nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{ {
struct nfs4_client *clp = ls->ls_stid.sc_client; struct nfs4_client *clp = ls->ls_stid.sc_client;
struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev; struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp), 0, true); nfsd4_scsi_pr_key(clp), 0, true);

View File

@ -80,8 +80,6 @@ enum {
int nfsd_drc_slab_create(void); int nfsd_drc_slab_create(void);
void nfsd_drc_slab_free(void); void nfsd_drc_slab_free(void);
int nfsd_net_reply_cache_init(struct nfsd_net *nn);
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *); int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,

View File

@ -61,13 +61,10 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
struct nfsd_fcache_disposal { struct nfsd_fcache_disposal {
struct work_struct work;
spinlock_t lock; spinlock_t lock;
struct list_head freeme; struct list_head freeme;
}; };
static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab; static struct kmem_cache *nfsd_file_mark_slab;
static struct list_lru nfsd_file_lru; static struct list_lru nfsd_file_lru;
@ -283,7 +280,7 @@ nfsd_file_free(struct nfsd_file *nf)
nfsd_file_mark_put(nf->nf_mark); nfsd_file_mark_put(nf->nf_mark);
if (nf->nf_file) { if (nf->nf_file) {
nfsd_file_check_write_error(nf); nfsd_file_check_write_error(nf);
filp_close(nf->nf_file, NULL); nfsd_filp_close(nf->nf_file);
} }
/* /*
@ -421,7 +418,37 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
spin_lock(&l->lock); spin_lock(&l->lock);
list_move_tail(&nf->nf_lru, &l->freeme); list_move_tail(&nf->nf_lru, &l->freeme);
spin_unlock(&l->lock); spin_unlock(&l->lock);
queue_work(nfsd_filecache_wq, &l->work); svc_wake_up(nn->nfsd_serv);
}
}
/**
* nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed.
* @nn: nfsd_net in which to find files to be disposed.
*
* When files held open for nfsv3 are removed from the filecache, whether
* due to memory pressure or garbage collection, they are queued to
* a per-net-ns queue. This function completes the disposal, either
* directly or by waking another nfsd thread to help with the work.
*/
void nfsd_file_net_dispose(struct nfsd_net *nn)
{
struct nfsd_fcache_disposal *l = nn->fcache_disposal;
if (!list_empty(&l->freeme)) {
LIST_HEAD(dispose);
int i;
spin_lock(&l->lock);
for (i = 0; i < 8 && !list_empty(&l->freeme); i++)
list_move(l->freeme.next, &dispose);
spin_unlock(&l->lock);
if (!list_empty(&l->freeme))
/* Wake up another thread to share the work
* *before* doing any actual disposing.
*/
svc_wake_up(nn->nfsd_serv);
nfsd_file_dispose_list(&dispose);
} }
} }
@ -631,28 +658,6 @@ nfsd_file_close_inode_sync(struct inode *inode)
list_del_init(&nf->nf_lru); list_del_init(&nf->nf_lru);
nfsd_file_free(nf); nfsd_file_free(nf);
} }
flush_delayed_fput();
}
/**
* nfsd_file_delayed_close - close unused nfsd_files
* @work: dummy
*
* Scrape the freeme list for this nfsd_net, and then dispose of them
* all.
*/
static void
nfsd_file_delayed_close(struct work_struct *work)
{
LIST_HEAD(head);
struct nfsd_fcache_disposal *l = container_of(work,
struct nfsd_fcache_disposal, work);
spin_lock(&l->lock);
list_splice_init(&l->freeme, &head);
spin_unlock(&l->lock);
nfsd_file_dispose_list(&head);
} }
static int static int
@ -717,25 +722,18 @@ nfsd_file_cache_init(void)
return ret; return ret;
ret = -ENOMEM; ret = -ENOMEM;
nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", WQ_UNBOUND, 0); nfsd_file_slab = KMEM_CACHE(nfsd_file, 0);
if (!nfsd_filecache_wq)
goto out;
nfsd_file_slab = kmem_cache_create("nfsd_file",
sizeof(struct nfsd_file), 0, 0, NULL);
if (!nfsd_file_slab) { if (!nfsd_file_slab) {
pr_err("nfsd: unable to create nfsd_file_slab\n"); pr_err("nfsd: unable to create nfsd_file_slab\n");
goto out_err; goto out_err;
} }
nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0);
sizeof(struct nfsd_file_mark), 0, 0, NULL);
if (!nfsd_file_mark_slab) { if (!nfsd_file_mark_slab) {
pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
goto out_err; goto out_err;
} }
ret = list_lru_init(&nfsd_file_lru); ret = list_lru_init(&nfsd_file_lru);
if (ret) { if (ret) {
pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
@ -785,8 +783,6 @@ out_err:
nfsd_file_slab = NULL; nfsd_file_slab = NULL;
kmem_cache_destroy(nfsd_file_mark_slab); kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL; nfsd_file_mark_slab = NULL;
destroy_workqueue(nfsd_filecache_wq);
nfsd_filecache_wq = NULL;
rhltable_destroy(&nfsd_file_rhltable); rhltable_destroy(&nfsd_file_rhltable);
goto out; goto out;
} }
@ -832,7 +828,6 @@ nfsd_alloc_fcache_disposal(void)
l = kmalloc(sizeof(*l), GFP_KERNEL); l = kmalloc(sizeof(*l), GFP_KERNEL);
if (!l) if (!l)
return NULL; return NULL;
INIT_WORK(&l->work, nfsd_file_delayed_close);
spin_lock_init(&l->lock); spin_lock_init(&l->lock);
INIT_LIST_HEAD(&l->freeme); INIT_LIST_HEAD(&l->freeme);
return l; return l;
@ -841,7 +836,6 @@ nfsd_alloc_fcache_disposal(void)
static void static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{ {
cancel_work_sync(&l->work);
nfsd_file_dispose_list(&l->freeme); nfsd_file_dispose_list(&l->freeme);
kfree(l); kfree(l);
} }
@ -910,8 +904,6 @@ nfsd_file_cache_shutdown(void)
fsnotify_wait_marks_destroyed(); fsnotify_wait_marks_destroyed();
kmem_cache_destroy(nfsd_file_mark_slab); kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL; nfsd_file_mark_slab = NULL;
destroy_workqueue(nfsd_filecache_wq);
nfsd_filecache_wq = NULL;
rhltable_destroy(&nfsd_file_rhltable); rhltable_destroy(&nfsd_file_rhltable);
for_each_possible_cpu(i) { for_each_possible_cpu(i) {

View File

@ -56,6 +56,7 @@ void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf); void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode); void nfsd_file_close_inode_sync(struct inode *inode);
void nfsd_file_net_dispose(struct nfsd_net *nn);
bool nfsd_file_is_cached(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp); unsigned int may_flags, struct nfsd_file **nfp);

View File

@ -11,8 +11,10 @@
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <linux/filelock.h> #include <linux/filelock.h>
#include <linux/nfs4.h>
#include <linux/percpu_counter.h> #include <linux/percpu_counter.h>
#include <linux/siphash.h> #include <linux/siphash.h>
#include <linux/sunrpc/stats.h>
/* Hash tables for nfs4_clientid state */ /* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4 #define CLIENT_HASH_BITS 4
@ -26,10 +28,22 @@ struct nfsd4_client_tracking_ops;
enum { enum {
/* cache misses due only to checksum comparison failures */ /* cache misses due only to checksum comparison failures */
NFSD_NET_PAYLOAD_MISSES, NFSD_STATS_PAYLOAD_MISSES,
/* amount of memory (in bytes) currently consumed by the DRC */ /* amount of memory (in bytes) currently consumed by the DRC */
NFSD_NET_DRC_MEM_USAGE, NFSD_STATS_DRC_MEM_USAGE,
NFSD_NET_COUNTERS_NUM NFSD_STATS_RC_HITS, /* repcache hits */
NFSD_STATS_RC_MISSES, /* repcache misses */
NFSD_STATS_RC_NOCACHE, /* uncached reqs */
NFSD_STATS_FH_STALE, /* FH stale error */
NFSD_STATS_IO_READ, /* bytes returned to read requests */
NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
#ifdef CONFIG_NFSD_V4
NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
#endif
NFSD_STATS_COUNTERS_NUM
}; };
/* /*
@ -164,7 +178,10 @@ struct nfsd_net {
atomic_t num_drc_entries; atomic_t num_drc_entries;
/* Per-netns stats counters */ /* Per-netns stats counters */
struct percpu_counter counter[NFSD_NET_COUNTERS_NUM]; struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
/* sunrpc svc stats */
struct svc_stat nfsd_svcstats;
/* longest hash chain seen */ /* longest hash chain seen */
unsigned int longest_chain; unsigned int longest_chain;
@ -192,6 +209,10 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients; atomic_t nfsd_courtesy_clients;
struct shrinker *nfsd_client_shrinker; struct shrinker *nfsd_client_shrinker;
struct work_struct nfsd_shrinker_work; struct work_struct nfsd_shrinker_work;
/* last time an admin-revoke happened for NFSv4.0 */
time64_t nfs40_last_revoke;
}; };
/* Simple check to find out if a given net was properly initialized */ /* Simple check to find out if a given net was properly initialized */

View File

@ -71,13 +71,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp)
struct nfsd_attrs attrs = { struct nfsd_attrs attrs = {
.na_iattr = &argp->attrs, .na_iattr = &argp->attrs,
}; };
const struct timespec64 *guardtime = NULL;
dprintk("nfsd: SETATTR(3) %s\n", dprintk("nfsd: SETATTR(3) %s\n",
SVCFH_fmt(&argp->fh)); SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh); fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, if (argp->check_guard)
argp->check_guard, argp->guardtime); guardtime = &argp->guardtime;
resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime);
return rpc_success; return rpc_success;
} }

View File

@ -295,17 +295,14 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
static bool static bool
svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args) svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args)
{ {
__be32 *p;
u32 check; u32 check;
if (xdr_stream_decode_bool(xdr, &check) < 0) if (xdr_stream_decode_bool(xdr, &check) < 0)
return false; return false;
if (check) { if (check) {
p = xdr_inline_decode(xdr, XDR_UNIT * 2); if (!svcxdr_decode_nfstime3(xdr, &args->guardtime))
if (!p)
return false; return false;
args->check_guard = 1; args->check_guard = 1;
args->guardtime = be32_to_cpup(p);
} else } else
args->check_guard = 0; args->check_guard = 0;

View File

@ -45,7 +45,7 @@
#define NFSDDBG_FACILITY NFSDDBG_PROC #define NFSDDBG_FACILITY NFSDDBG_PROC
static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); static void nfsd4_mark_cb_fault(struct nfs4_client *clp);
#define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1 #define NFSPROC4_CB_COMPOUND 1
@ -85,7 +85,21 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n)
static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
size_t len) size_t len)
{ {
WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); xdr_stream_encode_uint32_array(xdr, bitmap, len);
}
static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
struct nfs4_cb_fattr *fattr)
{
fattr->ncf_cb_change = 0;
fattr->ncf_cb_fsize = 0;
if (bitmap[0] & FATTR4_WORD0_CHANGE)
if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
return -NFSERR_BAD_XDR;
if (bitmap[0] & FATTR4_WORD0_SIZE)
if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
return -NFSERR_BAD_XDR;
return 0;
} }
static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op) static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
@ -333,6 +347,30 @@ encode_cb_recallany4args(struct xdr_stream *xdr,
hdr->nops++; hdr->nops++;
} }
/*
 * CB_GETATTR4args
 *
 *	struct CB_GETATTR4args {
 *		nfs_fh4 fh;
 *		bitmap4 attr_request;
 *	};
 *
 * The size and change attributes are the only ones
 * guaranteed to be serviced by the client.
 *
 * Emits the CB_GETATTR opcode, the file handle of the file behind the
 * delegation that embeds @fattr, and the attribute request bitmap taken
 * from @fattr, then counts the operation in @hdr.
 */
static void
encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
		       struct nfs4_cb_fattr *fattr)
{
	struct nfs4_delegation *deleg;

	/* @fattr is the dl_cb_fattr member of its owning delegation */
	deleg = container_of(fattr, struct nfs4_delegation, dl_cb_fattr);

	encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
	encode_nfs_fh4(xdr, &deleg->dl_stid.sc_file->fi_fhandle);
	encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap));

	hdr->nops++;
}
/* /*
* CB_SEQUENCE4args * CB_SEQUENCE4args
* *
@ -468,6 +506,26 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
xdr_reserve_space(xdr, 0); xdr_reserve_space(xdr, 0);
} }
/*
* 20.1. Operation 3: CB_GETATTR - Get Attributes
*/
static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req,
struct xdr_stream *xdr, const void *data)
{
const struct nfsd4_callback *cb = data;
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
struct nfs4_cb_compound_hdr hdr = {
.ident = cb->cb_clp->cl_cb_ident,
.minorversion = cb->cb_clp->cl_minorversion,
};
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_getattr4args(xdr, &hdr, ncf);
encode_cb_nops(&hdr);
}
/* /*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation * 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/ */
@ -523,6 +581,42 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
return 0; return 0;
} }
/*
 * 20.1.  Operation 3: CB_GETATTR - Get Attributes
 *
 * Decode the reply to a CB_GETATTR callback: the compound header, the
 * CB_SEQUENCE result, the CB_GETATTR operation status and, only when
 * that operation succeeded, the attribute bitmap, the attribute length
 * and the attribute values (stored in the nfs4_cb_fattr that embeds
 * @data as ncf_getattr).
 *
 * Returns zero when the reply was decoded as far as it goes, including
 * the case where the client failed CB_SEQUENCE or CB_GETATTR; the
 * per-operation status is then left in cb->cb_seq_status or
 * cb->cb_status for the completion path to examine. Returns a non-zero
 * value when one of the decoders fails, -NFSERR_BAD_XDR for a
 * malformed attribute section.
 */
static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
				   struct xdr_stream *xdr,
				   void *data)
{
	struct nfsd4_callback *cb = data;
	struct nfs4_cb_compound_hdr hdr;
	int status;
	u32 bitmap[3] = {0};
	u32 attrlen;
	struct nfs4_cb_fattr *ncf =
		container_of(cb, struct nfs4_cb_fattr, ncf_getattr);

	status = decode_cb_compound4res(xdr, &hdr);
	if (unlikely(status))
		return status;

	status = decode_cb_sequence4res(xdr, cb);
	if (unlikely(status || cb->cb_seq_status))
		return status;

	status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
	/*
	 * A failed CB_GETATTR carries no bitmap or attribute data, so
	 * stop here rather than decoding past the status word and
	 * misreporting a valid error reply as a bad XDR stream.
	 */
	if (unlikely(status || cb->cb_status))
		return status;

	if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
		return -NFSERR_BAD_XDR;
	if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
		return -NFSERR_BAD_XDR;
	/* Only the change and size attributes are ever requested */
	if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
		return -NFSERR_BAD_XDR;

	return decode_cb_fattr4(xdr, bitmap, ncf);
}
/* /*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation * 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/ */
@ -831,6 +925,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
}; };
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
@ -887,7 +982,16 @@ static struct workqueue_struct *callback_wq;
static bool nfsd4_queue_cb(struct nfsd4_callback *cb) static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{ {
return queue_work(callback_wq, &cb->cb_work); trace_nfsd_cb_queue(cb->cb_clp, cb);
return queue_delayed_work(callback_wq, &cb->cb_work, 0);
}
static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb,
unsigned long msecs)
{
trace_nfsd_cb_queue(cb->cb_clp, cb);
queue_delayed_work(callback_wq, &cb->cb_work,
msecs_to_jiffies(msecs));
} }
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
@ -999,18 +1103,18 @@ static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate)
{ {
if (clp->cl_cb_state != newstate) { if (clp->cl_cb_state != newstate) {
clp->cl_cb_state = newstate; clp->cl_cb_state = newstate;
trace_nfsd_cb_state(clp); trace_nfsd_cb_new_state(clp);
} }
} }
static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) static void nfsd4_mark_cb_down(struct nfs4_client *clp)
{ {
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return; return;
nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN); nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN);
} }
static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) static void nfsd4_mark_cb_fault(struct nfs4_client *clp)
{ {
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return; return;
@ -1022,7 +1126,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
if (task->tk_status) if (task->tk_status)
nfsd4_mark_cb_down(clp, task->tk_status); nfsd4_mark_cb_down(clp);
else else
nfsd4_mark_cb_state(clp, NFSD4_CB_UP); nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
} }
@ -1106,6 +1210,7 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
{ {
struct nfs4_client *clp = cb->cb_clp; struct nfs4_client *clp = cb->cb_clp;
trace_nfsd_cb_destroy(clp, cb);
nfsd41_cb_release_slot(cb); nfsd41_cb_release_slot(cb);
if (cb->cb_ops && cb->cb_ops->release) if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb); cb->cb_ops->release(cb);
@ -1158,6 +1263,8 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
if (!cb->cb_holds_slot) if (!cb->cb_holds_slot)
goto need_restart; goto need_restart;
/* This is the operation status code for CB_SEQUENCE */
trace_nfsd_cb_seq_status(task, cb);
switch (cb->cb_seq_status) { switch (cb->cb_seq_status) {
case 0: case 0:
/* /*
@ -1171,13 +1278,23 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
break; break;
case -ESERVERFAULT: case -ESERVERFAULT:
++session->se_cb_seq_nr; ++session->se_cb_seq_nr;
fallthrough; nfsd4_mark_cb_fault(cb->cb_clp);
case 1:
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
ret = false; ret = false;
break; break;
case 1:
/*
* cb_seq_status remains 1 if an RPC Reply was never
* received. NFSD can't know if the client processed
* the CB_SEQUENCE operation. Ask the client to send a
* DESTROY_SESSION to recover.
*/
fallthrough;
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp);
ret = false;
goto need_restart;
case -NFS4ERR_DELAY: case -NFS4ERR_DELAY:
cb->cb_seq_status = 1;
if (!rpc_restart_call(task)) if (!rpc_restart_call(task))
goto out; goto out;
@ -1192,14 +1309,11 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
} }
break; break;
default: default:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); nfsd4_mark_cb_fault(cb->cb_clp);
dprintk("%s: unprocessed error %d\n", __func__,
cb->cb_seq_status);
} }
nfsd41_cb_release_slot(cb); nfsd41_cb_release_slot(cb);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr); trace_nfsd_cb_free_slot(task, cb);
if (RPC_SIGNALLED(task)) if (RPC_SIGNALLED(task))
goto need_restart; goto need_restart;
@ -1211,6 +1325,7 @@ retry_nowait:
goto out; goto out;
need_restart: need_restart:
if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
trace_nfsd_cb_restart(clp, cb);
task->tk_status = 0; task->tk_status = 0;
cb->cb_need_restart = true; cb->cb_need_restart = true;
} }
@ -1240,7 +1355,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
case -EIO: case -EIO:
case -ETIMEDOUT: case -ETIMEDOUT:
case -EACCES: case -EACCES:
nfsd4_mark_cb_down(clp, task->tk_status); nfsd4_mark_cb_down(clp);
} }
break; break;
default: default:
@ -1295,12 +1410,13 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
nfsd41_cb_inflight_wait_complete(clp); nfsd41_cb_inflight_wait_complete(clp);
} }
/* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{ {
struct nfsd4_session *s; struct nfsd4_session *s;
struct nfsd4_conn *c; struct nfsd4_conn *c;
lockdep_assert_held(&clp->cl_lock);
list_for_each_entry(s, &clp->cl_sessions, se_perclnt) { list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
list_for_each_entry(c, &s->se_conns, cn_persession) { list_for_each_entry(c, &s->se_conns, cn_persession) {
if (c->cn_flags & NFS4_CDFC4_BACK) if (c->cn_flags & NFS4_CDFC4_BACK)
@ -1324,11 +1440,14 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
struct nfsd4_conn *c; struct nfsd4_conn *c;
int err; int err;
trace_nfsd_cb_bc_update(clp, cb);
/* /*
* This is either an update, or the client dying; in either case, * This is either an update, or the client dying; in either case,
* kill the old client: * kill the old client:
*/ */
if (clp->cl_cb_client) { if (clp->cl_cb_client) {
trace_nfsd_cb_bc_shutdown(clp, cb);
rpc_shutdown_client(clp->cl_cb_client); rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL; clp->cl_cb_client = NULL;
put_cred(clp->cl_cb_cred); put_cred(clp->cl_cb_cred);
@ -1340,13 +1459,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
} }
if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
return; return;
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
/* /*
* Only serialized callback code is allowed to clear these * Only serialized callback code is allowed to clear these
* flags; main nfsd code can only set them: * flags; main nfsd code can only set them:
*/ */
BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)); WARN_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
c = __nfsd4_find_backchannel(clp); c = __nfsd4_find_backchannel(clp);
if (c) { if (c) {
@ -1358,7 +1479,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses); err = setup_callback_client(clp, &conn, ses);
if (err) { if (err) {
nfsd4_mark_cb_down(clp, err); nfsd4_mark_cb_down(clp);
if (c) if (c)
svc_xprt_put(c->cn_xprt); svc_xprt_put(c->cn_xprt);
return; return;
@ -1369,25 +1490,28 @@ static void
nfsd4_run_cb_work(struct work_struct *work) nfsd4_run_cb_work(struct work_struct *work)
{ {
struct nfsd4_callback *cb = struct nfsd4_callback *cb =
container_of(work, struct nfsd4_callback, cb_work); container_of(work, struct nfsd4_callback, cb_work.work);
struct nfs4_client *clp = cb->cb_clp; struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt; struct rpc_clnt *clnt;
int flags; int flags;
if (cb->cb_need_restart) { trace_nfsd_cb_start(clp);
cb->cb_need_restart = false;
} else {
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
}
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb); nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client; clnt = clp->cl_cb_client;
if (!clnt) { if (!clnt) {
/* Callback channel broken, or client killed; give up: */ if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
nfsd41_destroy_cb(cb); nfsd41_destroy_cb(cb);
else {
/*
* XXX: Ideally, we could wait for the client to
* reconnect, but I haven't figured out how
* to do that yet.
*/
nfsd4_queue_cb_delayed(cb, 25);
}
return; return;
} }
@ -1400,6 +1524,12 @@ nfsd4_run_cb_work(struct work_struct *work)
return; return;
} }
if (cb->cb_need_restart) {
cb->cb_need_restart = false;
} else {
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
}
cb->cb_msg.rpc_cred = clp->cl_cb_cred; cb->cb_msg.rpc_cred = clp->cl_cb_cred;
flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
@ -1414,8 +1544,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb; cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops; cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_seq_status = 1;
cb->cb_status = 0; cb->cb_status = 0;
cb->cb_need_restart = false; cb->cb_need_restart = false;
cb->cb_holds_slot = false; cb->cb_holds_slot = false;

View File

@ -152,6 +152,23 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
#endif #endif
} }
/*
 * nfsd4_close_layout - drop the nfsd_file held by a layout stateid
 * @ls: layout stateid whose ls_file is to be released
 *
 * ls_file is fetched and cleared while holding the fi_lock of the
 * stateid's file, so a second call finds NULL and does nothing. If a
 * file was attached, its lease is removed (unless the layout type has
 * disable_recalls set) and the nfsd_file reference is put.
 */
void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
{
	struct nfsd_file *nf;

	spin_lock(&ls->ls_stid.sc_file->fi_lock);
	nf = ls->ls_file;
	ls->ls_file = NULL;
	spin_unlock(&ls->ls_stid.sc_file->fi_lock);

	if (!nf)
		return;

	if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
		kernel_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&ls);
	nfsd_file_put(nf);
}
static void static void
nfsd4_free_layout_stateid(struct nfs4_stid *stid) nfsd4_free_layout_stateid(struct nfs4_stid *stid)
{ {
@ -169,9 +186,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
list_del_init(&ls->ls_perfile); list_del_init(&ls->ls_perfile);
spin_unlock(&fp->fi_lock); spin_unlock(&fp->fi_lock);
if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls) nfsd4_close_layout(ls);
kernel_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
nfsd_file_put(ls->ls_file);
if (ls->ls_recalled) if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls); atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@ -235,7 +250,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
NFSPROC4_CLNT_CB_LAYOUT); NFSPROC4_CLNT_CB_LAYOUT);
if (parent->sc_type == NFS4_DELEG_STID) if (parent->sc_type == SC_TYPE_DELEG)
ls->ls_file = nfsd_file_get(fp->fi_deleg_file); ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else else
ls->ls_file = find_any_file(fp); ls->ls_file = find_any_file(fp);
@ -249,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
} }
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
stp->sc_type = NFS4_LAYOUT_STID; stp->sc_type = SC_TYPE_LAYOUT;
list_add(&ls->ls_perclnt, &clp->cl_lo_states); list_add(&ls->ls_perclnt, &clp->cl_lo_states);
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
@ -268,13 +283,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
{ {
struct nfs4_layout_stateid *ls; struct nfs4_layout_stateid *ls;
struct nfs4_stid *stid; struct nfs4_stid *stid;
unsigned char typemask = NFS4_LAYOUT_STID; unsigned short typemask = SC_TYPE_LAYOUT;
__be32 status; __be32 status;
if (create) if (create)
typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID); typemask |= (SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG);
status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid, status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
net_generic(SVC_NET(rqstp), nfsd_net_id)); net_generic(SVC_NET(rqstp), nfsd_net_id));
if (status) if (status)
goto out; goto out;
@ -285,7 +300,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
goto out_put_stid; goto out_put_stid;
} }
if (stid->sc_type != NFS4_LAYOUT_STID) { if (stid->sc_type != SC_TYPE_LAYOUT) {
ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type); ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
nfs4_put_stid(stid); nfs4_put_stid(stid);
@ -517,7 +532,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
lrp->lrs_present = true; lrp->lrs_present = true;
} else { } else {
trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid); trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
nfs4_unhash_stid(&ls->ls_stid); ls->ls_stid.sc_status |= SC_STATUS_CLOSED;
lrp->lrs_present = false; lrp->lrs_present = false;
} }
spin_unlock(&ls->ls_lock); spin_unlock(&ls->ls_lock);
@ -604,7 +619,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
} }
static void static void
nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{ {
struct nfs4_client *clp = ls->ls_stid.sc_client; struct nfs4_client *clp = ls->ls_stid.sc_client;
char addr_str[INET6_ADDRSTRLEN]; char addr_str[INET6_ADDRSTRLEN];
@ -626,7 +641,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
argv[0] = (char *)nfsd_recall_failed; argv[0] = (char *)nfsd_recall_failed;
argv[1] = addr_str; argv[1] = addr_str;
argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id; argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL; argv[3] = NULL;
error = call_usermodehelper(nfsd_recall_failed, argv, envp, error = call_usermodehelper(nfsd_recall_failed, argv, envp,
@ -656,6 +671,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
struct nfsd_net *nn; struct nfsd_net *nn;
ktime_t now, cutoff; ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops; const struct nfsd4_layout_ops *ops;
struct nfsd_file *fl;
trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task); trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
switch (task->tk_status) { switch (task->tk_status) {
@ -687,12 +703,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
* Unknown error or non-responding client, we'll need to fence. * Unknown error or non-responding client, we'll need to fence.
*/ */
trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid); trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
rcu_read_lock();
ops = nfsd4_layout_ops[ls->ls_layout_type]; fl = nfsd_file_get(ls->ls_file);
if (ops->fence_client) rcu_read_unlock();
ops->fence_client(ls); if (fl) {
else ops = nfsd4_layout_ops[ls->ls_layout_type];
nfsd4_cb_layout_fail(ls); if (ops->fence_client)
ops->fence_client(ls, fl);
else
nfsd4_cb_layout_fail(ls, fl);
nfsd_file_put(fl);
}
return 1; return 1;
case -NFS4ERR_NOMATCHING_LAYOUT: case -NFS4ERR_NOMATCHING_LAYOUT:
trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid); trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
@ -755,13 +776,11 @@ nfsd4_init_pnfs(void)
for (i = 0; i < DEVID_HASH_SIZE; i++) for (i = 0; i < DEVID_HASH_SIZE; i++)
INIT_LIST_HEAD(&nfsd_devid_hash[i]); INIT_LIST_HEAD(&nfsd_devid_hash[i]);
nfs4_layout_cache = kmem_cache_create("nfs4_layout", nfs4_layout_cache = KMEM_CACHE(nfs4_layout, 0);
sizeof(struct nfs4_layout), 0, 0, NULL);
if (!nfs4_layout_cache) if (!nfs4_layout_cache)
return -ENOMEM; return -ENOMEM;
nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid", nfs4_layout_stateid_cache = KMEM_CACHE(nfs4_layout_stateid, 0);
sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
if (!nfs4_layout_stateid_cache) { if (!nfs4_layout_stateid_cache) {
kmem_cache_destroy(nfs4_layout_cache); kmem_cache_destroy(nfs4_layout_cache);
return -ENOMEM; return -ENOMEM;

View File

@ -1143,6 +1143,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}; };
struct inode *inode; struct inode *inode;
__be32 status = nfs_ok; __be32 status = nfs_ok;
bool save_no_wcc;
int err; int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
@ -1168,8 +1169,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status) if (status)
goto out; goto out;
status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, save_no_wcc = cstate->current_fh.fh_no_wcc;
0, (time64_t)0); cstate->current_fh.fh_no_wcc = true;
status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL);
cstate->current_fh.fh_no_wcc = save_no_wcc;
if (!status) if (!status)
status = nfserrno(attrs.na_labelerr); status = nfserrno(attrs.na_labelerr);
if (!status) if (!status)
@ -2490,10 +2493,10 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
return rpc_success; return rpc_success;
} }
static inline void nfsd4_increment_op_stats(u32 opnum) static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum)
{ {
if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]); percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]);
} }
static const struct nfsd4_operation nfsd4_ops[]; static const struct nfsd4_operation nfsd4_ops[];
@ -2768,7 +2771,7 @@ encode_op:
status, nfsd4_op_name(op->opnum)); status, nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate); nfsd4_cstate_clear_replay(cstate);
nfsd4_increment_op_stats(op->opnum); nfsd4_increment_op_stats(nn, op->opnum);
} }
fh_put(current_fh); fh_put(current_fh);

File diff suppressed because it is too large Load Diff

View File

@ -3507,6 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
unsigned long mask[2]; unsigned long mask[2];
} u; } u;
unsigned long bit; unsigned long bit;
bool file_modified = false;
u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1); WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval)); WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@ -3533,7 +3535,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
} }
args.size = 0; args.size = 0;
if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry)); status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
&file_modified, &size);
if (status) if (status)
goto out; goto out;
} }
@ -3543,7 +3546,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
AT_STATX_SYNC_AS_STAT); AT_STATX_SYNC_AS_STAT);
if (err) if (err)
goto out_nfserr; goto out_nfserr;
args.size = args.stat.size; if (file_modified)
args.size = size;
else
args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME)) if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */ /* underlying FS does not offer btime so we can't share it */
@ -5386,16 +5392,11 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,
/* /*
* If the cookie is larger than the maximum number we can fit * If the cookie is larger than the maximum number we can fit
* in either the buffer we just got back from vfs_listxattr, or, * in the buffer we just got back from vfs_listxattr, it's invalid.
* XDR-encoded, in the return buffer, it's invalid.
*/ */
if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2)) if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2))
return nfserr_badcookie; return nfserr_badcookie;
if (cookie > (listxattrs->lsxa_maxcount /
(XDR_QUADLEN(XATTR_USER_PREFIX_LEN + 2) + 4)))
return nfserr_badcookie;
*offsetp = (u32)cookie; *offsetp = (u32)cookie;
return 0; return 0;
} }
@ -5412,6 +5413,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
u64 cookie; u64 cookie;
char *sp; char *sp;
__be32 status, tmp; __be32 status, tmp;
__be64 wire_cookie;
__be32 *p; __be32 *p;
u32 nuser; u32 nuser;
@ -5427,7 +5429,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
*/ */
cookie_offset = xdr->buf->len; cookie_offset = xdr->buf->len;
count_offset = cookie_offset + 8; count_offset = cookie_offset + 8;
p = xdr_reserve_space(xdr, 12); p = xdr_reserve_space(xdr, XDR_UNIT * 3);
if (!p) { if (!p) {
status = nfserr_resource; status = nfserr_resource;
goto out; goto out;
@ -5438,7 +5440,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
sp = listxattrs->lsxa_buf; sp = listxattrs->lsxa_buf;
nuser = 0; nuser = 0;
xdrleft = listxattrs->lsxa_maxcount; /* Bytes left is maxcount - 8 (cookie) - 4 (array count) */
xdrleft = listxattrs->lsxa_maxcount - XDR_UNIT * 3;
while (left > 0 && xdrleft > 0) { while (left > 0 && xdrleft > 0) {
slen = strlen(sp); slen = strlen(sp);
@ -5451,7 +5454,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
slen -= XATTR_USER_PREFIX_LEN; slen -= XATTR_USER_PREFIX_LEN;
xdrlen = 4 + ((slen + 3) & ~3); xdrlen = 4 + ((slen + 3) & ~3);
if (xdrlen > xdrleft) { /* Check if both entry and eof can fit in the XDR buffer */
if (xdrlen + XDR_UNIT > xdrleft) {
if (count == 0) { if (count == 0) {
/* /*
* Can't even fit the first attribute name. * Can't even fit the first attribute name.
@ -5503,7 +5507,8 @@ wreof:
cookie = offset + count; cookie = offset + count;
write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &cookie, 8); wire_cookie = cpu_to_be64(cookie);
write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &wire_cookie, 8);
tmp = cpu_to_be32(count); tmp = cpu_to_be32(count);
write_bytes_to_xdr_buf(xdr->buf, count_offset, &tmp, 4); write_bytes_to_xdr_buf(xdr->buf, count_offset, &tmp, 4);
out: out:
@ -5727,27 +5732,24 @@ release:
rqstp->rq_next_page = xdr->page_ptr + 1; rqstp->rq_next_page = xdr->page_ptr + 1;
} }
/* /**
* Encode the reply stored in the stateowner reply cache * nfsd4_encode_replay - encode a result stored in the stateowner reply cache
* * @xdr: send buffer's XDR stream
* XDR note: do not encode rp->rp_buflen: the buffer contains the * @op: operation being replayed
* previously sent already encoded operation. *
* @op->replay->rp_buf contains the previously-sent already-encoded result.
*/ */
void void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
{ {
__be32 *p;
struct nfs4_replay *rp = op->replay; struct nfs4_replay *rp = op->replay;
p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); trace_nfsd_stateowner_replay(op->opnum, rp);
if (!p) {
WARN_ON_ONCE(1);
return;
}
*p++ = cpu_to_be32(op->opnum);
*p++ = rp->rp_status; /* already xdr'ed */
p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
return;
if (xdr_stream_encode_be32(xdr, rp->rp_status) != XDR_UNIT)
return;
xdr_stream_encode_opaque_fixed(xdr, rp->rp_buf, rp->rp_buflen);
} }
void nfsd4_release_compoundargs(struct svc_rqst *rqstp) void nfsd4_release_compoundargs(struct svc_rqst *rqstp)

View File

@ -166,8 +166,7 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
int nfsd_drc_slab_create(void) int nfsd_drc_slab_create(void)
{ {
drc_slab = kmem_cache_create("nfsd_drc", drc_slab = KMEM_CACHE(nfsd_cacherep, 0);
sizeof(struct nfsd_cacherep), 0, 0, NULL);
return drc_slab ? 0: -ENOMEM; return drc_slab ? 0: -ENOMEM;
} }
@ -176,27 +175,6 @@ void nfsd_drc_slab_free(void)
kmem_cache_destroy(drc_slab); kmem_cache_destroy(drc_slab);
} }
/**
 * nfsd_net_reply_cache_init - per net namespace reply cache set-up
 * @nn: nfsd_net being initialized
 *
 * Initializes the NFSD_NET_COUNTERS_NUM entries of @nn->counter by
 * handing them to nfsd_percpu_counters_init().
 *
 * Returns zero on success; otherwise a negative errno is returned.
 */
int nfsd_net_reply_cache_init(struct nfsd_net *nn)
{
return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
}
/**
 * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
 * @nn: nfsd_net being freed
 *
 * Passes the NFSD_NET_COUNTERS_NUM entries of @nn->counter to
 * nfsd_percpu_counters_destroy().
 */
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
{
nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
}
int nfsd_reply_cache_init(struct nfsd_net *nn) int nfsd_reply_cache_init(struct nfsd_net *nn)
{ {
unsigned int hashsize; unsigned int hashsize;
@ -501,7 +479,7 @@ out:
int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
unsigned int len, struct nfsd_cacherep **cacherep) unsigned int len, struct nfsd_cacherep **cacherep)
{ {
struct nfsd_net *nn; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfsd_cacherep *rp, *found; struct nfsd_cacherep *rp, *found;
__wsum csum; __wsum csum;
struct nfsd_drc_bucket *b; struct nfsd_drc_bucket *b;
@ -510,7 +488,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
int rtn = RC_DOIT; int rtn = RC_DOIT;
if (type == RC_NOCACHE) { if (type == RC_NOCACHE) {
nfsd_stats_rc_nocache_inc(); nfsd_stats_rc_nocache_inc(nn);
goto out; goto out;
} }
@ -520,7 +498,6 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
* Since the common case is a cache miss followed by an insert, * Since the common case is a cache miss followed by an insert,
* preallocate an entry. * preallocate an entry.
*/ */
nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
rp = nfsd_cacherep_alloc(rqstp, csum, nn); rp = nfsd_cacherep_alloc(rqstp, csum, nn);
if (!rp) if (!rp)
goto out; goto out;
@ -537,7 +514,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
nfsd_cacherep_dispose(&dispose); nfsd_cacherep_dispose(&dispose);
nfsd_stats_rc_misses_inc(); nfsd_stats_rc_misses_inc(nn);
atomic_inc(&nn->num_drc_entries); atomic_inc(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
goto out; goto out;
@ -545,7 +522,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
found_entry: found_entry:
/* We found a matching entry which is either in progress or done. */ /* We found a matching entry which is either in progress or done. */
nfsd_reply_cache_free_locked(NULL, rp, nn); nfsd_reply_cache_free_locked(NULL, rp, nn);
nfsd_stats_rc_hits_inc(); nfsd_stats_rc_hits_inc(nn);
rtn = RC_DROPIT; rtn = RC_DROPIT;
rp = found; rp = found;
@ -687,15 +664,15 @@ int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
atomic_read(&nn->num_drc_entries)); atomic_read(&nn->num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
seq_printf(m, "mem usage: %lld\n", seq_printf(m, "mem usage: %lld\n",
percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE])); percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE]));
seq_printf(m, "cache hits: %lld\n", seq_printf(m, "cache hits: %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS])); percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]));
seq_printf(m, "cache misses: %lld\n", seq_printf(m, "cache misses: %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES])); percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]));
seq_printf(m, "not cached: %lld\n", seq_printf(m, "not cached: %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE])); percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]));
seq_printf(m, "payload misses: %lld\n", seq_printf(m, "payload misses: %lld\n",
percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES])); percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]));
seq_printf(m, "longest chain len: %u\n", nn->longest_chain); seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0; return 0;

View File

@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
* 3. Is that directory the root of an exported file system? * 3. Is that directory the root of an exported file system?
*/ */
error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb); error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
nfsd4_revoke_states(netns(file), path.dentry->d_sb);
path_put(&path); path_put(&path);
return error; return error;
@ -1671,14 +1672,17 @@ static __net_init int nfsd_net_init(struct net *net)
retval = nfsd_idmap_init(net); retval = nfsd_idmap_init(net);
if (retval) if (retval)
goto out_idmap_error; goto out_idmap_error;
retval = nfsd_net_reply_cache_init(nn); retval = nfsd_stat_counters_init(nn);
if (retval) if (retval)
goto out_repcache_error; goto out_repcache_error;
memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
nn->nfsd_svcstats.program = &nfsd_program;
nn->nfsd_versions = NULL; nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL; nn->nfsd4_minorversions = NULL;
nfsd4_init_leases_net(nn); nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock); seqlock_init(&nn->writeverf_lock);
nfsd_proc_stat_init(net);
return 0; return 0;
@ -1699,7 +1703,8 @@ static __net_exit void nfsd_net_exit(struct net *net)
{ {
struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfsd_net_reply_cache_destroy(nn); nfsd_proc_stat_shutdown(net);
nfsd_stat_counters_destroy(nn);
nfsd_idmap_shutdown(net); nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net); nfsd_export_shutdown(net);
nfsd_netns_free_versions(nn); nfsd_netns_free_versions(nn);
@ -1722,12 +1727,9 @@ static int __init init_nfsd(void)
retval = nfsd4_init_pnfs(); retval = nfsd4_init_pnfs();
if (retval) if (retval)
goto out_free_slabs; goto out_free_slabs;
retval = nfsd_stat_init(); /* Statistics */
if (retval)
goto out_free_pnfs;
retval = nfsd_drc_slab_create(); retval = nfsd_drc_slab_create();
if (retval) if (retval)
goto out_free_stat; goto out_free_pnfs;
nfsd_lockd_init(); /* lockd->nfsd callbacks */ nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry(); retval = create_proc_exports_entry();
if (retval) if (retval)
@ -1761,8 +1763,6 @@ out_free_exports:
out_free_lockd: out_free_lockd:
nfsd_lockd_shutdown(); nfsd_lockd_shutdown();
nfsd_drc_slab_free(); nfsd_drc_slab_free();
out_free_stat:
nfsd_stat_shutdown();
out_free_pnfs: out_free_pnfs:
nfsd4_exit_pnfs(); nfsd4_exit_pnfs();
out_free_slabs: out_free_slabs:
@ -1780,7 +1780,6 @@ static void __exit exit_nfsd(void)
nfsd_drc_slab_free(); nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL); remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
nfsd_lockd_shutdown(); nfsd_lockd_shutdown();
nfsd4_free_slabs(); nfsd4_free_slabs();
nfsd4_exit_pnfs(); nfsd4_exit_pnfs();

View File

@ -86,6 +86,7 @@ extern struct mutex nfsd_mutex;
extern spinlock_t nfsd_drc_lock; extern spinlock_t nfsd_drc_lock;
extern unsigned long nfsd_drc_max_mem; extern unsigned long nfsd_drc_max_mem;
extern unsigned long nfsd_drc_mem_used; extern unsigned long nfsd_drc_mem_used;
extern atomic_t nfsd_th_cnt; /* number of available threads */
extern const struct seq_operations nfs_exports_op; extern const struct seq_operations nfs_exports_op;
@ -274,6 +275,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE) #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD) #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
#define nfserr_badname cpu_to_be32(NFSERR_BADNAME) #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
#define nfserr_locked cpu_to_be32(NFSERR_LOCKED) #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
@ -365,6 +367,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 #define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
#define NFS4_CLIENTS_PER_GB 1024 #define NFS4_CLIENTS_PER_GB 1024
#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */ #define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT
/* /*
* The following attributes are currently not supported by the NFSv4 server: * The following attributes are currently not supported by the NFSv4 server:

View File

@ -327,6 +327,7 @@ out:
__be32 __be32
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
{ {
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_export *exp = NULL; struct svc_export *exp = NULL;
struct dentry *dentry; struct dentry *dentry;
__be32 error; __be32 error;
@ -395,7 +396,7 @@ skip_pseudoflavor_check:
out: out:
trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
if (error == nfserr_stale) if (error == nfserr_stale)
nfsd_stats_fh_stale_inc(exp); nfsd_stats_fh_stale_inc(nn, exp);
return error; return error;
} }

View File

@ -103,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
} }
} }
resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0); resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL);
if (resp->status != nfs_ok) if (resp->status != nfs_ok)
goto out; goto out;
@ -390,8 +390,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/ */
attr->ia_valid &= ATTR_SIZE; attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid) if (attr->ia_valid)
resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0, resp->status = nfsd_setattr(rqstp, newfhp, &attrs,
(time64_t)0); NULL);
} }
out_unlock: out_unlock:

View File

@ -34,6 +34,7 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC #define NFSDDBG_FACILITY NFSDDBG_SVC
atomic_t nfsd_th_cnt = ATOMIC_INIT(0);
extern struct svc_program nfsd_program; extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp); static int nfsd(void *vrqstp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@ -80,7 +81,6 @@ unsigned long nfsd_drc_max_mem;
unsigned long nfsd_drc_mem_used; unsigned long nfsd_drc_mem_used;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static const struct svc_version *nfsd_acl_version[] = { static const struct svc_version *nfsd_acl_version[] = {
# if defined(CONFIG_NFSD_V2_ACL) # if defined(CONFIG_NFSD_V2_ACL)
[2] = &nfsd_acl_version2, [2] = &nfsd_acl_version2,
@ -99,15 +99,11 @@ static struct svc_program nfsd_acl_program = {
.pg_vers = nfsd_acl_version, .pg_vers = nfsd_acl_version,
.pg_name = "nfsacl", .pg_name = "nfsacl",
.pg_class = "nfsd", .pg_class = "nfsd",
.pg_stats = &nfsd_acl_svcstats,
.pg_authenticate = &svc_set_client, .pg_authenticate = &svc_set_client,
.pg_init_request = nfsd_acl_init_request, .pg_init_request = nfsd_acl_init_request,
.pg_rpcbind_set = nfsd_acl_rpcbind_set, .pg_rpcbind_set = nfsd_acl_rpcbind_set,
}; };
static struct svc_stat nfsd_acl_svcstats = {
.program = &nfsd_acl_program,
};
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
static const struct svc_version *nfsd_version[] = { static const struct svc_version *nfsd_version[] = {
@ -132,7 +128,6 @@ struct svc_program nfsd_program = {
.pg_vers = nfsd_version, /* version table */ .pg_vers = nfsd_version, /* version table */
.pg_name = "nfsd", /* program name */ .pg_name = "nfsd", /* program name */
.pg_class = "nfsd", /* authentication class */ .pg_class = "nfsd", /* authentication class */
.pg_stats = &nfsd_svcstats, /* version table */
.pg_authenticate = &svc_set_client, /* export authentication */ .pg_authenticate = &svc_set_client, /* export authentication */
.pg_init_request = nfsd_init_request, .pg_init_request = nfsd_init_request,
.pg_rpcbind_set = nfsd_rpcbind_set, .pg_rpcbind_set = nfsd_rpcbind_set,
@ -666,7 +661,8 @@ int nfsd_create_serv(struct net *net)
if (nfsd_max_blksize == 0) if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn); nfsd_reset_versions(nn);
serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd); serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats,
nfsd_max_blksize, nfsd);
if (serv == NULL) if (serv == NULL)
return -ENOMEM; return -ENOMEM;
@ -929,7 +925,7 @@ nfsd(void *vrqstp)
current->fs->umask = 0; current->fs->umask = 0;
atomic_inc(&nfsdstats.th_cnt); atomic_inc(&nfsd_th_cnt);
set_freezable(); set_freezable();
@ -941,9 +937,11 @@ nfsd(void *vrqstp)
rqstp->rq_server->sv_maxconn = nn->max_connections; rqstp->rq_server->sv_maxconn = nn->max_connections;
svc_recv(rqstp); svc_recv(rqstp);
nfsd_file_net_dispose(nn);
} }
atomic_dec(&nfsdstats.th_cnt); atomic_dec(&nfsd_th_cnt);
out: out:
/* Release the thread */ /* Release the thread */

View File

@ -37,7 +37,8 @@ struct nfsd4_layout_ops {
__be32 (*proc_layoutcommit)(struct inode *inode, __be32 (*proc_layoutcommit)(struct inode *inode,
struct nfsd4_layoutcommit *lcp); struct nfsd4_layoutcommit *lcp);
void (*fence_client)(struct nfs4_layout_stateid *ls); void (*fence_client)(struct nfs4_layout_stateid *ls,
struct nfsd_file *file);
}; };
extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
@ -72,11 +73,13 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
void nfsd4_return_all_client_layouts(struct nfs4_client *); void nfsd4_return_all_client_layouts(struct nfs4_client *);
void nfsd4_return_all_file_layouts(struct nfs4_client *clp, void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp); struct nfs4_file *fp);
void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
int nfsd4_init_pnfs(void); int nfsd4_init_pnfs(void);
void nfsd4_exit_pnfs(void); void nfsd4_exit_pnfs(void);
#else #else
struct nfs4_client; struct nfs4_client;
struct nfs4_file; struct nfs4_file;
struct nfs4_layout_stateid;
static inline void nfsd4_setup_layout_type(struct svc_export *exp) static inline void nfsd4_setup_layout_type(struct svc_export *exp)
{ {
@ -89,6 +92,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp) struct nfs4_file *fp)
{ {
} }
/*
 * Empty stub used on the #else side of this header's conditional, so
 * callers can invoke nfsd4_close_layout() unconditionally.
 * NOTE(review): presumably the build without pNFS support - confirm
 * against the enclosing #if.
 */
static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
{
}
static inline void nfsd4_exit_pnfs(void) static inline void nfsd4_exit_pnfs(void)
{ {
} }

View File

@ -68,7 +68,7 @@ struct nfsd4_callback {
struct nfs4_client *cb_clp; struct nfs4_client *cb_clp;
struct rpc_message cb_msg; struct rpc_message cb_msg;
const struct nfsd4_callback_ops *cb_ops; const struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work; struct delayed_work cb_work;
int cb_seq_status; int cb_seq_status;
int cb_status; int cb_status;
bool cb_need_restart; bool cb_need_restart;
@ -88,17 +88,34 @@ struct nfsd4_callback_ops {
*/ */
struct nfs4_stid { struct nfs4_stid {
refcount_t sc_count; refcount_t sc_count;
#define NFS4_OPEN_STID 1
#define NFS4_LOCK_STID 2 /* A new stateid is added to the cl_stateids idr early before it
#define NFS4_DELEG_STID 4 * is fully initialised. Its sc_type is then zero. After
/* For an open stateid kept around *only* to process close replays: */ * initialisation the sc_type is set under cl_lock, and then
#define NFS4_CLOSED_STID 8 * never changes.
*/
#define SC_TYPE_OPEN BIT(0)
#define SC_TYPE_LOCK BIT(1)
#define SC_TYPE_DELEG BIT(2)
#define SC_TYPE_LAYOUT BIT(3)
unsigned short sc_type;
/* state_lock protects sc_status for delegation stateids.
* ->cl_lock protects sc_status for open and lock stateids.
* ->st_mutex also protects sc_status for open stateids.
* ->ls_lock protects sc_status for layout stateids.
*/
/*
* For an open stateid kept around *only* to process close replays.
* For deleg stateid, kept in idr until last reference is dropped.
*/
#define SC_STATUS_CLOSED BIT(0)
/* For a deleg stateid kept around only to process free_stateid's: */ /* For a deleg stateid kept around only to process free_stateid's: */
#define NFS4_REVOKED_DELEG_STID 16 #define SC_STATUS_REVOKED BIT(1)
#define NFS4_CLOSED_DELEG_STID 32 #define SC_STATUS_ADMIN_REVOKED BIT(2)
#define NFS4_LAYOUT_STID 64 unsigned short sc_status;
struct list_head sc_cp_list; struct list_head sc_cp_list;
unsigned char sc_type;
stateid_t sc_stateid; stateid_t sc_stateid;
spinlock_t sc_lock; spinlock_t sc_lock;
struct nfs4_client *sc_client; struct nfs4_client *sc_client;
@ -117,6 +134,24 @@ struct nfs4_cpntf_state {
time64_t cpntf_time; /* last time stateid used */ time64_t cpntf_time; /* last time stateid used */
}; };
/*
 * Per-delegation state for the CB_GETATTR callback; embedded in
 * struct nfs4_delegation as dl_cb_fattr.
 */
struct nfs4_cb_fattr {
/* callback object used to issue the CB_GETATTR */
struct nfsd4_callback ncf_getattr;
/* NOTE(review): presumably the status of the most recent CB_GETATTR - confirm */
u32 ncf_cb_status;
/* single-word attribute bitmap; NOTE(review): presumably the attributes requested - confirm */
u32 ncf_cb_bmap[1];
/* from CB_GETATTR reply */
u64 ncf_cb_change;
u64 ncf_cb_fsize;
/* bit flags; bit numbers are the CB_GETATTR_* constants (e.g. CB_GETATTR_BUSY) */
unsigned long ncf_cb_flags;
/* NOTE(review): presumably set once the reply shows the file was changed - confirm */
bool ncf_file_modified;
/* NOTE(review): presumably the change attribute recorded when the delegation was granted - confirm */
u64 ncf_initial_cinfo;
/* NOTE(review): presumably the file size last known to the server - confirm */
u64 ncf_cur_fsize;
};
/* bits for ncf_cb_flags */
#define CB_GETATTR_BUSY 0
/* /*
* Represents a delegation stateid. The nfs4_client holds references to these * Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is * and they are put when it is being destroyed or when the delegation is
@ -150,6 +185,9 @@ struct nfs4_delegation {
int dl_retries; int dl_retries;
struct nfsd4_callback dl_recall; struct nfsd4_callback dl_recall;
bool dl_recalled; bool dl_recalled;
/* for CB_GETATTR */
struct nfs4_cb_fattr dl_cb_fattr;
}; };
#define cb_to_delegation(cb) \ #define cb_to_delegation(cb) \
@ -317,8 +355,9 @@ enum {
* 0. If they are not renewed within a lease period, they become eligible for * 0. If they are not renewed within a lease period, they become eligible for
* destruction by the laundromat. * destruction by the laundromat.
* *
* These objects can also be destroyed prematurely by the fault injection code, * These objects can also be destroyed if the client sends certain forms of
* or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates. * SETCLIENTID or EXCHANGE_ID operations.
*
* Care is taken *not* to do this however when the objects have an elevated * Care is taken *not* to do this however when the objects have an elevated
* refcount. * refcount.
* *
@ -326,7 +365,7 @@ enum {
* *
* o Each nfs4_clients is also hashed by name (the opaque quantity initially * o Each nfs4_clients is also hashed by name (the opaque quantity initially
* sent by the client to identify itself). * sent by the client to identify itself).
* *
* o cl_perclient list is used to ensure no dangling stateowner references * o cl_perclient list is used to ensure no dangling stateowner references
* when we expire the nfs4_client * when we expire the nfs4_client
*/ */
@ -351,6 +390,7 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */ clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion; u32 cl_minorversion;
atomic_t cl_admin_revoked; /* count of admin-revoked states */
/* NFSv4.1 client implementation id: */ /* NFSv4.1 client implementation id: */
struct xdr_netobj cl_nii_domain; struct xdr_netobj cl_nii_domain;
struct xdr_netobj cl_nii_name; struct xdr_netobj cl_nii_name;
@ -640,6 +680,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK, NFSPROC4_CLNT_CB_NOTIFY_LOCK,
NFSPROC4_CLNT_CB_RECALL_ANY, NFSPROC4_CLNT_CB_RECALL_ANY,
NFSPROC4_CLNT_CB_GETATTR,
}; };
/* Returns true iff a is later than b: */ /* Returns true iff a is later than b: */
@ -672,15 +713,15 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
stateid_t *stateid, int flags, struct nfsd_file **filp, stateid_t *stateid, int flags, struct nfsd_file **filp,
struct nfs4_stid **cstid); struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask, stateid_t *stateid, unsigned short typemask,
struct nfs4_stid **s, struct nfsd_net *nn); unsigned short statusmask,
struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *)); void (*sc_free)(struct nfs4_stid *));
int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy); int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
void nfs4_free_copy_state(struct nfsd4_copy *copy); void nfs4_free_copy_state(struct nfsd4_copy *copy);
struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
struct nfs4_stid *p_stid); struct nfs4_stid *p_stid);
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
@ -714,6 +755,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
} }
struct nfsd_file *find_any_file(struct nfs4_file *f); struct nfsd_file *find_any_file(struct nfs4_file *f);
#ifdef CONFIG_NFSD_V4
void nfsd4_revoke_states(struct net *net, struct super_block *sb);
#else
static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
{
}
#endif
/* grace period management */ /* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn); void nfsd4_end_grace(struct nfsd_net *nn);
@ -732,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
} }
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
struct inode *inode); struct inode *inode, bool *file_modified, u64 *size);
#endif /* NFSD4_STATE_H */ #endif /* NFSD4_STATE_H */

View File

@ -27,25 +27,22 @@
#include "nfsd.h" #include "nfsd.h"
struct nfsd_stats nfsdstats;
struct svc_stat nfsd_svcstats = {
.program = &nfsd_program,
};
static int nfsd_show(struct seq_file *seq, void *v) static int nfsd_show(struct seq_file *seq, void *v)
{ {
struct net *net = pde_data(file_inode(seq->file));
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int i; int i;
seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n", seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE])); percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE]));
/* thread usage: */ /* thread usage: */
seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt));
/* deprecated thread usage histogram stats */ /* deprecated thread usage histogram stats */
for (i = 0; i < 10; i++) for (i = 0; i < 10; i++)
@ -55,7 +52,7 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n"); seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n");
/* show my rpc info */ /* show my rpc info */
svc_seq_show(seq, &nfsd_svcstats); svc_seq_show(seq, &nn->nfsd_svcstats);
#ifdef CONFIG_NFSD_V4 #ifdef CONFIG_NFSD_V4
/* Show count for individual nfsv4 operations */ /* Show count for individual nfsv4 operations */
@ -63,10 +60,10 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1); seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1);
for (i = 0; i <= LAST_NFS4_OP; i++) { for (i = 0; i <= LAST_NFS4_OP; i++) {
seq_printf(seq, " %lld", seq_printf(seq, " %lld",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)])); percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)]));
} }
seq_printf(seq, "\nwdeleg_getattr %lld", seq_printf(seq, "\nwdeleg_getattr %lld",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR])); percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR]));
seq_putc(seq, '\n'); seq_putc(seq, '\n');
#endif #endif
@ -108,31 +105,24 @@ void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
percpu_counter_destroy(&counters[i]); percpu_counter_destroy(&counters[i]);
} }
static int nfsd_stat_counters_init(void) int nfsd_stat_counters_init(struct nfsd_net *nn)
{ {
return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM);
} }
static void nfsd_stat_counters_destroy(void) void nfsd_stat_counters_destroy(struct nfsd_net *nn)
{ {
nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM);
} }
int nfsd_stat_init(void) void nfsd_proc_stat_init(struct net *net)
{ {
int err; struct nfsd_net *nn = net_generic(net, nfsd_net_id);
err = nfsd_stat_counters_init(); svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
if (err)
return err;
svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
return 0;
} }
void nfsd_stat_shutdown(void) void nfsd_proc_stat_shutdown(struct net *net)
{ {
nfsd_stat_counters_destroy(); svc_proc_unregister(net, "nfsd");
svc_proc_unregister(&init_net, "nfsd");
} }

View File

@ -10,94 +10,72 @@
#include <uapi/linux/nfsd/stats.h> #include <uapi/linux/nfsd/stats.h>
#include <linux/percpu_counter.h> #include <linux/percpu_counter.h>
enum {
NFSD_STATS_RC_HITS, /* repcache hits */
NFSD_STATS_RC_MISSES, /* repcache misses */
NFSD_STATS_RC_NOCACHE, /* uncached reqs */
NFSD_STATS_FH_STALE, /* FH stale error */
NFSD_STATS_IO_READ, /* bytes returned to read requests */
NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
#ifdef CONFIG_NFSD_V4
NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
#endif
NFSD_STATS_COUNTERS_NUM
};
struct nfsd_stats {
struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
atomic_t th_cnt; /* number of available threads */
};
extern struct nfsd_stats nfsdstats;
extern struct svc_stat nfsd_svcstats;
int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); int nfsd_percpu_counters_init(struct percpu_counter *counters, int num);
void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num); void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num);
void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num); void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num);
int nfsd_stat_init(void); int nfsd_stat_counters_init(struct nfsd_net *nn);
void nfsd_stat_shutdown(void); void nfsd_stat_counters_destroy(struct nfsd_net *nn);
void nfsd_proc_stat_init(struct net *net);
void nfsd_proc_stat_shutdown(struct net *net);
static inline void nfsd_stats_rc_hits_inc(void) static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
{ {
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]); percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]);
} }
static inline void nfsd_stats_rc_misses_inc(void) static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn)
{ {
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]); percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]);
} }
static inline void nfsd_stats_rc_nocache_inc(void) static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn)
{ {
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]); percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]);
} }
static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp) static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn,
struct svc_export *exp)
{ {
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]); percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]);
if (exp && exp->ex_stats) if (exp && exp->ex_stats)
percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]); percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]);
} }
static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount) static inline void nfsd_stats_io_read_add(struct nfsd_net *nn,
struct svc_export *exp, s64 amount)
{ {
percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount); percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount);
if (exp && exp->ex_stats) if (exp && exp->ex_stats)
percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount); percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount);
} }
static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount) static inline void nfsd_stats_io_write_add(struct nfsd_net *nn,
struct svc_export *exp, s64 amount)
{ {
percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount); percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount);
if (exp && exp->ex_stats) if (exp && exp->ex_stats)
percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount); percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount);
} }
static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn) static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
{ {
percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]); percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]);
} }
static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount) static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
{ {
percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
} }
static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount) static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
{ {
percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
} }
#ifdef CONFIG_NFSD_V4 #ifdef CONFIG_NFSD_V4
static inline void nfsd_stats_wdeleg_getattr_inc(void) static inline void nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn)
{ {
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]); percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]);
} }
#endif #endif
#endif /* _NFSD_STATS_H */ #endif /* _NFSD_STATS_H */

View File

@ -9,8 +9,10 @@
#define _NFSD_TRACE_H #define _NFSD_TRACE_H
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h> #include <linux/sunrpc/xprt.h>
#include <trace/misc/nfs.h> #include <trace/misc/nfs.h>
#include <trace/misc/sunrpc.h>
#include "export.h" #include "export.h"
#include "nfsfh.h" #include "nfsfh.h"
@ -641,23 +643,18 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
DEFINE_STATESEQID_EVENT(preprocess); DEFINE_STATESEQID_EVENT(preprocess);
DEFINE_STATESEQID_EVENT(open_confirm); DEFINE_STATESEQID_EVENT(open_confirm);
TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
#define show_stid_type(x) \ #define show_stid_type(x) \
__print_flags(x, "|", \ __print_flags(x, "|", \
{ NFS4_OPEN_STID, "OPEN" }, \ { SC_TYPE_OPEN, "OPEN" }, \
{ NFS4_LOCK_STID, "LOCK" }, \ { SC_TYPE_LOCK, "LOCK" }, \
{ NFS4_DELEG_STID, "DELEG" }, \ { SC_TYPE_DELEG, "DELEG" }, \
{ NFS4_CLOSED_STID, "CLOSED" }, \ { SC_TYPE_LAYOUT, "LAYOUT" })
{ NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
{ NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ #define show_stid_status(x) \
{ NFS4_LAYOUT_STID, "LAYOUT" }) __print_flags(x, "|", \
{ SC_STATUS_CLOSED, "CLOSED" }, \
{ SC_STATUS_REVOKED, "REVOKED" }, \
{ SC_STATUS_ADMIN_REVOKED, "ADMIN_REVOKED" })
DECLARE_EVENT_CLASS(nfsd_stid_class, DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_PROTO( TP_PROTO(
@ -666,6 +663,7 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_ARGS(stid), TP_ARGS(stid),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(unsigned long, sc_type) __field(unsigned long, sc_type)
__field(unsigned long, sc_status)
__field(int, sc_count) __field(int, sc_count)
__field(u32, cl_boot) __field(u32, cl_boot)
__field(u32, cl_id) __field(u32, cl_id)
@ -676,16 +674,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
const stateid_t *stp = &stid->sc_stateid; const stateid_t *stp = &stid->sc_stateid;
__entry->sc_type = stid->sc_type; __entry->sc_type = stid->sc_type;
__entry->sc_status = stid->sc_status;
__entry->sc_count = refcount_read(&stid->sc_count); __entry->sc_count = refcount_read(&stid->sc_count);
__entry->cl_boot = stp->si_opaque.so_clid.cl_boot; __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
__entry->cl_id = stp->si_opaque.so_clid.cl_id; __entry->cl_id = stp->si_opaque.so_clid.cl_id;
__entry->si_id = stp->si_opaque.so_id; __entry->si_id = stp->si_opaque.so_id;
__entry->si_generation = stp->si_generation; __entry->si_generation = stp->si_generation;
), ),
TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
__entry->cl_boot, __entry->cl_id, __entry->cl_boot, __entry->cl_id,
__entry->si_id, __entry->si_generation, __entry->si_id, __entry->si_generation,
__entry->sc_count, show_stid_type(__entry->sc_type) __entry->sc_count, show_stid_type(__entry->sc_type),
show_stid_status(__entry->sc_status)
) )
); );
@ -696,6 +696,59 @@ DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \
DEFINE_STID_EVENT(revoke); DEFINE_STID_EVENT(revoke);
/*
 * nfsd_stateowner_replay: records the operation number passed by the caller
 * together with the status stored in the nfs4_replay (rp->rp_status),
 * converted from big-endian wire order to host order before it is stored.
 */
TRACE_EVENT(nfsd_stateowner_replay,
TP_PROTO(
u32 opnum,
const struct nfs4_replay *rp
),
TP_ARGS(opnum, rp),
TP_STRUCT__entry(
__field(unsigned long, status)
__field(u32, opnum)
),
TP_fast_assign(
__entry->status = be32_to_cpu(rp->rp_status);
__entry->opnum = opnum;
),
TP_printk("opnum=%u status=%lu",
__entry->opnum, __entry->status)
);
/*
 * nfsd_seq4_status: conditional tracepoint that fires only when
 * sequence->status_flags is non-zero (see TP_CONDITION below).
 *
 * It records the request XID (converted to host order), the four words of
 * the session ID taken from sequence->sessionid, and the status flags,
 * which are printed symbolically via show_nfs4_seq4_status().
 *
 * Note: netns_ino is captured in the entry but is not part of the
 * TP_printk() output.
 */
TRACE_EVENT_CONDITION(nfsd_seq4_status,
TP_PROTO(
const struct svc_rqst *rqstp,
const struct nfsd4_sequence *sequence
),
TP_ARGS(rqstp, sequence),
TP_CONDITION(sequence->status_flags),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(u32, xid)
__field(u32, cl_boot)
__field(u32, cl_id)
__field(u32, seqno)
__field(u32, reserved)
__field(unsigned long, status_flags)
),
TP_fast_assign(
const struct nfsd4_sessionid *sid =
(struct nfsd4_sessionid *)&sequence->sessionid;
__entry->netns_ino = SVC_NET(rqstp)->ns.inum;
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->cl_boot = sid->clientid.cl_boot;
__entry->cl_id = sid->clientid.cl_id;
__entry->seqno = sid->sequence;
__entry->reserved = sid->reserved;
__entry->status_flags = sequence->status_flags;
),
TP_printk("xid=0x%08x sessionid=%08x:%08x:%08x:%08x status_flags=%s",
__entry->xid, __entry->cl_boot, __entry->cl_id,
__entry->seqno, __entry->reserved,
show_nfs4_seq4_status(__entry->status_flags)
)
);
DECLARE_EVENT_CLASS(nfsd_clientid_class, DECLARE_EVENT_CLASS(nfsd_clientid_class,
TP_PROTO(const clientid_t *clid), TP_PROTO(const clientid_t *clid),
TP_ARGS(clid), TP_ARGS(clid),
@ -1334,7 +1387,8 @@ DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \
TP_PROTO(const struct nfs4_client *clp), \ TP_PROTO(const struct nfs4_client *clp), \
TP_ARGS(clp)) TP_ARGS(clp))
DEFINE_NFSD_CB_EVENT(state); DEFINE_NFSD_CB_EVENT(start);
DEFINE_NFSD_CB_EVENT(new_state);
DEFINE_NFSD_CB_EVENT(probe); DEFINE_NFSD_CB_EVENT(probe);
DEFINE_NFSD_CB_EVENT(lost); DEFINE_NFSD_CB_EVENT(lost);
DEFINE_NFSD_CB_EVENT(shutdown); DEFINE_NFSD_CB_EVENT(shutdown);
@ -1405,6 +1459,128 @@ TRACE_EVENT(nfsd_cb_setup_err,
__entry->error) __entry->error)
); );
/*
 * nfsd_cb_lifetime_class: event class taking a client and one of its
 * callbacks.
 *
 * Each event records the client ID (boot verifier and id), the address of
 * the nfsd4_callback, the callback's cb_need_restart flag, and the client's
 * callback socket address (clp->cl_cb_conn.cb_addr). The flag is printed as
 * " (need restart)" when set and " (first try)" otherwise.
 */
DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
TP_PROTO(
const struct nfs4_client *clp,
const struct nfsd4_callback *cb
),
TP_ARGS(clp, cb),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
__field(const void *, cb)
__field(bool, need_restart)
__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->cb = cb;
__entry->need_restart = cb->cb_need_restart;
__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
clp->cl_cb_conn.cb_addrlen)
),
TP_printk("addr=%pISpc client %08x:%08x cb=%p%s",
__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
__entry->cb, __entry->need_restart ?
" (need restart)" : " (first try)"
)
);
/*
 * DEFINE_NFSD_CB_LIFETIME_EVENT(name) instantiates a tracepoint called
 * nfsd_cb_<name> from nfsd_cb_lifetime_class; every instance takes the
 * same (clp, cb) arguments.
 */
#define DEFINE_NFSD_CB_LIFETIME_EVENT(name) \
DEFINE_EVENT(nfsd_cb_lifetime_class, nfsd_cb_##name, \
TP_PROTO( \
const struct nfs4_client *clp, \
const struct nfsd4_callback *cb \
), \
TP_ARGS(clp, cb))
/* Events defined here: nfsd_cb_queue, _destroy, _restart, _bc_update, _bc_shutdown */
DEFINE_NFSD_CB_LIFETIME_EVENT(queue);
DEFINE_NFSD_CB_LIFETIME_EVENT(destroy);
DEFINE_NFSD_CB_LIFETIME_EVENT(restart);
DEFINE_NFSD_CB_LIFETIME_EVENT(bc_update);
DEFINE_NFSD_CB_LIFETIME_EVENT(bc_shutdown);
/*
 * nfsd_cb_seq_status: records the RPC task status (task->tk_status) and the
 * callback's sequence status (cb->cb_seq_status) for a callback, along with
 * the four words of the session ID of the client's callback session
 * (cb->cb_clp->cl_cb_session).
 *
 * client_id is the RPC client's cl_clid, or -1 (stored in an unsigned
 * field) when the task has no tk_client.
 *
 * The format string must not end in a newline: the trace output layer
 * terminates each event line itself, so a trailing "\n" produces a blank
 * line after every event.
 */
TRACE_EVENT(nfsd_cb_seq_status,
	TP_PROTO(
		const struct rpc_task *task,
		const struct nfsd4_callback *cb
	),
	TP_ARGS(task, cb),
	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(u32, cl_boot)
		__field(u32, cl_id)
		__field(u32, seqno)
		__field(u32, reserved)
		__field(int, tk_status)
		__field(int, seq_status)
	),
	TP_fast_assign(
		const struct nfs4_client *clp = cb->cb_clp;
		const struct nfsd4_session *session = clp->cl_cb_session;
		const struct nfsd4_sessionid *sid =
			(struct nfsd4_sessionid *)&session->se_sessionid;
		__entry->task_id = task->tk_pid;
		__entry->client_id = task->tk_client ?
				     task->tk_client->cl_clid : -1;
		__entry->cl_boot = sid->clientid.cl_boot;
		__entry->cl_id = sid->clientid.cl_id;
		__entry->seqno = sid->sequence;
		__entry->reserved = sid->reserved;
		__entry->tk_status = task->tk_status;
		__entry->seq_status = cb->cb_seq_status;
	),
	TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
		" sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d",
		__entry->task_id, __entry->client_id,
		__entry->cl_boot, __entry->cl_id,
		__entry->seqno, __entry->reserved,
		__entry->tk_status, __entry->seq_status
	)
);
/*
 * nfsd_cb_free_slot: records the callback session's se_cb_seq_nr (printed
 * as "new slot seqno") together with the four words of the session ID of
 * the client's callback session (cb->cb_clp->cl_cb_session).
 *
 * client_id is the RPC client's cl_clid, or -1 (stored in an unsigned
 * field) when the task has no tk_client.
 *
 * The format string must not end in a newline: the trace output layer
 * terminates each event line itself, so a trailing "\n" produces a blank
 * line after every event.
 */
TRACE_EVENT(nfsd_cb_free_slot,
	TP_PROTO(
		const struct rpc_task *task,
		const struct nfsd4_callback *cb
	),
	TP_ARGS(task, cb),
	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(u32, cl_boot)
		__field(u32, cl_id)
		__field(u32, seqno)
		__field(u32, reserved)
		__field(u32, slot_seqno)
	),
	TP_fast_assign(
		const struct nfs4_client *clp = cb->cb_clp;
		const struct nfsd4_session *session = clp->cl_cb_session;
		const struct nfsd4_sessionid *sid =
			(struct nfsd4_sessionid *)&session->se_sessionid;
		__entry->task_id = task->tk_pid;
		__entry->client_id = task->tk_client ?
				     task->tk_client->cl_clid : -1;
		__entry->cl_boot = sid->clientid.cl_boot;
		__entry->cl_id = sid->clientid.cl_id;
		__entry->seqno = sid->sequence;
		__entry->reserved = sid->reserved;
		__entry->slot_seqno = session->se_cb_seq_nr;
	),
	TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
		" sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",
		__entry->task_id, __entry->client_id,
		__entry->cl_boot, __entry->cl_id,
		__entry->seqno, __entry->reserved,
		__entry->slot_seqno
	)
);
TRACE_EVENT_CONDITION(nfsd_cb_recall, TRACE_EVENT_CONDITION(nfsd_cb_recall,
TP_PROTO( TP_PROTO(
const struct nfs4_stid *stid const struct nfs4_stid *stid

View File

@ -476,7 +476,6 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
* @rqstp: controlling RPC transaction * @rqstp: controlling RPC transaction
* @fhp: filehandle of target * @fhp: filehandle of target
* @attr: attributes to set * @attr: attributes to set
* @check_guard: set to 1 if guardtime is a valid timestamp
* @guardtime: do not act if ctime.tv_sec does not match this timestamp * @guardtime: do not act if ctime.tv_sec does not match this timestamp
* *
* This call may adjust the contents of @attr (in particular, this * This call may adjust the contents of @attr (in particular, this
@ -488,8 +487,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
*/ */
__be32 __be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_attrs *attr, struct nfsd_attrs *attr, const struct timespec64 *guardtime)
int check_guard, time64_t guardtime)
{ {
struct dentry *dentry; struct dentry *dentry;
struct inode *inode; struct inode *inode;
@ -497,7 +495,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int accmode = NFSD_MAY_SATTR; int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0; umode_t ftype = 0;
__be32 err; __be32 err;
int host_err; int host_err = 0;
bool get_write_count; bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE); bool size_change = (iap->ia_valid & ATTR_SIZE);
int retries; int retries;
@ -538,9 +536,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_sanitize_attrs(inode, iap); nfsd_sanitize_attrs(inode, iap);
if (check_guard && guardtime != inode_get_ctime_sec(inode))
return nfserr_notsync;
/* /*
* The size case is special, it changes the file in addition to the * The size case is special, it changes the file in addition to the
* attributes, and file systems don't expect it to be mixed with * attributes, and file systems don't expect it to be mixed with
@ -555,6 +550,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
} }
inode_lock(inode); inode_lock(inode);
err = fh_fill_pre_attrs(fhp);
if (err)
goto out_unlock;
if (guardtime) {
struct timespec64 ctime = inode_get_ctime(inode);
if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec ||
guardtime->tv_nsec != ctime.tv_nsec) {
err = nfserr_notsync;
goto out_fill_attrs;
}
}
for (retries = 1;;) { for (retries = 1;;) {
struct iattr attrs; struct iattr attrs;
@ -582,13 +590,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
attr->na_aclerr = set_posix_acl(&nop_mnt_idmap, attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
dentry, ACL_TYPE_DEFAULT, dentry, ACL_TYPE_DEFAULT,
attr->na_dpacl); attr->na_dpacl);
out_fill_attrs:
/*
* RFC 1813 Section 3.3.2 does not mandate that an NFS server
* returns wcc_data for SETATTR. Some client implementations
* depend on receiving wcc_data, however, to sort out partial
* updates (eg., the client requested that size and mode be
* modified, but the server changed only the file mode).
*/
fh_fill_post_attrs(fhp);
out_unlock:
inode_unlock(inode); inode_unlock(inode);
if (size_change) if (size_change)
put_write_access(inode); put_write_access(inode);
out: out:
if (!host_err) if (!host_err)
host_err = commit_metadata(fhp); host_err = commit_metadata(fhp);
return nfserrno(host_err); return err != 0 ? err : nfserrno(host_err);
} }
#if defined(CONFIG_NFSD_V4) #if defined(CONFIG_NFSD_V4)
@ -1002,7 +1020,9 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned long *count, u32 *eof, ssize_t host_err) unsigned long *count, u32 *eof, ssize_t host_err)
{ {
if (host_err >= 0) { if (host_err >= 0) {
nfsd_stats_io_read_add(fhp->fh_export, host_err); struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
nfsd_stats_io_read_add(nn, fhp->fh_export, host_err);
*eof = nfsd_eof_on_read(file, offset, host_err, *count); *eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err; *count = host_err;
fsnotify_access(file); fsnotify_access(file);
@ -1185,7 +1205,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
goto out_nfserr; goto out_nfserr;
} }
*cnt = host_err; *cnt = host_err;
nfsd_stats_io_write_add(exp, *cnt); nfsd_stats_io_write_add(nn, exp, *cnt);
fsnotify_modify(file); fsnotify_modify(file);
host_err = filemap_check_wb_err(file->f_mapping, since); host_err = filemap_check_wb_err(file->f_mapping, since);
if (host_err < 0) if (host_err < 0)
@ -1404,7 +1424,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
* if the attributes have not changed. * if the attributes have not changed.
*/ */
if (iap->ia_valid) if (iap->ia_valid)
status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0); status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
else else
status = nfserrno(commit_metadata(resfhp)); status = nfserrno(commit_metadata(resfhp));
@ -1906,10 +1926,10 @@ out_unlock:
fh_drop_write(ffhp); fh_drop_write(ffhp);
/* /*
* If the target dentry has cached open files, then we need to try to * If the target dentry has cached open files, then we need to
* close them prior to doing the rename. Flushing delayed fput * try to close them prior to doing the rename. Final fput
* shouldn't be done with locks held however, so we delay it until this * shouldn't be done with locks held however, so we delay it
* point and then reattempt the whole shebang. * until this point and then reattempt the whole shebang.
*/ */
if (close_cached) { if (close_cached) {
close_cached = false; close_cached = false;
@ -2177,11 +2197,43 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
if (err == nfserr_eof || err == nfserr_toosmall) if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */ err = nfs_ok; /* can still be found in ->err */
out_close: out_close:
fput(file); nfsd_filp_close(file);
out: out:
return err; return err;
} }
/**
* nfsd_filp_close: close a file synchronously
* @fp: the file to close
*
* nfsd_filp_close() is similar in behaviour to filp_close().
* The difference is that if this is the final close on the
* file, then that finalisation happens immediately, rather than
* being handed over to a work_queue, as is the case for
* filp_close().
* When a user-space process closes a file (even when using
* filp_close()) the finalisation happens before returning to
* userspace, so it is effectively synchronous. When a kernel thread
* uses filp_close(), on the other hand, the handling is completely
* asynchronous. This means that any cost imposed by that finalisation
* is not imposed on the nfsd thread, and nfsd could potentially
* close files more quickly than the work queue finalises the close,
* which would lead to unbounded growth in the queue.
*
* In some contexts it is not safe to synchronously wait for
* close finalisation (see comment for __fput_sync()), but nfsd
* does not match those contexts. In particular it does not, at the
* time that this function is called, hold any locks and no finalisation
* of any file, socket, or device driver would have any cause to wait
* for nfsd to make progress.
*/
void nfsd_filp_close(struct file *fp)
{
/*
* Presumably the extra reference keeps the put inside filp_close()
* from being the final one, so that the final put is the synchronous
* __fput_sync() below.
*/
get_file(fp);
filp_close(fp, NULL);
__fput_sync(fp);
}
/* /*
* Get file system stats * Get file system stats
* N.B. After this call fhp needs an fh_put * N.B. After this call fhp needs an fh_put

View File

@ -69,7 +69,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int, const char *, unsigned int,
struct svc_export **, struct dentry **); struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
struct nfsd_attrs *, int, time64_t); struct nfsd_attrs *, const struct timespec64 *);
int nfsd_mountpoint(struct dentry *, struct svc_export *); int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4 #ifdef CONFIG_NFSD_V4
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
@ -148,6 +148,8 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, __be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int); struct dentry *, int);
void nfsd_filp_close(struct file *fp);
static inline int fh_want_write(struct svc_fh *fh) static inline int fh_want_write(struct svc_fh *fh)
{ {
int ret; int ret;

View File

@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
struct svc_fh fh; struct svc_fh fh;
struct iattr attrs; struct iattr attrs;
int check_guard; int check_guard;
time64_t guardtime; struct timespec64 guardtime;
}; };
struct nfsd3_diropargs { struct nfsd3_diropargs {

View File

@ -54,3 +54,21 @@
#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ #define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \ cb_sequence_dec_sz + \
op_dec_sz) op_dec_sz)
/*
* 1: CB_GETATTR opcode (32-bit)
* N: file_handle
* 1: number of entry in attribute array (32-bit)
* 1: entry 0 in attribute array (32-bit)
*/
#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
cb_sequence_enc_sz + \
1 + enc_nfs4_fh_sz + 1 + 1)
/*
* 4: fattr_bitmap_maxsz
* 1: attribute array len
* 2: change attr (64-bit)
* 2: size (64-bit)
*/
#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)

View File

@ -339,7 +339,6 @@ struct svc_program {
const struct svc_version **pg_vers; /* version array */ const struct svc_version **pg_vers; /* version array */
char * pg_name; /* service name */ char * pg_name; /* service name */
char * pg_class; /* class name: services sharing authentication */ char * pg_class; /* class name: services sharing authentication */
struct svc_stat * pg_stats; /* rpc statistics */
enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp);
__be32 (*pg_init_request)(struct svc_rqst *, __be32 (*pg_init_request)(struct svc_rqst *,
const struct svc_program *, const struct svc_program *,
@ -411,7 +410,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp,
void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_rqst_release_pages(struct svc_rqst *rqstp);
void svc_rqst_free(struct svc_rqst *); void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, struct svc_serv * svc_create_pooled(struct svc_program *prog,
struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data)); int (*threadfn)(void *data));
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_info *si, struct file *file); int svc_pool_stats_open(struct svc_info *si, struct file *file);

View File

@ -203,6 +203,30 @@ struct svc_rdma_recv_ctxt {
struct page *rc_pages[RPCSVC_MAXPAGES]; struct page *rc_pages[RPCSVC_MAXPAGES];
}; };
/*
* State for sending a Write or Reply chunk.
* - Tracks progress of writing one chunk over all its segments
* - Stores arguments for the SGL constructor functions
*/
struct svc_rdma_write_info {
struct svcxprt_rdma *wi_rdma;
struct list_head wi_list;
const struct svc_rdma_chunk *wi_chunk;
/* write state of this chunk */
unsigned int wi_seg_off;
unsigned int wi_seg_no;
/* SGL constructor arguments */
const struct xdr_buf *wi_xdr;
unsigned char *wi_base;
unsigned int wi_next_off;
struct svc_rdma_chunk_ctxt wi_cc;
struct work_struct wi_work;
};
struct svc_rdma_send_ctxt { struct svc_rdma_send_ctxt {
struct llist_node sc_node; struct llist_node sc_node;
struct rpc_rdma_cid sc_cid; struct rpc_rdma_cid sc_cid;
@ -210,9 +234,15 @@ struct svc_rdma_send_ctxt {
struct svcxprt_rdma *sc_rdma; struct svcxprt_rdma *sc_rdma;
struct ib_send_wr sc_send_wr; struct ib_send_wr sc_send_wr;
struct ib_send_wr *sc_wr_chain;
int sc_sqecount;
struct ib_cqe sc_cqe; struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf; struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream; struct xdr_stream sc_stream;
struct list_head sc_write_info_list;
struct svc_rdma_write_info sc_reply_info;
void *sc_xprt_buf; void *sc_xprt_buf;
int sc_page_count; int sc_page_count;
int sc_cur_sge_no; int sc_cur_sge_no;
@ -236,18 +266,27 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
extern int svc_rdma_recvfrom(struct svc_rqst *); extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */ /* svc_rdma_rw.c */
extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc);
extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc); struct svc_rdma_chunk_ctxt *cc);
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc, struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir); enum dma_data_direction dir);
extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
const struct svc_rdma_chunk *chunk, struct svc_rdma_send_ctxt *ctxt);
const struct xdr_buf *xdr); extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt);
const struct svc_rdma_recv_ctxt *rctxt, extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
const struct xdr_buf *xdr); const struct svc_rdma_pcl *write_pcl,
struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr);
extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
const struct svc_rdma_pcl *reply_pcl,
struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr);
extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp, struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head); struct svc_rdma_recv_ctxt *head);
@ -258,8 +297,8 @@ extern struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt); struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_send(struct svcxprt_rdma *rdma, extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt); struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt, struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_pcl *write_pcl, const struct svc_rdma_pcl *write_pcl,

View File

@ -2118,6 +2118,10 @@ DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_write);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err);
DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_reply);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_flush);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_err);
TRACE_EVENT(svcrdma_qp_error, TRACE_EVENT(svcrdma_qp_error,
TP_PROTO( TP_PROTO(
const struct ib_event *event, const struct ib_event *event,

View File

@ -385,3 +385,37 @@ TRACE_DEFINE_ENUM(IOMODE_ANY);
{ SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \ { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
{ SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \ { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
{ SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" }) { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
TRACE_DEFINE_ENUM(OP_CB_GETATTR);
TRACE_DEFINE_ENUM(OP_CB_RECALL);
TRACE_DEFINE_ENUM(OP_CB_LAYOUTRECALL);
TRACE_DEFINE_ENUM(OP_CB_NOTIFY);
TRACE_DEFINE_ENUM(OP_CB_PUSH_DELEG);
TRACE_DEFINE_ENUM(OP_CB_RECALL_ANY);
TRACE_DEFINE_ENUM(OP_CB_RECALLABLE_OBJ_AVAIL);
TRACE_DEFINE_ENUM(OP_CB_RECALL_SLOT);
TRACE_DEFINE_ENUM(OP_CB_SEQUENCE);
TRACE_DEFINE_ENUM(OP_CB_WANTS_CANCELLED);
TRACE_DEFINE_ENUM(OP_CB_NOTIFY_LOCK);
TRACE_DEFINE_ENUM(OP_CB_NOTIFY_DEVICEID);
TRACE_DEFINE_ENUM(OP_CB_OFFLOAD);
TRACE_DEFINE_ENUM(OP_CB_ILLEGAL);
#define show_nfs4_cb_op(x) \
__print_symbolic(x, \
{ 0, "CB_NULL" }, \
{ 1, "CB_COMPOUND" }, \
{ OP_CB_GETATTR, "CB_GETATTR" }, \
{ OP_CB_RECALL, "CB_RECALL" }, \
{ OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \
{ OP_CB_NOTIFY, "CB_NOTIFY" }, \
{ OP_CB_PUSH_DELEG, "CB_PUSH_DELEG" }, \
{ OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \
{ OP_CB_RECALLABLE_OBJ_AVAIL, "CB_RECALLABLE_OBJ_AVAIL" }, \
{ OP_CB_RECALL_SLOT, "CB_RECALL_SLOT" }, \
{ OP_CB_SEQUENCE, "CB_SEQUENCE" }, \
{ OP_CB_WANTS_CANCELLED, "CB_WANTS_CANCELLED" }, \
{ OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \
{ OP_CB_NOTIFY_DEVICEID, "CB_NOTIFY_DEVICEID" }, \
{ OP_CB_OFFLOAD, "CB_OFFLOAD" }, \
{ OP_CB_ILLEGAL, "CB_ILLEGAL" })

View File

@ -921,6 +921,8 @@ out_err:
* Caller provides the truncation length of the output token (h) in * Caller provides the truncation length of the output token (h) in
* cksumout.len. * cksumout.len.
* *
* Note that for RPCSEC, the "initial cipher state" is always all zeroes.
*
* Return values: * Return values:
* %GSS_S_COMPLETE: Digest computed, @cksumout filled in * %GSS_S_COMPLETE: Digest computed, @cksumout filled in
* %GSS_S_FAILURE: Call failed * %GSS_S_FAILURE: Call failed
@ -931,22 +933,19 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
int body_offset, struct xdr_netobj *cksumout) int body_offset, struct xdr_netobj *cksumout)
{ {
unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher); unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct ahash_request *req; struct ahash_request *req;
struct scatterlist sg[1]; struct scatterlist sg[1];
u8 *iv, *checksumdata;
int err = -ENOMEM; int err = -ENOMEM;
u8 *checksumdata;
checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL); checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
if (!checksumdata) if (!checksumdata)
return GSS_S_FAILURE; return GSS_S_FAILURE;
/* For RPCSEC, the "initial cipher state" is always all zeroes. */
iv = kzalloc(ivsize, GFP_KERNEL);
if (!iv)
goto out_free_mem;
req = ahash_request_alloc(tfm, GFP_KERNEL); req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req) if (!req)
goto out_free_mem; goto out_free_cksumdata;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
err = crypto_ahash_init(req); err = crypto_ahash_init(req);
if (err) if (err)
@ -970,8 +969,7 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
out_free_ahash: out_free_ahash:
ahash_request_free(req); ahash_request_free(req);
out_free_mem: out_free_cksumdata:
kfree(iv);
kfree_sensitive(checksumdata); kfree_sensitive(checksumdata);
return err ? GSS_S_FAILURE : GSS_S_COMPLETE; return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
} }

View File

@ -398,6 +398,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
u64 seq_send64; u64 seq_send64;
int keylen; int keylen;
u32 time32; u32 time32;
int ret;
p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags)); p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
if (IS_ERR(p)) if (IS_ERR(p))
@ -450,8 +451,16 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
} }
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len; ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
return gss_krb5_import_ctx_v2(ctx, gfp_mask); ret = gss_krb5_import_ctx_v2(ctx, gfp_mask);
if (ret) {
p = ERR_PTR(ret);
goto out_free;
}
return 0;
out_free:
kfree(ctx->mech_used.data);
out_err: out_err:
return PTR_ERR(p); return PTR_ERR(p);
} }

View File

@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
if (!creds) { if (!creds) {
kfree(oa->data); err = -ENOMEM;
return -ENOMEM; goto free_oa;
} }
oa->data[0].option.data = CREDS_VALUE; oa->data[0].option.data = CREDS_VALUE;
@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
/* option buffer */ /* option buffer */
p = xdr_inline_decode(xdr, 4); p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL)) if (unlikely(p == NULL)) {
return -ENOSPC; err = -ENOSPC;
goto free_creds;
}
length = be32_to_cpup(p); length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length); p = xdr_inline_decode(xdr, length);
if (unlikely(p == NULL)) if (unlikely(p == NULL)) {
return -ENOSPC; err = -ENOSPC;
goto free_creds;
}
if (length == sizeof(CREDS_VALUE) && if (length == sizeof(CREDS_VALUE) &&
memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) { memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
/* We have creds here. parse them */ /* We have creds here. parse them */
err = gssx_dec_linux_creds(xdr, creds); err = gssx_dec_linux_creds(xdr, creds);
if (err) if (err)
return err; goto free_creds;
oa->data[0].value.len = 1; /* presence */ oa->data[0].value.len = 1; /* presence */
} else { } else {
/* consume uninteresting buffer */ /* consume uninteresting buffer */
err = gssx_dec_buffer(xdr, &dummy); err = gssx_dec_buffer(xdr, &dummy);
if (err) if (err)
return err; goto free_creds;
} }
} }
return 0; return 0;
free_creds:
kfree(creds);
free_oa:
kfree(oa->data);
oa->data = NULL;
return err;
} }
static int gssx_dec_status(struct xdr_stream *xdr, static int gssx_dec_status(struct xdr_stream *xdr,

View File

@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry * struct proc_dir_entry *
svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops) svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
{ {
return do_register(net, statp->program->pg_name, statp, proc_ops); return do_register(net, statp->program->pg_name, net, proc_ops);
} }
EXPORT_SYMBOL_GPL(svc_proc_register); EXPORT_SYMBOL_GPL(svc_proc_register);

View File

@ -451,8 +451,8 @@ __svc_init_bc(struct svc_serv *serv)
* Create an RPC service * Create an RPC service
*/ */
static struct svc_serv * static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, __svc_create(struct svc_program *prog, struct svc_stat *stats,
int (*threadfn)(void *data)) unsigned int bufsize, int npools, int (*threadfn)(void *data))
{ {
struct svc_serv *serv; struct svc_serv *serv;
unsigned int vers; unsigned int vers;
@ -463,7 +463,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL; return NULL;
serv->sv_name = prog->pg_name; serv->sv_name = prog->pg_name;
serv->sv_program = prog; serv->sv_program = prog;
serv->sv_stats = prog->pg_stats; serv->sv_stats = stats;
if (bufsize > RPCSVC_MAXPAYLOAD) if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD; bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096; serv->sv_max_payload = bufsize? bufsize : 4096;
@ -529,26 +529,28 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize, struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
int (*threadfn)(void *data)) int (*threadfn)(void *data))
{ {
return __svc_create(prog, bufsize, 1, threadfn); return __svc_create(prog, NULL, bufsize, 1, threadfn);
} }
EXPORT_SYMBOL_GPL(svc_create); EXPORT_SYMBOL_GPL(svc_create);
/** /**
* svc_create_pooled - Create an RPC service with pooled threads * svc_create_pooled - Create an RPC service with pooled threads
* @prog: the RPC program the new service will handle * @prog: the RPC program the new service will handle
* @stats: the stats struct if desired
* @bufsize: maximum message size for @prog * @bufsize: maximum message size for @prog
* @threadfn: a function to service RPC requests for @prog * @threadfn: a function to service RPC requests for @prog
* *
* Returns an instantiated struct svc_serv object or NULL. * Returns an instantiated struct svc_serv object or NULL.
*/ */
struct svc_serv *svc_create_pooled(struct svc_program *prog, struct svc_serv *svc_create_pooled(struct svc_program *prog,
struct svc_stat *stats,
unsigned int bufsize, unsigned int bufsize,
int (*threadfn)(void *data)) int (*threadfn)(void *data))
{ {
struct svc_serv *serv; struct svc_serv *serv;
unsigned int npools = svc_pool_map_get(); unsigned int npools = svc_pool_map_get();
serv = __svc_create(prog, bufsize, npools, threadfn); serv = __svc_create(prog, stats, bufsize, npools, threadfn);
if (!serv) if (!serv)
goto out_err; goto out_err;
return serv; return serv;
@ -1375,7 +1377,8 @@ svc_process_common(struct svc_rqst *rqstp)
goto err_bad_proc; goto err_bad_proc;
/* Syntactic check complete */ /* Syntactic check complete */
serv->sv_stats->rpccnt++; if (serv->sv_stats)
serv->sv_stats->rpccnt++;
trace_svc_process(rqstp, progp->pg_name); trace_svc_process(rqstp, progp->pg_name);
aoffset = xdr_stream_pos(xdr); aoffset = xdr_stream_pos(xdr);
@ -1427,7 +1430,8 @@ err_short_len:
goto close_xprt; goto close_xprt;
err_bad_rpc: err_bad_rpc:
serv->sv_stats->rpcbadfmt++; if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED); xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
xdr_stream_encode_u32(xdr, RPC_MISMATCH); xdr_stream_encode_u32(xdr, RPC_MISMATCH);
/* Only RPCv2 supported */ /* Only RPCv2 supported */
@ -1438,7 +1442,8 @@ err_bad_rpc:
err_bad_auth: err_bad_auth:
dprintk("svc: authentication failed (%d)\n", dprintk("svc: authentication failed (%d)\n",
be32_to_cpu(rqstp->rq_auth_stat)); be32_to_cpu(rqstp->rq_auth_stat));
serv->sv_stats->rpcbadauth++; if (serv->sv_stats)
serv->sv_stats->rpcbadauth++;
/* Restore write pointer to location of reply status: */ /* Restore write pointer to location of reply status: */
xdr_truncate_encode(xdr, XDR_UNIT * 2); xdr_truncate_encode(xdr, XDR_UNIT * 2);
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED); xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
@ -1448,7 +1453,8 @@ err_bad_auth:
err_bad_prog: err_bad_prog:
dprintk("svc: unknown program %d\n", rqstp->rq_prog); dprintk("svc: unknown program %d\n", rqstp->rq_prog);
serv->sv_stats->rpcbadfmt++; if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_unavail; *rqstp->rq_accept_statp = rpc_prog_unavail;
goto sendit; goto sendit;
@ -1456,7 +1462,8 @@ err_bad_vers:
svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n", svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
rqstp->rq_vers, rqstp->rq_prog, progp->pg_name); rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
serv->sv_stats->rpcbadfmt++; if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_mismatch; *rqstp->rq_accept_statp = rpc_prog_mismatch;
/* /*
@ -1470,19 +1477,22 @@ err_bad_vers:
err_bad_proc: err_bad_proc:
svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc); svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
serv->sv_stats->rpcbadfmt++; if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_proc_unavail; *rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit; goto sendit;
err_garbage_args: err_garbage_args:
svc_printk(rqstp, "failed to decode RPC header\n"); svc_printk(rqstp, "failed to decode RPC header\n");
serv->sv_stats->rpcbadfmt++; if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_garbage_args; *rqstp->rq_accept_statp = rpc_garbage_args;
goto sendit; goto sendit;
err_system_err: err_system_err:
serv->sv_stats->rpcbadfmt++; if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_system_err; *rqstp->rq_accept_statp = rpc_system_err;
goto sendit; goto sendit;
} }
@ -1534,7 +1544,8 @@ void svc_process(struct svc_rqst *rqstp)
out_baddir: out_baddir:
svc_printk(rqstp, "bad direction 0x%08x, dropping request\n", svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
be32_to_cpu(*p)); be32_to_cpu(*p));
rqstp->rq_server->sv_stats->rpcbadfmt++; if (rqstp->rq_server->sv_stats)
rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop: out_drop:
svc_drop(rqstp); svc_drop(rqstp);
} }
@ -1612,7 +1623,6 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
rpc_put_task(task); rpc_put_task(task);
} }
EXPORT_SYMBOL_GPL(svc_process_bc);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */
/** /**

View File

@ -90,7 +90,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/ */
get_page(virt_to_page(rqst->rq_buffer)); get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND; sctxt->sc_send_wr.opcode = IB_WR_SEND;
return svc_rdma_send(rdma, sctxt); return svc_rdma_post_send(rdma, sctxt);
} }
/* Server-side transport endpoint wants a whole page for its send /* Server-side transport endpoint wants a whole page for its send

View File

@ -197,28 +197,6 @@ void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
llist_add_batch(first, last, &rdma->sc_rw_ctxts); llist_add_batch(first, last, &rdma->sc_rw_ctxts);
} }
/* State for sending a Write or Reply chunk.
* - Tracks progress of writing one chunk over all its segments
* - Stores arguments for the SGL constructor functions
*/
struct svc_rdma_write_info {
struct svcxprt_rdma *wi_rdma;
const struct svc_rdma_chunk *wi_chunk;
/* write state of this chunk */
unsigned int wi_seg_off;
unsigned int wi_seg_no;
/* SGL constructor arguments */
const struct xdr_buf *wi_xdr;
unsigned char *wi_base;
unsigned int wi_next_off;
struct svc_rdma_chunk_ctxt wi_cc;
struct work_struct wi_work;
};
static struct svc_rdma_write_info * static struct svc_rdma_write_info *
svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
const struct svc_rdma_chunk *chunk) const struct svc_rdma_chunk *chunk)
@ -252,6 +230,71 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
queue_work(svcrdma_wq, &info->wi_work); queue_work(svcrdma_wq, &info->wi_work);
} }
/**
* svc_rdma_write_chunk_release - Release Write chunk I/O resources
* @rdma: controlling transport
* @ctxt: Send context that is being released
*/
void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
while (!list_empty(&ctxt->sc_write_info_list)) {
info = list_first_entry(&ctxt->sc_write_info_list,
struct svc_rdma_write_info, wi_list);
list_del(&info->wi_list);
cc = &info->wi_cc;
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
svc_rdma_write_info_free(info);
}
}
/**
* svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
* @rdma: controlling transport
* @ctxt: Send context that is being released
*/
void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
{
struct svc_rdma_chunk_ctxt *cc = &ctxt->sc_reply_info.wi_cc;
if (!cc->cc_sqecount)
return;
svc_rdma_cc_release(rdma, cc, DMA_TO_DEVICE);
}
/**
* svc_rdma_reply_done - Reply chunk Write completion handler
* @cq: controlling Completion Queue
* @wc: Work Completion report
*
* Pages under I/O are released by a subsequent Send completion.
*/
static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
struct svcxprt_rdma *rdma = cq->cq_context;
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_reply(&cc->cc_cid);
return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_reply_flush(wc, &cc->cc_cid);
break;
default:
trace_svcrdma_wc_reply_err(wc, &cc->cc_cid);
}
svc_xprt_deferred_close(&rdma->sc_xprt);
}
/** /**
* svc_rdma_write_done - Write chunk completion * svc_rdma_write_done - Write chunk completion
* @cq: controlling Completion Queue * @cq: controlling Completion Queue
@ -265,13 +308,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe; struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc = struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) { switch (wc->status) {
case IB_WC_SUCCESS: case IB_WC_SUCCESS:
trace_svcrdma_wc_write(&cc->cc_cid); trace_svcrdma_wc_write(&cc->cc_cid);
break; return;
case IB_WC_WR_FLUSH_ERR: case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
break; break;
@ -279,12 +320,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write_err(wc, &cc->cc_cid); trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
} }
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); /* The RDMA Write has flushed, so the client won't get
* some of the outgoing RPC message. Signal the loss
if (unlikely(wc->status != IB_WC_SUCCESS)) * to the client by closing the connection.
svc_xprt_deferred_close(&rdma->sc_xprt); */
svc_xprt_deferred_close(&rdma->sc_xprt);
svc_rdma_write_info_free(info);
} }
/** /**
@ -580,41 +620,54 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len; return xdr->len;
} }
/** /* Link Write WRs for @chunk onto @sctxt's WR chain.
* svc_rdma_send_write_chunk - Write all segments in a Write chunk
* @rdma: controlling RDMA transport
* @chunk: Write chunk provided by the client
* @xdr: xdr_buf containing the data payload
*
* Returns a non-negative number of bytes the chunk consumed, or
* %-E2BIG if the payload was larger than the Write chunk,
* %-EINVAL if client provided too many segments,
* %-ENOMEM if rdma_rw context pool was exhausted,
* %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/ */
int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_chunk *chunk, struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr) const struct svc_rdma_chunk *chunk,
const struct xdr_buf *xdr)
{ {
struct svc_rdma_write_info *info; struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc; struct svc_rdma_chunk_ctxt *cc;
struct ib_send_wr *first_wr;
struct xdr_buf payload;
struct list_head *pos;
struct ib_cqe *cqe;
int ret; int ret;
if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
chunk->ch_payload_length))
return -EMSGSIZE;
info = svc_rdma_write_info_alloc(rdma, chunk); info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info) if (!info)
return -ENOMEM; return -ENOMEM;
cc = &info->wi_cc; cc = &info->wi_cc;
ret = svc_rdma_xb_write(xdr, info); ret = svc_rdma_xb_write(&payload, info);
if (ret != xdr->len) if (ret != payload.len)
goto out_err; goto out_err;
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); ret = -EINVAL;
ret = svc_rdma_post_chunk_ctxt(rdma, cc); if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
if (ret < 0)
goto out_err; goto out_err;
return xdr->len;
first_wr = sctxt->sc_wr_chain;
cqe = &cc->cc_cqe;
list_for_each(pos, &cc->cc_rwctxts) {
struct svc_rdma_rw_ctxt *rwc;
rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, cqe, first_wr);
cqe = NULL;
}
sctxt->sc_wr_chain = first_wr;
sctxt->sc_sqecount += cc->cc_sqecount;
list_add(&info->wi_list, &sctxt->sc_write_info_list);
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
return 0;
out_err: out_err:
svc_rdma_write_info_free(info); svc_rdma_write_info_free(info);
@ -622,9 +675,39 @@ out_err:
} }
/** /**
* svc_rdma_send_reply_chunk - Write all segments in the Reply chunk * svc_rdma_prepare_write_list - Construct WR chain for sending Write list
* @rdma: controlling RDMA transport * @rdma: controlling RDMA transport
* @rctxt: Write and Reply chunks from client * @write_pcl: Write list provisioned by the client
* @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply message
*
* Returns zero on success, or a negative errno if one or more
* Write chunks could not be sent.
*/
int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr)
{
struct svc_rdma_chunk *chunk;
int ret;
pcl_for_each_chunk(chunk, write_pcl) {
if (!chunk->ch_payload_length)
break;
ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
if (ret < 0)
return ret;
}
return 0;
}
/**
* svc_rdma_prepare_reply_chunk - Construct WR chain for writing the Reply chunk
* @rdma: controlling RDMA transport
* @write_pcl: Write chunk list provided by client
* @reply_pcl: Reply chunk provided by client
* @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply * @xdr: xdr_buf containing an RPC Reply
* *
* Returns a non-negative number of bytes the chunk consumed, or * Returns a non-negative number of bytes the chunk consumed, or
@ -634,39 +717,45 @@ out_err:
* %-ENOTCONN if posting failed (connection is lost), * %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc). * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/ */
int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt, const struct svc_rdma_pcl *write_pcl,
const struct xdr_buf *xdr) const struct svc_rdma_pcl *reply_pcl,
struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr)
{ {
struct svc_rdma_write_info *info; struct svc_rdma_write_info *info = &sctxt->sc_reply_info;
struct svc_rdma_chunk_ctxt *cc; struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
struct svc_rdma_chunk *chunk; struct ib_send_wr *first_wr;
struct list_head *pos;
struct ib_cqe *cqe;
int ret; int ret;
if (pcl_is_empty(&rctxt->rc_reply_pcl)) info->wi_rdma = rdma;
return 0; info->wi_chunk = pcl_first_chunk(reply_pcl);
info->wi_seg_off = 0;
info->wi_seg_no = 0;
info->wi_cc.cc_cqe.done = svc_rdma_reply_done;
chunk = pcl_first_chunk(&rctxt->rc_reply_pcl); ret = pcl_process_nonpayloads(write_pcl, xdr,
info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info)
return -ENOMEM;
cc = &info->wi_cc;
ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
svc_rdma_xb_write, info); svc_rdma_xb_write, info);
if (ret < 0) if (ret < 0)
goto out_err; return ret;
first_wr = sctxt->sc_wr_chain;
cqe = &cc->cc_cqe;
list_for_each(pos, &cc->cc_rwctxts) {
struct svc_rdma_rw_ctxt *rwc;
rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, cqe, first_wr);
cqe = NULL;
}
sctxt->sc_wr_chain = first_wr;
sctxt->sc_sqecount += cc->cc_sqecount;
trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount); trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
ret = svc_rdma_post_chunk_ctxt(rdma, cc);
if (ret < 0)
goto out_err;
return xdr->len; return xdr->len;
out_err:
svc_rdma_write_info_free(info);
return ret;
} }
/** /**

View File

@ -142,6 +142,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send; ctxt->sc_cqe.done = svc_rdma_wc_send;
INIT_LIST_HEAD(&ctxt->sc_write_info_list);
ctxt->sc_xprt_buf = buffer; ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size); rdma->sc_max_req_size);
@ -205,9 +206,13 @@ out:
xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
ctxt->sc_xprt_buf, NULL); ctxt->sc_xprt_buf, NULL);
svc_rdma_cc_init(rdma, &ctxt->sc_reply_info.wi_cc);
ctxt->sc_send_wr.num_sge = 0; ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0; ctxt->sc_cur_sge_no = 0;
ctxt->sc_page_count = 0; ctxt->sc_page_count = 0;
ctxt->sc_wr_chain = &ctxt->sc_send_wr;
ctxt->sc_sqecount = 1;
return ctxt; return ctxt;
out_empty: out_empty:
@ -223,6 +228,9 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device; struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i; unsigned int i;
svc_rdma_write_chunk_release(rdma, ctxt);
svc_rdma_reply_chunk_release(rdma, ctxt);
if (ctxt->sc_page_count) if (ctxt->sc_page_count)
release_pages(ctxt->sc_pages, ctxt->sc_page_count); release_pages(ctxt->sc_pages, ctxt->sc_page_count);
@ -293,7 +301,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_send_ctxt *ctxt = struct svc_rdma_send_ctxt *ctxt =
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
svc_rdma_wake_send_waiters(rdma, 1); svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS)) if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed; goto flushed;
@ -312,51 +320,76 @@ flushed:
} }
/** /**
* svc_rdma_send - Post a single Send WR * svc_rdma_post_send - Post a WR chain to the Send Queue
* @rdma: transport on which to post the WR * @rdma: transport context
* @ctxt: send ctxt with a Send WR ready to post * @ctxt: WR chain to post
* *
* Returns zero if the Send WR was posted successfully. Otherwise, a * Copy fields in @ctxt to stack variables in order to guarantee
* negative errno is returned. * that these values remain available after the ib_post_send() call.
* In some error flow cases, svc_rdma_wc_send() releases @ctxt.
*
* Note there is potential for starvation when the Send Queue is
* full because there is no order to when waiting threads are
* awoken. The transport is typically provisioned with a deep
* enough Send Queue that SQ exhaustion should be a rare event.
*
* Return values:
* %0: @ctxt's WR chain was posted successfully
* %-ENOTCONN: The connection was lost
*/ */
int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) int svc_rdma_post_send(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
{ {
struct ib_send_wr *wr = &ctxt->sc_send_wr; struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
int ret; struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
const struct ib_send_wr *bad_wr = first_wr;
struct rpc_rdma_cid cid = ctxt->sc_cid;
int ret, sqecount = ctxt->sc_sqecount;
might_sleep(); might_sleep();
/* Sync the transport header buffer */ /* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device, ib_dma_sync_single_for_device(rdma->sc_pd->device,
wr->sg_list[0].addr, send_wr->sg_list[0].addr,
wr->sg_list[0].length, send_wr->sg_list[0].length,
DMA_TO_DEVICE); DMA_TO_DEVICE);
/* If the SQ is full, wait until an SQ entry is available */ /* If the SQ is full, wait until an SQ entry is available */
while (1) { while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
svc_rdma_wake_send_waiters(rdma, sqecount);
/* When the transport is torn down, assume
* ib_drain_sq() will trigger enough Send
* completions to wake us. The XPT_CLOSE test
* above should then cause the while loop to
* exit.
*/
percpu_counter_inc(&svcrdma_stat_sq_starve); percpu_counter_inc(&svcrdma_stat_sq_starve);
trace_svcrdma_sq_full(rdma, &ctxt->sc_cid); trace_svcrdma_sq_full(rdma, &cid);
atomic_inc(&rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait, wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > 1); atomic_read(&rdma->sc_sq_avail) > 0);
if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) trace_svcrdma_sq_retry(rdma, &cid);
return -ENOTCONN;
trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid);
continue; continue;
} }
trace_svcrdma_post_send(ctxt); trace_svcrdma_post_send(ctxt);
ret = ib_post_send(rdma->sc_qp, wr, NULL); ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
if (ret) if (ret) {
break; trace_svcrdma_sq_post_err(rdma, &cid, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
/* If even one WR was posted, there will be a
* Send completion that bumps sc_sq_avail.
*/
if (bad_wr == first_wr) {
svc_rdma_wake_send_waiters(rdma, sqecount);
break;
}
}
return 0; return 0;
} }
return -ENOTCONN;
trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
return ret;
} }
/** /**
@ -839,16 +872,10 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges. * in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
* *
* Depending on whether a Write list or Reply chunk is present, * Depending on whether a Write list or Reply chunk is present,
* the server may send all, a portion of, or none of the xdr_buf. * the server may Send all, a portion of, or none of the xdr_buf.
* In the latter case, only the transport header (sc_sges[0]) is * In the latter case, only the transport header (sc_sges[0]) is
* transmitted. * transmitted.
* *
* RDMA Send is the last step of transmitting an RPC reply. Pages
* involved in the earlier RDMA Writes are here transferred out
* of the rqstp and into the sctxt's page array. These pages are
* DMA unmapped by each Write completion, but the subsequent Send
* completion finally releases these pages.
*
* Assumptions: * Assumptions:
* - The Reply's transport header will never be larger than a page. * - The Reply's transport header will never be larger than a page.
*/ */
@ -857,6 +884,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt, const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp) struct svc_rqst *rqstp)
{ {
struct ib_send_wr *send_wr = &sctxt->sc_send_wr;
int ret; int ret;
ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl, ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
@ -864,16 +892,19 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0) if (ret < 0)
return ret; return ret;
/* Transfer pages involved in RDMA Writes to the sctxt's
* page array. Completion handling releases these pages.
*/
svc_rdma_save_io_pages(rqstp, sctxt); svc_rdma_save_io_pages(rqstp, sctxt);
if (rctxt->rc_inv_rkey) { if (rctxt->rc_inv_rkey) {
sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; send_wr->opcode = IB_WR_SEND_WITH_INV;
sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey; send_wr->ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else { } else {
sctxt->sc_send_wr.opcode = IB_WR_SEND; send_wr->opcode = IB_WR_SEND;
} }
return svc_rdma_send(rdma, sctxt); return svc_rdma_post_send(rdma, sctxt);
} }
/** /**
@ -937,7 +968,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.num_sge = 1; sctxt->sc_send_wr.num_sge = 1;
sctxt->sc_send_wr.opcode = IB_WR_SEND; sctxt->sc_send_wr.opcode = IB_WR_SEND;
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
if (svc_rdma_send(rdma, sctxt)) if (svc_rdma_post_send(rdma, sctxt))
goto put_ctxt; goto put_ctxt;
return; return;
@ -984,10 +1015,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p) if (!p)
goto put_ctxt; goto put_ctxt;
ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res); ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
&rqstp->rq_res);
if (ret < 0) if (ret < 0)
goto reply_chunk; goto put_ctxt;
rc_size = ret;
rc_size = 0;
if (!pcl_is_empty(&rctxt->rc_reply_pcl)) {
ret = svc_rdma_prepare_reply_chunk(rdma, &rctxt->rc_write_pcl,
&rctxt->rc_reply_pcl, sctxt,
&rqstp->rq_res);
if (ret < 0)
goto reply_chunk;
rc_size = ret;
}
*p++ = *rdma_argp; *p++ = *rdma_argp;
*p++ = *(rdma_argp + 1); *p++ = *(rdma_argp + 1);
@ -1030,45 +1071,33 @@ drop_connection:
/** /**
* svc_rdma_result_payload - special processing for a result payload * svc_rdma_result_payload - special processing for a result payload
* @rqstp: svc_rqst to operate on * @rqstp: RPC transaction context
* @offset: payload's byte offset in @xdr * @offset: payload's byte offset in @rqstp->rq_res
* @length: size of payload, in bytes * @length: size of payload, in bytes
* *
* Assign the passed-in result payload to the current Write chunk,
* and advance rc_cur_result_payload to the next Write chunk, if
* there is one.
*
* Return values: * Return values:
* %0 if successful or nothing needed to be done * %0 if successful or nothing needed to be done
* %-EMSGSIZE on XDR buffer overflow
* %-E2BIG if the payload was larger than the Write chunk * %-E2BIG if the payload was larger than the Write chunk
* %-EINVAL if client provided too many segments
* %-ENOMEM if rdma_rw context pool was exhausted
* %-ENOTCONN if posting failed (connection is lost)
* %-EIO if rdma_rw initialization failed (DMA mapping, etc)
*/ */
int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length) unsigned int length)
{ {
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
struct svc_rdma_chunk *chunk; struct svc_rdma_chunk *chunk;
struct svcxprt_rdma *rdma;
struct xdr_buf subbuf;
int ret;
chunk = rctxt->rc_cur_result_payload; chunk = rctxt->rc_cur_result_payload;
if (!length || !chunk) if (!length || !chunk)
return 0; return 0;
rctxt->rc_cur_result_payload = rctxt->rc_cur_result_payload =
pcl_next_chunk(&rctxt->rc_write_pcl, chunk); pcl_next_chunk(&rctxt->rc_write_pcl, chunk);
if (length > chunk->ch_length) if (length > chunk->ch_length)
return -E2BIG; return -E2BIG;
chunk->ch_position = offset; chunk->ch_position = offset;
chunk->ch_payload_length = length; chunk->ch_payload_length = length;
if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length))
return -EMSGSIZE;
rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
ret = svc_rdma_send_write_chunk(rdma, chunk, &subbuf);
if (ret < 0)
return ret;
return 0; return 0;
} }

View File

@ -415,15 +415,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
newxprt->sc_max_send_sges = dev->attrs.max_send_sge; newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests + rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
newxprt->sc_recv_batch; newxprt->sc_recv_batch + 1 /* drain */;
if (rq_depth > dev->attrs.max_qp_wr) { if (rq_depth > dev->attrs.max_qp_wr) {
rq_depth = dev->attrs.max_qp_wr; rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1; newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2; newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2; newxprt->sc_max_bc_requests = 2;
} }
ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
ctxts *= newxprt->sc_max_requests; /* Arbitrarily estimate the number of rw_ctxs needed for
* this transport. This is enough rw_ctxs to make forward
* progress even if the client is using one rkey per page
* in each Read chunk.
*/
ctxts = 3 * RPCSVC_MAXPAGES;
newxprt->sc_sq_depth = rq_depth + ctxts; newxprt->sc_sq_depth = rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr; newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
@ -460,12 +465,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr); qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n", dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge); qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
dprintk(" send CQ depth = %u, recv CQ depth = %u\n",
newxprt->sc_sq_depth, rq_depth);
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) { if (ret) {
trace_svcrdma_qp_err(newxprt, ret); trace_svcrdma_qp_err(newxprt, ret);
goto errout; goto errout;
} }
newxprt->sc_max_send_sges = qp_attr.cap.max_send_sge;
newxprt->sc_qp = newxprt->sc_cm_id->qp; newxprt->sc_qp = newxprt->sc_cm_id->qp;
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))

View File

@ -2987,20 +2987,11 @@ static int bc_send_request(struct rpc_rqst *req)
return len; return len;
} }
/*
* The close routine. Since this is client initiated, we do nothing
*/
static void bc_close(struct rpc_xprt *xprt) static void bc_close(struct rpc_xprt *xprt)
{ {
xprt_disconnect_done(xprt); xprt_disconnect_done(xprt);
} }
/*
* The xprt destroy routine. Again, because this connection is client
* initiated, we do nothing
*/
static void bc_destroy(struct rpc_xprt *xprt) static void bc_destroy(struct rpc_xprt *xprt)
{ {
dprintk("RPC: bc_destroy xprt %p\n", xprt); dprintk("RPC: bc_destroy xprt %p\n", xprt);