NFSD 6.9 Release Notes

Merge tag 'nfsd-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:
 "The bulk of the patches for this release are optimizations, code
  clean-ups, and minor bug fixes.

  One new feature to mention is that NFSD administrators now have the
  ability to revoke NFSv4 open and lock state. NFSD's NFSv3 support has
  had this capability for some time.

  As always I am grateful to NFSD contributors, reviewers, and testers"
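One way to exercise the new feature, for illustration: revocation is wired into nfsd's existing "unlock_filesystem" control file (see the write_unlock_fs() hunk below, which now calls nfsd4_revoke_states() alongside the old NLM unlock). A minimal userspace sketch, assuming the nfsd filesystem is mounted at the usual /proc/fs/nfsd:

    /* revoke-state.c: ask nfsd to drop all NFS lock/open state for one
     * exported filesystem. Previously this dropped only NFSv3 (NLM)
     * locks; with 6.9 it also revokes NFSv4 open and lock state. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        int fd;

        if (argc != 2) {
            fprintf(stderr, "usage: %s <export-root>\n", argv[0]);
            return 1;
        }
        fd = open("/proc/fs/nfsd/unlock_filesystem", O_WRONLY);
        if (fd < 0) {
            perror("open");
            return 1;
        }
        if (write(fd, argv[1], strlen(argv[1])) < 0)
            perror("write");
        close(fd);
        return 0;
    }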

* tag 'nfsd-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (75 commits)
  NFSD: Clean up nfsd4_encode_replay()
  NFSD: send OP_CB_RECALL_ANY to clients when number of delegations reaches its limit
  NFSD: Document nfsd_setattr() fill-attributes behavior
  nfsd: Fix NFSv3 atomicity bugs in nfsd_setattr()
  nfsd: Fix a regression in nfsd_setattr()
  NFSD: OP_CB_RECALL_ANY should recall both read and write delegations
  NFSD: handle GETATTR conflict with write delegation
  NFSD: add support for CB_GETATTR callback
  NFSD: Document the phases of CREATE_SESSION
  NFSD: Fix the NFSv4.1 CREATE_SESSION operation
  nfsd: clean up comments over nfs4_client definition
  svcrdma: Add Write chunk WRs to the RPC's Send WR chain
  svcrdma: Post WRs for Write chunks in svc_rdma_sendto()
  svcrdma: Post the Reply chunk and Send WR together
  svcrdma: Move write_info for Reply chunks into struct svc_rdma_send_ctxt
  svcrdma: Post Send WR chain
  svcrdma: Fix retry loop in svc_rdma_send()
  svcrdma: Prevent a UAF in svc_rdma_send()
  svcrdma: Fix SQ wake-ups
  svcrdma: Increase the per-transport rw_ctx count
  ...
Merged by Linus Torvalds on 2024-03-12 14:27:37 -07:00 in commit a01c9fe323.
44 changed files with 1765 additions and 755 deletions.


@ -8172,6 +8172,7 @@ F: include/uapi/scsi/fc/
FILE LOCKING (flock() and fcntl()/lockf())
M: Jeff Layton <jlayton@kernel.org>
M: Chuck Lever <chuck.lever@oracle.com>
R: Alexander Aring <alex.aring@gmail.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/fcntl.c


@ -710,8 +710,6 @@ static const struct svc_version *nlmsvc_version[] = {
#endif
};
static struct svc_stat nlmsvc_stats;
#define NLM_NRVERS ARRAY_SIZE(nlmsvc_version)
static struct svc_program nlmsvc_program = {
.pg_prog = NLM_PROGRAM, /* program number */
@ -719,7 +717,6 @@ static struct svc_program nlmsvc_program = {
.pg_vers = nlmsvc_version, /* version table */
.pg_name = "lockd", /* service name */
.pg_class = "nfsd", /* share authentication with nfsd */
.pg_stats = &nlmsvc_stats, /* stats table */
.pg_authenticate = &lockd_authenticate, /* export authentication */
.pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set,


@ -356,15 +356,12 @@ static const struct svc_version *nfs4_callback_version[] = {
[4] = &nfs4_callback_version4,
};
static struct svc_stat nfs4_callback_stats;
static struct svc_program nfs4_callback_program = {
.pg_prog = NFS4_CALLBACK, /* RPC service number */
.pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
.pg_vers = nfs4_callback_version, /* version table */
.pg_name = "NFSv4 callback", /* service name */
.pg_class = "nfs", /* authentication class */
.pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate,
.pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set,


@ -328,10 +328,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
}
static void
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp), 0, true);


@ -80,8 +80,6 @@ enum {
int nfsd_drc_slab_create(void);
void nfsd_drc_slab_free(void);
int nfsd_net_reply_cache_init(struct nfsd_net *nn);
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,


@ -61,13 +61,10 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
struct nfsd_fcache_disposal {
struct work_struct work;
spinlock_t lock;
struct list_head freeme;
};
static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab;
static struct list_lru nfsd_file_lru;
@ -283,7 +280,7 @@ nfsd_file_free(struct nfsd_file *nf)
nfsd_file_mark_put(nf->nf_mark);
if (nf->nf_file) {
nfsd_file_check_write_error(nf);
filp_close(nf->nf_file, NULL);
nfsd_filp_close(nf->nf_file);
}
/*
@ -421,7 +418,37 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
spin_lock(&l->lock);
list_move_tail(&nf->nf_lru, &l->freeme);
spin_unlock(&l->lock);
queue_work(nfsd_filecache_wq, &l->work);
svc_wake_up(nn->nfsd_serv);
}
}
/**
* nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed.
* @nn: nfsd_net in which to find files to be disposed.
*
* When files held open for nfsv3 are removed from the filecache, whether
* due to memory pressure or garbage collection, they are queued to
* a per-net-ns queue. This function completes the disposal, either
* directly or by waking another nfsd thread to help with the work.
*/
void nfsd_file_net_dispose(struct nfsd_net *nn)
{
struct nfsd_fcache_disposal *l = nn->fcache_disposal;
if (!list_empty(&l->freeme)) {
LIST_HEAD(dispose);
int i;
spin_lock(&l->lock);
for (i = 0; i < 8 && !list_empty(&l->freeme); i++)
list_move(l->freeme.next, &dispose);
spin_unlock(&l->lock);
if (!list_empty(&l->freeme))
/* Wake up another thread to share the work
* *before* doing any actual disposing.
*/
svc_wake_up(nn->nfsd_serv);
nfsd_file_dispose_list(&dispose);
}
}
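For context, a sketch of how this disposal is driven after the change (abridged from the nfssvc.c hunk later on this page, which adds the nfsd_file_net_dispose() call; the loop's setup, condition, and shutdown logic are elided here):

    /* Each nfsd thread polls the per-net freeme queue on every trip
     * through its main loop, so a single svc_wake_up() fans the work
     * out across threads, eight files at a time. */
    for (;;) {
        /* ... wait for and process one RPC ... */
        svc_recv(rqstp);
        nfsd_file_net_dispose(nn);
    }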
@ -631,28 +658,6 @@ nfsd_file_close_inode_sync(struct inode *inode)
list_del_init(&nf->nf_lru);
nfsd_file_free(nf);
}
flush_delayed_fput();
}
/**
* nfsd_file_delayed_close - close unused nfsd_files
* @work: dummy
*
* Scrape the freeme list for this nfsd_net, and then dispose of them
* all.
*/
static void
nfsd_file_delayed_close(struct work_struct *work)
{
LIST_HEAD(head);
struct nfsd_fcache_disposal *l = container_of(work,
struct nfsd_fcache_disposal, work);
spin_lock(&l->lock);
list_splice_init(&l->freeme, &head);
spin_unlock(&l->lock);
nfsd_file_dispose_list(&head);
}
static int
@ -717,25 +722,18 @@ nfsd_file_cache_init(void)
return ret;
ret = -ENOMEM;
nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", WQ_UNBOUND, 0);
if (!nfsd_filecache_wq)
goto out;
nfsd_file_slab = kmem_cache_create("nfsd_file",
sizeof(struct nfsd_file), 0, 0, NULL);
nfsd_file_slab = KMEM_CACHE(nfsd_file, 0);
if (!nfsd_file_slab) {
pr_err("nfsd: unable to create nfsd_file_slab\n");
goto out_err;
}
nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
sizeof(struct nfsd_file_mark), 0, 0, NULL);
nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0);
if (!nfsd_file_mark_slab) {
pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
goto out_err;
}
ret = list_lru_init(&nfsd_file_lru);
if (ret) {
pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
@ -785,8 +783,6 @@ out_err:
nfsd_file_slab = NULL;
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
destroy_workqueue(nfsd_filecache_wq);
nfsd_filecache_wq = NULL;
rhltable_destroy(&nfsd_file_rhltable);
goto out;
}
@ -832,7 +828,6 @@ nfsd_alloc_fcache_disposal(void)
l = kmalloc(sizeof(*l), GFP_KERNEL);
if (!l)
return NULL;
INIT_WORK(&l->work, nfsd_file_delayed_close);
spin_lock_init(&l->lock);
INIT_LIST_HEAD(&l->freeme);
return l;
@ -841,7 +836,6 @@ nfsd_alloc_fcache_disposal(void)
static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
cancel_work_sync(&l->work);
nfsd_file_dispose_list(&l->freeme);
kfree(l);
}
@ -910,8 +904,6 @@ nfsd_file_cache_shutdown(void)
fsnotify_wait_marks_destroyed();
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
destroy_workqueue(nfsd_filecache_wq);
nfsd_filecache_wq = NULL;
rhltable_destroy(&nfsd_file_rhltable);
for_each_possible_cpu(i) {
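(Aside on a pattern repeated in this and several later files: open-coded kmem_cache_create() calls become KMEM_CACHE(). For reference, the <linux/slab.h> helper derives the cache name, object size, and alignment from the struct itself, roughly:

    #define KMEM_CACHE(__struct, __flags)                                 \
        kmem_cache_create(#__struct, sizeof(struct __struct),             \
                          __alignof__(struct __struct), (__flags), NULL)

so behavior is unchanged apart from the cache taking the struct's name and natural alignment.)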


@ -56,6 +56,7 @@ void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
void nfsd_file_net_dispose(struct nfsd_net *nn);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);


@ -11,8 +11,10 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/filelock.h>
#include <linux/nfs4.h>
#include <linux/percpu_counter.h>
#include <linux/siphash.h>
#include <linux/sunrpc/stats.h>
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
@ -26,10 +28,22 @@ struct nfsd4_client_tracking_ops;
enum {
/* cache misses due only to checksum comparison failures */
NFSD_NET_PAYLOAD_MISSES,
NFSD_STATS_PAYLOAD_MISSES,
/* amount of memory (in bytes) currently consumed by the DRC */
NFSD_NET_DRC_MEM_USAGE,
NFSD_NET_COUNTERS_NUM
NFSD_STATS_DRC_MEM_USAGE,
NFSD_STATS_RC_HITS, /* repcache hits */
NFSD_STATS_RC_MISSES, /* repcache misses */
NFSD_STATS_RC_NOCACHE, /* uncached reqs */
NFSD_STATS_FH_STALE, /* FH stale error */
NFSD_STATS_IO_READ, /* bytes returned to read requests */
NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
#ifdef CONFIG_NFSD_V4
NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
#endif
NFSD_STATS_COUNTERS_NUM
};
/*
@ -164,7 +178,10 @@ struct nfsd_net {
atomic_t num_drc_entries;
/* Per-netns stats counters */
struct percpu_counter counter[NFSD_NET_COUNTERS_NUM];
struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
/* sunrpc svc stats */
struct svc_stat nfsd_svcstats;
/* longest hash chain seen */
unsigned int longest_chain;
@ -192,6 +209,10 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker *nfsd_client_shrinker;
struct work_struct nfsd_shrinker_work;
/* last time an admin-revoke happened for NFSv4.0 */
time64_t nfs40_last_revoke;
};
/* Simple check to find out if a given net was properly initialized */


@ -71,13 +71,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp)
struct nfsd_attrs attrs = {
.na_iattr = &argp->attrs,
};
const struct timespec64 *guardtime = NULL;
dprintk("nfsd: SETATTR(3) %s\n",
SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs,
argp->check_guard, argp->guardtime);
if (argp->check_guard)
guardtime = &argp->guardtime;
resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime);
return rpc_success;
}
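The setattr hunks here and in nfsproc.c and nfs4proc.c below all track a single signature change: the old (check_guard, guardtime) argument pair becomes one nullable pointer, so the NFSv3 ctime guard can be checked by nfsd_setattr() itself, atomically under the inode lock. The updated prototype as implied by these call sites (a reconstruction, not quoted from the patch):

    /* guardtime == NULL means no guard check; otherwise the setattr
     * fails with NFS3ERR_NOT_SYNC unless the file's ctime still
     * matches *guardtime at the moment the change is applied. */
    __be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
                        struct nfsd_attrs *attr,
                        const struct timespec64 *guardtime);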


@ -295,17 +295,14 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
static bool
svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args)
{
__be32 *p;
u32 check;
if (xdr_stream_decode_bool(xdr, &check) < 0)
return false;
if (check) {
p = xdr_inline_decode(xdr, XDR_UNIT * 2);
if (!p)
if (!svcxdr_decode_nfstime3(xdr, &args->guardtime))
return false;
args->check_guard = 1;
args->guardtime = be32_to_cpup(p);
} else
args->check_guard = 0;
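For reference, the wire format decoded above, as given in RFC 1813 (SETATTR, section 3.3.2). The old code inlined two XDR words but kept only the first (the seconds field), so a guard could never exactly match a ctime with a nonzero nanoseconds part:

    union sattrguard3 switch (bool check) {
    case TRUE:
        nfstime3  obj_ctime;    /* seconds and nseconds */
    case FALSE:
        void;
    };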


@ -45,7 +45,7 @@
#define NFSDDBG_FACILITY NFSDDBG_PROC
static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
static void nfsd4_mark_cb_fault(struct nfs4_client *clp);
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
@ -85,7 +85,21 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n)
static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
size_t len)
{
WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
xdr_stream_encode_uint32_array(xdr, bitmap, len);
}
static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
struct nfs4_cb_fattr *fattr)
{
fattr->ncf_cb_change = 0;
fattr->ncf_cb_fsize = 0;
if (bitmap[0] & FATTR4_WORD0_CHANGE)
if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
return -NFSERR_BAD_XDR;
if (bitmap[0] & FATTR4_WORD0_SIZE)
if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
return -NFSERR_BAD_XDR;
return 0;
}
static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
@ -333,6 +347,30 @@ encode_cb_recallany4args(struct xdr_stream *xdr,
hdr->nops++;
}
/*
* CB_GETATTR4args
* struct CB_GETATTR4args {
* nfs_fh4 fh;
* bitmap4 attr_request;
* };
*
* The size and change attributes are the only ones
* guaranteed to be serviced by the client.
*/
static void
encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
struct nfs4_cb_fattr *fattr)
{
struct nfs4_delegation *dp =
container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
encode_nfs_fh4(xdr, fh);
encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap));
hdr->nops++;
}
/*
* CB_SEQUENCE4args
*
@ -468,6 +506,26 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
xdr_reserve_space(xdr, 0);
}
/*
* 20.1. Operation 3: CB_GETATTR - Get Attributes
*/
static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req,
struct xdr_stream *xdr, const void *data)
{
const struct nfsd4_callback *cb = data;
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
struct nfs4_cb_compound_hdr hdr = {
.ident = cb->cb_clp->cl_cb_ident,
.minorversion = cb->cb_clp->cl_minorversion,
};
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_getattr4args(xdr, &hdr, ncf);
encode_cb_nops(&hdr);
}
/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
@ -523,6 +581,42 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
return 0;
}
/*
* 20.1. Operation 3: CB_GETATTR - Get Attributes
*/
static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
void *data)
{
struct nfsd4_callback *cb = data;
struct nfs4_cb_compound_hdr hdr;
int status;
u32 bitmap[3] = {0};
u32 attrlen;
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
status = decode_cb_compound4res(xdr, &hdr);
if (unlikely(status))
return status;
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
if (status)
return status;
if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
return -NFSERR_BAD_XDR;
if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
return -NFSERR_BAD_XDR;
if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
return -NFSERR_BAD_XDR;
status = decode_cb_fattr4(xdr, bitmap, ncf);
return status;
}
/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
@ -831,6 +925,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
@ -887,7 +982,16 @@ static struct workqueue_struct *callback_wq;
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
return queue_work(callback_wq, &cb->cb_work);
trace_nfsd_cb_queue(cb->cb_clp, cb);
return queue_delayed_work(callback_wq, &cb->cb_work, 0);
}
static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb,
unsigned long msecs)
{
trace_nfsd_cb_queue(cb->cb_clp, cb);
queue_delayed_work(callback_wq, &cb->cb_work,
msecs_to_jiffies(msecs));
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
@ -999,18 +1103,18 @@ static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate)
{
if (clp->cl_cb_state != newstate) {
clp->cl_cb_state = newstate;
trace_nfsd_cb_state(clp);
trace_nfsd_cb_new_state(clp);
}
}
static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
static void nfsd4_mark_cb_down(struct nfs4_client *clp)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN);
}
static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
static void nfsd4_mark_cb_fault(struct nfs4_client *clp)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
@ -1022,7 +1126,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
if (task->tk_status)
nfsd4_mark_cb_down(clp, task->tk_status);
nfsd4_mark_cb_down(clp);
else
nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
}
@ -1106,6 +1210,7 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
trace_nfsd_cb_destroy(clp, cb);
nfsd41_cb_release_slot(cb);
if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
@ -1158,6 +1263,8 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
if (!cb->cb_holds_slot)
goto need_restart;
/* This is the operation status code for CB_SEQUENCE */
trace_nfsd_cb_seq_status(task, cb);
switch (cb->cb_seq_status) {
case 0:
/*
@ -1171,13 +1278,23 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
break;
case -ESERVERFAULT:
++session->se_cb_seq_nr;
fallthrough;
case 1:
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
nfsd4_mark_cb_fault(cb->cb_clp);
ret = false;
break;
case 1:
/*
* cb_seq_status remains 1 if an RPC Reply was never
* received. NFSD can't know if the client processed
* the CB_SEQUENCE operation. Ask the client to send a
* DESTROY_SESSION to recover.
*/
fallthrough;
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp);
ret = false;
goto need_restart;
case -NFS4ERR_DELAY:
cb->cb_seq_status = 1;
if (!rpc_restart_call(task))
goto out;
@ -1192,14 +1309,11 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
}
break;
default:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
dprintk("%s: unprocessed error %d\n", __func__,
cb->cb_seq_status);
nfsd4_mark_cb_fault(cb->cb_clp);
}
nfsd41_cb_release_slot(cb);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
trace_nfsd_cb_free_slot(task, cb);
if (RPC_SIGNALLED(task))
goto need_restart;
@ -1211,6 +1325,7 @@ retry_nowait:
goto out;
need_restart:
if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
trace_nfsd_cb_restart(clp, cb);
task->tk_status = 0;
cb->cb_need_restart = true;
}
@ -1240,7 +1355,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
case -EIO:
case -ETIMEDOUT:
case -EACCES:
nfsd4_mark_cb_down(clp, task->tk_status);
nfsd4_mark_cb_down(clp);
}
break;
default:
@ -1295,12 +1410,13 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
nfsd41_cb_inflight_wait_complete(clp);
}
/* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{
struct nfsd4_session *s;
struct nfsd4_conn *c;
lockdep_assert_held(&clp->cl_lock);
list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
list_for_each_entry(c, &s->se_conns, cn_persession) {
if (c->cn_flags & NFS4_CDFC4_BACK)
@ -1324,11 +1440,14 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
struct nfsd4_conn *c;
int err;
trace_nfsd_cb_bc_update(clp, cb);
/*
* This is either an update, or the client dying; in either case,
* kill the old client:
*/
if (clp->cl_cb_client) {
trace_nfsd_cb_bc_shutdown(clp, cb);
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
put_cred(clp->cl_cb_cred);
@ -1340,13 +1459,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
}
if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
return;
spin_lock(&clp->cl_lock);
/*
* Only serialized callback code is allowed to clear these
* flags; main nfsd code can only set them:
*/
BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
WARN_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
c = __nfsd4_find_backchannel(clp);
if (c) {
@ -1358,7 +1479,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
nfsd4_mark_cb_down(clp, err);
nfsd4_mark_cb_down(clp);
if (c)
svc_xprt_put(c->cn_xprt);
return;
@ -1369,25 +1490,28 @@ static void
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
container_of(work, struct nfsd4_callback, cb_work);
container_of(work, struct nfsd4_callback, cb_work.work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
int flags;
if (cb->cb_need_restart) {
cb->cb_need_restart = false;
} else {
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
}
trace_nfsd_cb_start(clp);
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
if (!clnt) {
/* Callback channel broken, or client killed; give up: */
nfsd41_destroy_cb(cb);
if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
nfsd41_destroy_cb(cb);
else {
/*
* XXX: Ideally, we could wait for the client to
* reconnect, but I haven't figured out how
* to do that yet.
*/
nfsd4_queue_cb_delayed(cb, 25);
}
return;
}
@ -1400,6 +1524,12 @@ nfsd4_run_cb_work(struct work_struct *work)
return;
}
if (cb->cb_need_restart) {
cb->cb_need_restart = false;
} else {
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
}
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
@ -1414,8 +1544,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_seq_status = 1;
INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
cb->cb_need_restart = false;
cb->cb_holds_slot = false;
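Putting the CB_GETATTR pieces together -- my reconstruction, since the nfs4state.c changes that drive it are in the diff suppressed below as too large: when a GETATTR for the size or change attribute lands on a file with an outstanding write delegation, nfsd now asks the delegation holder instead of recalling the delegation outright.

    /* Sketch only; assumed glue code, not quoted from the patch. */
    bool file_modified = false;
    u64 size = 0;
    __be32 status;

    status = nfsd4_deleg_getattr_conflict(rqstp, inode,
                                          &file_modified, &size);
    /* Sends the CB_GETATTR encoded above and waits up to
     * NFSD_CB_GETATTR_TIMEOUT (~30ms; see the nfsd.h hunk below).
     * On return, nfsd4_encode_fattr4() substitutes the client's
     * reported size when the client has modified the file under
     * its delegation. */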


@ -152,6 +152,23 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
#endif
}
void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
{
struct nfsd_file *fl;
spin_lock(&ls->ls_stid.sc_file->fi_lock);
fl = ls->ls_file;
ls->ls_file = NULL;
spin_unlock(&ls->ls_stid.sc_file->fi_lock);
if (fl) {
if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
kernel_setlease(fl->nf_file, F_UNLCK, NULL,
(void **)&ls);
nfsd_file_put(fl);
}
}
static void
nfsd4_free_layout_stateid(struct nfs4_stid *stid)
{
@ -169,9 +186,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
list_del_init(&ls->ls_perfile);
spin_unlock(&fp->fi_lock);
if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
kernel_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
nfsd_file_put(ls->ls_file);
nfsd4_close_layout(ls);
if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@ -235,7 +250,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
NFSPROC4_CLNT_CB_LAYOUT);
if (parent->sc_type == NFS4_DELEG_STID)
if (parent->sc_type == SC_TYPE_DELEG)
ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else
ls->ls_file = find_any_file(fp);
@ -249,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
}
spin_lock(&clp->cl_lock);
stp->sc_type = NFS4_LAYOUT_STID;
stp->sc_type = SC_TYPE_LAYOUT;
list_add(&ls->ls_perclnt, &clp->cl_lo_states);
spin_unlock(&clp->cl_lock);
@ -268,13 +283,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
{
struct nfs4_layout_stateid *ls;
struct nfs4_stid *stid;
unsigned char typemask = NFS4_LAYOUT_STID;
unsigned short typemask = SC_TYPE_LAYOUT;
__be32 status;
if (create)
typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
typemask |= (SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG);
status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
net_generic(SVC_NET(rqstp), nfsd_net_id));
if (status)
goto out;
@ -285,7 +300,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
goto out_put_stid;
}
if (stid->sc_type != NFS4_LAYOUT_STID) {
if (stid->sc_type != SC_TYPE_LAYOUT) {
ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
nfs4_put_stid(stid);
@ -517,7 +532,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
lrp->lrs_present = true;
} else {
trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
nfs4_unhash_stid(&ls->ls_stid);
ls->ls_stid.sc_status |= SC_STATUS_CLOSED;
lrp->lrs_present = false;
}
spin_unlock(&ls->ls_lock);
@ -604,7 +619,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
}
static void
nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
char addr_str[INET6_ADDRSTRLEN];
@ -626,7 +641,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
argv[0] = (char *)nfsd_recall_failed;
argv[1] = addr_str;
argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL;
error = call_usermodehelper(nfsd_recall_failed, argv, envp,
@ -656,6 +671,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
struct nfsd_net *nn;
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
struct nfsd_file *fl;
trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
switch (task->tk_status) {
@ -687,12 +703,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
* Unknown error or non-responding client, we'll need to fence.
*/
trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
ops = nfsd4_layout_ops[ls->ls_layout_type];
if (ops->fence_client)
ops->fence_client(ls);
else
nfsd4_cb_layout_fail(ls);
rcu_read_lock();
fl = nfsd_file_get(ls->ls_file);
rcu_read_unlock();
if (fl) {
ops = nfsd4_layout_ops[ls->ls_layout_type];
if (ops->fence_client)
ops->fence_client(ls, fl);
else
nfsd4_cb_layout_fail(ls, fl);
nfsd_file_put(fl);
}
return 1;
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
@ -755,13 +776,11 @@ nfsd4_init_pnfs(void)
for (i = 0; i < DEVID_HASH_SIZE; i++)
INIT_LIST_HEAD(&nfsd_devid_hash[i]);
nfs4_layout_cache = kmem_cache_create("nfs4_layout",
sizeof(struct nfs4_layout), 0, 0, NULL);
nfs4_layout_cache = KMEM_CACHE(nfs4_layout, 0);
if (!nfs4_layout_cache)
return -ENOMEM;
nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
nfs4_layout_stateid_cache = KMEM_CACHE(nfs4_layout_stateid, 0);
if (!nfs4_layout_stateid_cache) {
kmem_cache_destroy(nfs4_layout_cache);
return -ENOMEM;
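Worth spelling out why the fencing path changed shape (my reading of the hunks above): ls->ls_file can now be detached at any time by the new nfsd4_close_layout(), so nfsd4_cb_layout_done() must pin the file before fencing rather than chase the raw pointer:

    /* The pattern in miniature (sketch): pin, use, put. */
    rcu_read_lock();
    fl = nfsd_file_get(ls->ls_file);   /* NULL if already closed */
    rcu_read_unlock();
    if (fl) {
        ops->fence_client(ls, fl);     /* file now passed explicitly */
        nfsd_file_put(fl);
    }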


@ -1143,6 +1143,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
};
struct inode *inode;
__be32 status = nfs_ok;
bool save_no_wcc;
int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
@ -1168,8 +1169,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
0, (time64_t)0);
save_no_wcc = cstate->current_fh.fh_no_wcc;
cstate->current_fh.fh_no_wcc = true;
status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL);
cstate->current_fh.fh_no_wcc = save_no_wcc;
if (!status)
status = nfserrno(attrs.na_labelerr);
if (!status)
@ -2490,10 +2493,10 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
return rpc_success;
}
static inline void nfsd4_increment_op_stats(u32 opnum)
static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum)
{
if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]);
percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]);
}
static const struct nfsd4_operation nfsd4_ops[];
@ -2768,7 +2771,7 @@ encode_op:
status, nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate);
nfsd4_increment_op_stats(op->opnum);
nfsd4_increment_op_stats(nn, op->opnum);
}
fh_put(current_fh);

File diff suppressed because it is too large.


@ -3507,6 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
unsigned long mask[2];
} u;
unsigned long bit;
bool file_modified = false;
u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@ -3533,7 +3535,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
}
args.size = 0;
if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
&file_modified, &size);
if (status)
goto out;
}
@ -3543,7 +3546,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
args.size = args.stat.size;
if (file_modified)
args.size = size;
else
args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
@ -5386,16 +5392,11 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,
/*
* If the cookie is larger than the maximum number we can fit
* in either the buffer we just got back from vfs_listxattr, or,
* XDR-encoded, in the return buffer, it's invalid.
* in the buffer we just got back from vfs_listxattr, it's invalid.
*/
if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2))
return nfserr_badcookie;
if (cookie > (listxattrs->lsxa_maxcount /
(XDR_QUADLEN(XATTR_USER_PREFIX_LEN + 2) + 4)))
return nfserr_badcookie;
*offsetp = (u32)cookie;
return 0;
}
@ -5412,6 +5413,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
u64 cookie;
char *sp;
__be32 status, tmp;
__be64 wire_cookie;
__be32 *p;
u32 nuser;
@ -5427,7 +5429,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
*/
cookie_offset = xdr->buf->len;
count_offset = cookie_offset + 8;
p = xdr_reserve_space(xdr, 12);
p = xdr_reserve_space(xdr, XDR_UNIT * 3);
if (!p) {
status = nfserr_resource;
goto out;
@ -5438,7 +5440,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
sp = listxattrs->lsxa_buf;
nuser = 0;
xdrleft = listxattrs->lsxa_maxcount;
/* Bytes left is maxcount - 8 (cookie) - 4 (array count) */
xdrleft = listxattrs->lsxa_maxcount - XDR_UNIT * 3;
while (left > 0 && xdrleft > 0) {
slen = strlen(sp);
@ -5451,7 +5454,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
slen -= XATTR_USER_PREFIX_LEN;
xdrlen = 4 + ((slen + 3) & ~3);
if (xdrlen > xdrleft) {
/* Check if both entry and eof can fit in the XDR buffer */
if (xdrlen + XDR_UNIT > xdrleft) {
if (count == 0) {
/*
* Can't even fit the first attribute name.
@ -5503,7 +5507,8 @@ wreof:
cookie = offset + count;
write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &cookie, 8);
wire_cookie = cpu_to_be64(cookie);
write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &wire_cookie, 8);
tmp = cpu_to_be32(count);
write_bytes_to_xdr_buf(xdr->buf, count_offset, &tmp, 4);
out:
@ -5727,27 +5732,24 @@ release:
rqstp->rq_next_page = xdr->page_ptr + 1;
}
/*
* Encode the reply stored in the stateowner reply cache
*
* XDR note: do not encode rp->rp_buflen: the buffer contains the
* previously sent already encoded operation.
/**
* nfsd4_encode_replay - encode a result stored in the stateowner reply cache
* @xdr: send buffer's XDR stream
* @op: operation being replayed
*
* @op->replay->rp_buf contains the previously-sent already-encoded result.
*/
void
nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
{
__be32 *p;
struct nfs4_replay *rp = op->replay;
p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
if (!p) {
WARN_ON_ONCE(1);
return;
}
*p++ = cpu_to_be32(op->opnum);
*p++ = rp->rp_status; /* already xdr'ed */
trace_nfsd_stateowner_replay(op->opnum, rp);
p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
return;
if (xdr_stream_encode_be32(xdr, rp->rp_status) != XDR_UNIT)
return;
xdr_stream_encode_opaque_fixed(xdr, rp->rp_buf, rp->rp_buflen);
}
void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
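A gloss on the xdr_stream helpers adopted in nfsd4_encode_replay() above (standard sunrpc semantics, stated from memory rather than from this patch): xdr_stream_encode_u32() and friends reserve stream space and encode in a single step, returning the number of bytes consumed (XDR_UNIT, i.e. 4, for one word) or a negative errno when the buffer is exhausted -- which is what lets the open-coded xdr_reserve_space()/WARN_ON_ONCE() sequence be deleted:

    if (xdr_stream_encode_u32(xdr, val) < 0)
        return nfserr_resource;   /* out of buffer space */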


@ -166,8 +166,7 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
int nfsd_drc_slab_create(void)
{
drc_slab = kmem_cache_create("nfsd_drc",
sizeof(struct nfsd_cacherep), 0, 0, NULL);
drc_slab = KMEM_CACHE(nfsd_cacherep, 0);
return drc_slab ? 0: -ENOMEM;
}
@ -176,27 +175,6 @@ void nfsd_drc_slab_free(void)
kmem_cache_destroy(drc_slab);
}
/**
* nfsd_net_reply_cache_init - per net namespace reply cache set-up
* @nn: nfsd_net being initialized
*
* Returns zero on success; otherwise a negative errno is returned.
*/
int nfsd_net_reply_cache_init(struct nfsd_net *nn)
{
return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
}
/**
* nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
* @nn: nfsd_net being freed
*
*/
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
{
nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
}
int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
@ -501,7 +479,7 @@ out:
int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
unsigned int len, struct nfsd_cacherep **cacherep)
{
struct nfsd_net *nn;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfsd_cacherep *rp, *found;
__wsum csum;
struct nfsd_drc_bucket *b;
@ -510,7 +488,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
int rtn = RC_DOIT;
if (type == RC_NOCACHE) {
nfsd_stats_rc_nocache_inc();
nfsd_stats_rc_nocache_inc(nn);
goto out;
}
@ -520,7 +498,6 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
rp = nfsd_cacherep_alloc(rqstp, csum, nn);
if (!rp)
goto out;
@ -537,7 +514,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
nfsd_cacherep_dispose(&dispose);
nfsd_stats_rc_misses_inc();
nfsd_stats_rc_misses_inc(nn);
atomic_inc(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
goto out;
@ -545,7 +522,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
found_entry:
/* We found a matching entry which is either in progress or done. */
nfsd_reply_cache_free_locked(NULL, rp, nn);
nfsd_stats_rc_hits_inc();
nfsd_stats_rc_hits_inc(nn);
rtn = RC_DROPIT;
rp = found;
@ -687,15 +664,15 @@ int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
atomic_read(&nn->num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
seq_printf(m, "mem usage: %lld\n",
percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE]));
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE]));
seq_printf(m, "cache hits: %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]));
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]));
seq_printf(m, "cache misses: %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]));
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]));
seq_printf(m, "not cached: %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]));
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]));
seq_printf(m, "payload misses: %lld\n",
percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES]));
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]));
seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;


@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
* 3. Is that directory the root of an exported file system?
*/
error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
nfsd4_revoke_states(netns(file), path.dentry->d_sb);
path_put(&path);
return error;
@ -1671,14 +1672,17 @@ static __net_init int nfsd_net_init(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
retval = nfsd_net_reply_cache_init(nn);
retval = nfsd_stat_counters_init(nn);
if (retval)
goto out_repcache_error;
memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
nn->nfsd_svcstats.program = &nfsd_program;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
nfsd_proc_stat_init(net);
return 0;
@ -1699,7 +1703,8 @@ static __net_exit void nfsd_net_exit(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfsd_net_reply_cache_destroy(nn);
nfsd_proc_stat_shutdown(net);
nfsd_stat_counters_destroy(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(nn);
@ -1722,12 +1727,9 @@ static int __init init_nfsd(void)
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
retval = nfsd_stat_init(); /* Statistics */
if (retval)
goto out_free_pnfs;
retval = nfsd_drc_slab_create();
if (retval)
goto out_free_stat;
goto out_free_pnfs;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
@ -1761,8 +1763,6 @@ out_free_exports:
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_drc_slab_free();
out_free_stat:
nfsd_stat_shutdown();
out_free_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
@ -1780,7 +1780,6 @@ static void __exit exit_nfsd(void)
nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
nfsd_lockd_shutdown();
nfsd4_free_slabs();
nfsd4_exit_pnfs();


@ -86,6 +86,7 @@ extern struct mutex nfsd_mutex;
extern spinlock_t nfsd_drc_lock;
extern unsigned long nfsd_drc_max_mem;
extern unsigned long nfsd_drc_mem_used;
extern atomic_t nfsd_th_cnt; /* number of available threads */
extern const struct seq_operations nfs_exports_op;
@ -274,6 +275,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
@ -365,6 +367,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
#define NFS4_CLIENTS_PER_GB 1024
#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT
/*
* The following attributes are currently not supported by the NFSv4 server:


@ -327,6 +327,7 @@ out:
__be32
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_export *exp = NULL;
struct dentry *dentry;
__be32 error;
@ -395,7 +396,7 @@ skip_pseudoflavor_check:
out:
trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
if (error == nfserr_stale)
nfsd_stats_fh_stale_inc(exp);
nfsd_stats_fh_stale_inc(nn, exp);
return error;
}


@ -103,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
}
}
resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0);
resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL);
if (resp->status != nfs_ok)
goto out;
@ -390,8 +390,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid)
resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0,
(time64_t)0);
resp->status = nfsd_setattr(rqstp, newfhp, &attrs,
NULL);
}
out_unlock:


@ -34,6 +34,7 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
atomic_t nfsd_th_cnt = ATOMIC_INIT(0);
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@ -80,7 +81,6 @@ unsigned long nfsd_drc_max_mem;
unsigned long nfsd_drc_mem_used;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static const struct svc_version *nfsd_acl_version[] = {
# if defined(CONFIG_NFSD_V2_ACL)
[2] = &nfsd_acl_version2,
@ -99,15 +99,11 @@ static struct svc_program nfsd_acl_program = {
.pg_vers = nfsd_acl_version,
.pg_name = "nfsacl",
.pg_class = "nfsd",
.pg_stats = &nfsd_acl_svcstats,
.pg_authenticate = &svc_set_client,
.pg_init_request = nfsd_acl_init_request,
.pg_rpcbind_set = nfsd_acl_rpcbind_set,
};
static struct svc_stat nfsd_acl_svcstats = {
.program = &nfsd_acl_program,
};
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
static const struct svc_version *nfsd_version[] = {
@ -132,7 +128,6 @@ struct svc_program nfsd_program = {
.pg_vers = nfsd_version, /* version table */
.pg_name = "nfsd", /* program name */
.pg_class = "nfsd", /* authentication class */
.pg_stats = &nfsd_svcstats, /* version table */
.pg_authenticate = &svc_set_client, /* export authentication */
.pg_init_request = nfsd_init_request,
.pg_rpcbind_set = nfsd_rpcbind_set,
@ -666,7 +661,8 @@ int nfsd_create_serv(struct net *net)
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats,
nfsd_max_blksize, nfsd);
if (serv == NULL)
return -ENOMEM;
@ -929,7 +925,7 @@ nfsd(void *vrqstp)
current->fs->umask = 0;
atomic_inc(&nfsdstats.th_cnt);
atomic_inc(&nfsd_th_cnt);
set_freezable();
@ -941,9 +937,11 @@ nfsd(void *vrqstp)
rqstp->rq_server->sv_maxconn = nn->max_connections;
svc_recv(rqstp);
nfsd_file_net_dispose(nn);
}
atomic_dec(&nfsdstats.th_cnt);
atomic_dec(&nfsd_th_cnt);
out:
/* Release the thread */
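The svc_create_pooled() call above implies the sunrpc half of the stats rework: the function grew a stats parameter so each namespace's serv carries its own svc_stat, replacing the program-wide pg_stats fields deleted from the lockd, NFS callback, and nfsd program tables earlier on this page. Its signature, reconstructed from this call site rather than quoted from the sunrpc diff:

    struct svc_serv *svc_create_pooled(struct svc_program *prog,
                                       struct svc_stat *stats,
                                       unsigned int bufsize,
                                       int (*threadfn)(void *data));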


@ -37,7 +37,8 @@ struct nfsd4_layout_ops {
__be32 (*proc_layoutcommit)(struct inode *inode,
struct nfsd4_layoutcommit *lcp);
void (*fence_client)(struct nfs4_layout_stateid *ls);
void (*fence_client)(struct nfs4_layout_stateid *ls,
struct nfsd_file *file);
};
extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
@ -72,11 +73,13 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
void nfsd4_return_all_client_layouts(struct nfs4_client *);
void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp);
void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
int nfsd4_init_pnfs(void);
void nfsd4_exit_pnfs(void);
#else
struct nfs4_client;
struct nfs4_file;
struct nfs4_layout_stateid;
static inline void nfsd4_setup_layout_type(struct svc_export *exp)
{
@ -89,6 +92,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp)
{
}
static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
{
}
static inline void nfsd4_exit_pnfs(void)
{
}


@ -68,7 +68,7 @@ struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
const struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
struct delayed_work cb_work;
int cb_seq_status;
int cb_status;
bool cb_need_restart;
@ -88,17 +88,34 @@ struct nfsd4_callback_ops {
*/
struct nfs4_stid {
refcount_t sc_count;
#define NFS4_OPEN_STID 1
#define NFS4_LOCK_STID 2
#define NFS4_DELEG_STID 4
/* For an open stateid kept around *only* to process close replays: */
#define NFS4_CLOSED_STID 8
/* A new stateid is added to the cl_stateids idr early before it
* is fully initialised. Its sc_type is then zero. After
* initialisation the sc_type is set under cl_lock, and then
* never changes.
*/
#define SC_TYPE_OPEN BIT(0)
#define SC_TYPE_LOCK BIT(1)
#define SC_TYPE_DELEG BIT(2)
#define SC_TYPE_LAYOUT BIT(3)
unsigned short sc_type;
/* state_lock protects sc_status for delegation stateids.
* ->cl_lock protects sc_status for open and lock stateids.
* ->st_mutex also protect sc_status for open stateids.
* ->ls_lock protects sc_status for layout stateids.
*/
/*
* For an open stateid kept around *only* to process close replays.
* For deleg stateid, kept in idr until last reference is dropped.
*/
#define SC_STATUS_CLOSED BIT(0)
/* For a deleg stateid kept around only to process free_stateid's: */
#define NFS4_REVOKED_DELEG_STID 16
#define NFS4_CLOSED_DELEG_STID 32
#define NFS4_LAYOUT_STID 64
#define SC_STATUS_REVOKED BIT(1)
#define SC_STATUS_ADMIN_REVOKED BIT(2)
unsigned short sc_status;
struct list_head sc_cp_list;
unsigned char sc_type;
stateid_t sc_stateid;
spinlock_t sc_lock;
struct nfs4_client *sc_client;
@ -117,6 +134,24 @@ struct nfs4_cpntf_state {
time64_t cpntf_time; /* last time stateid used */
};
struct nfs4_cb_fattr {
struct nfsd4_callback ncf_getattr;
u32 ncf_cb_status;
u32 ncf_cb_bmap[1];
/* from CB_GETATTR reply */
u64 ncf_cb_change;
u64 ncf_cb_fsize;
unsigned long ncf_cb_flags;
bool ncf_file_modified;
u64 ncf_initial_cinfo;
u64 ncf_cur_fsize;
};
/* bits for ncf_cb_flags */
#define CB_GETATTR_BUSY 0
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@ -150,6 +185,9 @@ struct nfs4_delegation {
int dl_retries;
struct nfsd4_callback dl_recall;
bool dl_recalled;
/* for CB_GETATTR */
struct nfs4_cb_fattr dl_cb_fattr;
};
#define cb_to_delegation(cb) \
@ -317,8 +355,9 @@ enum {
* 0. If they are not renewed within a lease period, they become eligible for
* destruction by the laundromat.
*
* These objects can also be destroyed prematurely by the fault injection code,
* or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates.
* These objects can also be destroyed if the client sends certain forms of
* SETCLIENTID or EXCHANGE_ID operations.
*
* Care is taken *not* to do this however when the objects have an elevated
* refcount.
*
@ -326,7 +365,7 @@ enum {
*
* o Each nfs4_clients is also hashed by name (the opaque quantity initially
* sent by the client to identify itself).
*
*
* o cl_perclient list is used to ensure no dangling stateowner references
* when we expire the nfs4_client
*/
@ -351,6 +390,7 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
atomic_t cl_admin_revoked; /* count of admin-revoked states */
/* NFSv4.1 client implementation id: */
struct xdr_netobj cl_nii_domain;
struct xdr_netobj cl_nii_name;
@ -640,6 +680,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
NFSPROC4_CLNT_CB_RECALL_ANY,
NFSPROC4_CLNT_CB_GETATTR,
};
/* Returns true iff a is later than b: */
@ -672,15 +713,15 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
stateid_t *stateid, int flags, struct nfsd_file **filp,
struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
stateid_t *stateid, unsigned short typemask,
unsigned short statusmask,
struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
void nfs4_free_copy_state(struct nfsd4_copy *copy);
struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
struct nfs4_stid *p_stid);
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
@ -714,6 +755,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
}
struct nfsd_file *find_any_file(struct nfs4_file *f);
#ifdef CONFIG_NFSD_V4
void nfsd4_revoke_states(struct net *net, struct super_block *sb);
#else
static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
{
}
#endif
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
@ -732,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
struct inode *inode);
struct inode *inode, bool *file_modified, u64 *size);
#endif /* NFSD4_STATE_H */
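Tying the new state machinery together, a sketch of what the admin-revoke path plausibly does per stateid (an assumption on my part -- the implementing nfs4state.c diff is the one suppressed above as too large):

    /* Sketch only, not the literal implementation; locking follows
     * the sc_status rules documented in struct nfs4_stid above
     * (->cl_lock covers open and lock stateids). */
    spin_lock(&clp->cl_lock);
    if (stid->sc_type & (SC_TYPE_OPEN | SC_TYPE_LOCK)) {
        stid->sc_status |= SC_STATUS_ADMIN_REVOKED;
        atomic_inc(&clp->cl_admin_revoked);
    }
    spin_unlock(&clp->cl_lock);

A nonzero cl_admin_revoked is then what lets the server signal the client that it should probe and free its stateids (via SEQUENCE status flags for 4.1+, or the new nfs40_last_revoke timestamp in netns.h for 4.0).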


@ -27,25 +27,22 @@
#include "nfsd.h"
struct nfsd_stats nfsdstats;
struct svc_stat nfsd_svcstats = {
.program = &nfsd_program,
};
static int nfsd_show(struct seq_file *seq, void *v)
{
struct net *net = pde_data(file_inode(seq->file));
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int i;
seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]),
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]),
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]),
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]),
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]),
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]),
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE]));
/* thread usage: */
seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt));
/* deprecated thread usage histogram stats */
for (i = 0; i < 10; i++)
@ -55,7 +52,7 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n");
/* show my rpc info */
svc_seq_show(seq, &nfsd_svcstats);
svc_seq_show(seq, &nn->nfsd_svcstats);
#ifdef CONFIG_NFSD_V4
/* Show count for individual nfsv4 operations */
@ -63,10 +60,10 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1);
for (i = 0; i <= LAST_NFS4_OP; i++) {
seq_printf(seq, " %lld",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)]));
}
seq_printf(seq, "\nwdeleg_getattr %lld",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]));
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR]));
seq_putc(seq, '\n');
#endif
@ -108,31 +105,24 @@ void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
percpu_counter_destroy(&counters[i]);
}
static int nfsd_stat_counters_init(void)
int nfsd_stat_counters_init(struct nfsd_net *nn)
{
return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM);
}
static void nfsd_stat_counters_destroy(void)
void nfsd_stat_counters_destroy(struct nfsd_net *nn)
{
nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM);
}
int nfsd_stat_init(void)
void nfsd_proc_stat_init(struct net *net)
{
int err;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
err = nfsd_stat_counters_init();
if (err)
return err;
svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
return 0;
svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
}
void nfsd_stat_shutdown(void)
void nfsd_proc_stat_shutdown(struct net *net)
{
nfsd_stat_counters_destroy();
svc_proc_unregister(&init_net, "nfsd");
svc_proc_unregister(net, "nfsd");
}

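(On where all these counters surface: nfsd_show() above feeds the /proc/net/rpc/nfsd file, which after this series is per network namespace. From the format strings in the hunk, its output is shaped like the following -- placeholder names, not real values:

    rc <rc-hits> <rc-misses> <rc-nocache>
    fh <fh-stale> 0 0 0 0
    io <read-bytes> <write-bytes>
    th <thread-count> 0 <ten zeroed deprecated-histogram fields>
    ra 0 0 0 0 0 0 0 0 0 0 0 0
    ... rpc-level lines from svc_seq_show() ...
    proc4ops <n> <one count per NFSv4 operation>
    wdeleg_getattr <count>
)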

@ -10,94 +10,72 @@
#include <uapi/linux/nfsd/stats.h>
#include <linux/percpu_counter.h>
enum {
NFSD_STATS_RC_HITS, /* repcache hits */
NFSD_STATS_RC_MISSES, /* repcache misses */
NFSD_STATS_RC_NOCACHE, /* uncached reqs */
NFSD_STATS_FH_STALE, /* FH stale error */
NFSD_STATS_IO_READ, /* bytes returned to read requests */
NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
#ifdef CONFIG_NFSD_V4
NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
#endif
NFSD_STATS_COUNTERS_NUM
};
struct nfsd_stats {
struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
atomic_t th_cnt; /* number of available threads */
};
extern struct nfsd_stats nfsdstats;
extern struct svc_stat nfsd_svcstats;
int nfsd_percpu_counters_init(struct percpu_counter *counters, int num);
void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num);
void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num);
int nfsd_stat_init(void);
void nfsd_stat_shutdown(void);
int nfsd_stat_counters_init(struct nfsd_net *nn);
void nfsd_stat_counters_destroy(struct nfsd_net *nn);
void nfsd_proc_stat_init(struct net *net);
void nfsd_proc_stat_shutdown(struct net *net);
static inline void nfsd_stats_rc_hits_inc(void)
static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
{
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]);
percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]);
}
static inline void nfsd_stats_rc_misses_inc(void)
static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn)
{
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]);
percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]);
}
static inline void nfsd_stats_rc_nocache_inc(void)
static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn)
{
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]);
percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]);
}
static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp)
static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn,
struct svc_export *exp)
{
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]);
percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]);
if (exp && exp->ex_stats)
percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]);
}
static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount)
static inline void nfsd_stats_io_read_add(struct nfsd_net *nn,
struct svc_export *exp, s64 amount)
{
percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount);
percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount);
if (exp && exp->ex_stats)
percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount);
}
static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount)
static inline void nfsd_stats_io_write_add(struct nfsd_net *nn,
struct svc_export *exp, s64 amount)
{
percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount);
percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount);
if (exp && exp->ex_stats)
percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount);
}
static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
{
percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]);
percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]);
}
static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
{
percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
}
static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
{
percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
}
#ifdef CONFIG_NFSD_V4
static inline void nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn)
{
percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]);
}
#endif
#endif /* _NFSD_STATS_H */
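A minimal usage sketch (illustrative; the function name below is made up): callers resolve the per-namespace nfsd_net with net_generic(), as the read and write paths later in this patch now do, and pass it to these helpers:

static void example_count_repcache_hit(struct svc_rqst *rqstp)
{
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	nfsd_stats_rc_hits_inc(nn);
}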


@ -9,8 +9,10 @@
#define _NFSD_TRACE_H
#include <linux/tracepoint.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <trace/misc/nfs.h>
#include <trace/misc/sunrpc.h>
#include "export.h"
#include "nfsfh.h"
@ -641,23 +643,18 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
DEFINE_STATESEQID_EVENT(preprocess);
DEFINE_STATESEQID_EVENT(open_confirm);
#define show_stid_type(x) \
__print_flags(x, "|", \
{ SC_TYPE_OPEN, "OPEN" }, \
{ SC_TYPE_LOCK, "LOCK" }, \
{ SC_TYPE_DELEG, "DELEG" }, \
{ SC_TYPE_LAYOUT, "LAYOUT" })
#define show_stid_status(x) \
__print_flags(x, "|", \
{ SC_STATUS_CLOSED, "CLOSED" }, \
{ SC_STATUS_REVOKED, "REVOKED" }, \
{ SC_STATUS_ADMIN_REVOKED, "ADMIN_REVOKED" })
DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_PROTO(
@ -666,6 +663,7 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_ARGS(stid),
TP_STRUCT__entry(
__field(unsigned long, sc_type)
__field(unsigned long, sc_status)
__field(int, sc_count)
__field(u32, cl_boot)
__field(u32, cl_id)
@ -676,16 +674,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
const stateid_t *stp = &stid->sc_stateid;
__entry->sc_type = stid->sc_type;
__entry->sc_status = stid->sc_status;
__entry->sc_count = refcount_read(&stid->sc_count);
__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
__entry->cl_id = stp->si_opaque.so_clid.cl_id;
__entry->si_id = stp->si_opaque.so_id;
__entry->si_generation = stp->si_generation;
),
TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
__entry->cl_boot, __entry->cl_id,
__entry->si_id, __entry->si_generation,
__entry->sc_count, show_stid_type(__entry->sc_type),
show_stid_status(__entry->sc_status)
)
);
@ -696,6 +696,59 @@ DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \
DEFINE_STID_EVENT(revoke);
TRACE_EVENT(nfsd_stateowner_replay,
TP_PROTO(
u32 opnum,
const struct nfs4_replay *rp
),
TP_ARGS(opnum, rp),
TP_STRUCT__entry(
__field(unsigned long, status)
__field(u32, opnum)
),
TP_fast_assign(
__entry->status = be32_to_cpu(rp->rp_status);
__entry->opnum = opnum;
),
TP_printk("opnum=%u status=%lu",
__entry->opnum, __entry->status)
);
TRACE_EVENT_CONDITION(nfsd_seq4_status,
TP_PROTO(
const struct svc_rqst *rqstp,
const struct nfsd4_sequence *sequence
),
TP_ARGS(rqstp, sequence),
TP_CONDITION(sequence->status_flags),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(u32, xid)
__field(u32, cl_boot)
__field(u32, cl_id)
__field(u32, seqno)
__field(u32, reserved)
__field(unsigned long, status_flags)
),
TP_fast_assign(
const struct nfsd4_sessionid *sid =
(struct nfsd4_sessionid *)&sequence->sessionid;
__entry->netns_ino = SVC_NET(rqstp)->ns.inum;
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->cl_boot = sid->clientid.cl_boot;
__entry->cl_id = sid->clientid.cl_id;
__entry->seqno = sid->sequence;
__entry->reserved = sid->reserved;
__entry->status_flags = sequence->status_flags;
),
TP_printk("xid=0x%08x sessionid=%08x:%08x:%08x:%08x status_flags=%s",
__entry->xid, __entry->cl_boot, __entry->cl_id,
__entry->seqno, __entry->reserved,
show_nfs4_seq4_status(__entry->status_flags)
)
);
DECLARE_EVENT_CLASS(nfsd_clientid_class,
TP_PROTO(const clientid_t *clid),
TP_ARGS(clid),
@ -1334,7 +1387,8 @@ DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \
TP_PROTO(const struct nfs4_client *clp), \
TP_ARGS(clp))
DEFINE_NFSD_CB_EVENT(start);
DEFINE_NFSD_CB_EVENT(new_state);
DEFINE_NFSD_CB_EVENT(probe);
DEFINE_NFSD_CB_EVENT(lost);
DEFINE_NFSD_CB_EVENT(shutdown);
@ -1405,6 +1459,128 @@ TRACE_EVENT(nfsd_cb_setup_err,
__entry->error)
);
DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
TP_PROTO(
const struct nfs4_client *clp,
const struct nfsd4_callback *cb
),
TP_ARGS(clp, cb),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
__field(const void *, cb)
__field(bool, need_restart)
__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->cb = cb;
__entry->need_restart = cb->cb_need_restart;
__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
clp->cl_cb_conn.cb_addrlen)
),
TP_printk("addr=%pISpc client %08x:%08x cb=%p%s",
__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
__entry->cb, __entry->need_restart ?
" (need restart)" : " (first try)"
)
);
#define DEFINE_NFSD_CB_LIFETIME_EVENT(name) \
DEFINE_EVENT(nfsd_cb_lifetime_class, nfsd_cb_##name, \
TP_PROTO( \
const struct nfs4_client *clp, \
const struct nfsd4_callback *cb \
), \
TP_ARGS(clp, cb))
DEFINE_NFSD_CB_LIFETIME_EVENT(queue);
DEFINE_NFSD_CB_LIFETIME_EVENT(destroy);
DEFINE_NFSD_CB_LIFETIME_EVENT(restart);
DEFINE_NFSD_CB_LIFETIME_EVENT(bc_update);
DEFINE_NFSD_CB_LIFETIME_EVENT(bc_shutdown);
TRACE_EVENT(nfsd_cb_seq_status,
TP_PROTO(
const struct rpc_task *task,
const struct nfsd4_callback *cb
),
TP_ARGS(task, cb),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, cl_boot)
__field(u32, cl_id)
__field(u32, seqno)
__field(u32, reserved)
__field(int, tk_status)
__field(int, seq_status)
),
TP_fast_assign(
const struct nfs4_client *clp = cb->cb_clp;
const struct nfsd4_session *session = clp->cl_cb_session;
const struct nfsd4_sessionid *sid =
(struct nfsd4_sessionid *)&session->se_sessionid;
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client ?
task->tk_client->cl_clid : -1;
__entry->cl_boot = sid->clientid.cl_boot;
__entry->cl_id = sid->clientid.cl_id;
__entry->seqno = sid->sequence;
__entry->reserved = sid->reserved;
__entry->tk_status = task->tk_status;
__entry->seq_status = cb->cb_seq_status;
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
" sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d\n",
__entry->task_id, __entry->client_id,
__entry->cl_boot, __entry->cl_id,
__entry->seqno, __entry->reserved,
__entry->tk_status, __entry->seq_status
)
);
TRACE_EVENT(nfsd_cb_free_slot,
TP_PROTO(
const struct rpc_task *task,
const struct nfsd4_callback *cb
),
TP_ARGS(task, cb),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, cl_boot)
__field(u32, cl_id)
__field(u32, seqno)
__field(u32, reserved)
__field(u32, slot_seqno)
),
TP_fast_assign(
const struct nfs4_client *clp = cb->cb_clp;
const struct nfsd4_session *session = clp->cl_cb_session;
const struct nfsd4_sessionid *sid =
(struct nfsd4_sessionid *)&session->se_sessionid;
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client ?
task->tk_client->cl_clid : -1;
__entry->cl_boot = sid->clientid.cl_boot;
__entry->cl_id = sid->clientid.cl_id;
__entry->seqno = sid->sequence;
__entry->reserved = sid->reserved;
__entry->slot_seqno = session->se_cb_seq_nr;
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
" sessionid=%08x:%08x:%08x:%08x new slot seqno=%u\n",
__entry->task_id, __entry->client_id,
__entry->cl_boot, __entry->cl_id,
__entry->seqno, __entry->reserved,
__entry->slot_seqno
)
);
TRACE_EVENT_CONDITION(nfsd_cb_recall,
TP_PROTO(
const struct nfs4_stid *stid


@ -476,7 +476,6 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
* @rqstp: controlling RPC transaction
* @fhp: filehandle of target
* @attr: attributes to set
* @guardtime: do not act if ctime.tv_sec does not match this timestamp
*
* This call may adjust the contents of @attr (in particular, this
@ -488,8 +487,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_attrs *attr, const struct timespec64 *guardtime)
{
struct dentry *dentry;
struct inode *inode;
@ -497,7 +495,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0;
__be32 err;
int host_err = 0;
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
int retries;
@ -538,9 +536,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_sanitize_attrs(inode, iap);
/*
* The size case is special, it changes the file in addition to the
* attributes, and file systems don't expect it to be mixed with
@ -555,6 +550,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
inode_lock(inode);
err = fh_fill_pre_attrs(fhp);
if (err)
goto out_unlock;
if (guardtime) {
struct timespec64 ctime = inode_get_ctime(inode);
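/* Note: only the low 32 bits of the seconds are compared below,
 * matching the 32-bit seconds field of NFSv3's nfstime3 guard. */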
if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec ||
guardtime->tv_nsec != ctime.tv_nsec) {
err = nfserr_notsync;
goto out_fill_attrs;
}
}
for (retries = 1;;) {
struct iattr attrs;
@ -582,13 +590,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
dentry, ACL_TYPE_DEFAULT,
attr->na_dpacl);
out_fill_attrs:
/*
* RFC 1813 Section 3.3.2 does not mandate that an NFS server
* returns wcc_data for SETATTR. Some client implementations
* depend on receiving wcc_data, however, to sort out partial
* updates (e.g., the client requested that size and mode be
* modified, but the server changed only the file mode).
*/
fh_fill_post_attrs(fhp);
out_unlock:
inode_unlock(inode);
if (size_change)
put_write_access(inode);
out:
if (!host_err)
host_err = commit_metadata(fhp);
return err != 0 ? err : nfserrno(host_err);
}
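For illustration (a sketch modelled on the NFSv3 SETATTR handler; example_guarded_setattr is a made-up name, and the argp fields match struct nfsd3_sattrargs shown later in this patch), a guarded call now passes the decoded ctime, and NULL means "no guard":

static __be32 example_guarded_setattr(struct svc_rqst *rqstp,
				      struct svc_fh *fhp,
				      struct nfsd_attrs *attrs,
				      struct nfsd3_sattrargs *argp)
{
	return nfsd_setattr(rqstp, fhp, attrs,
			    argp->check_guard ? &argp->guardtime : NULL);
}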
#if defined(CONFIG_NFSD_V4)
@ -1002,7 +1020,9 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned long *count, u32 *eof, ssize_t host_err)
{
if (host_err >= 0) {
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
nfsd_stats_io_read_add(nn, fhp->fh_export, host_err);
*eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err;
fsnotify_access(file);
@ -1185,7 +1205,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
goto out_nfserr;
}
*cnt = host_err;
nfsd_stats_io_write_add(nn, exp, *cnt);
fsnotify_modify(file);
host_err = filemap_check_wb_err(file->f_mapping, since);
if (host_err < 0)
@ -1404,7 +1424,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
* if the attributes have not changed.
*/
if (iap->ia_valid)
status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
else
status = nfserrno(commit_metadata(resfhp));
@ -1906,10 +1926,10 @@ out_unlock:
fh_drop_write(ffhp);
/*
* If the target dentry has cached open files, then we need to
* try to close them prior to doing the rename. Final fput
* shouldn't be done with locks held however, so we delay it
* until this point and then reattempt the whole shebang.
*/
if (close_cached) {
close_cached = false;
@ -2177,11 +2197,43 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */
out_close:
nfsd_filp_close(file);
out:
return err;
}
/**
* nfsd_filp_close - close a file synchronously
* @fp: the file to close
*
* nfsd_filp_close() is similar in behaviour to filp_close().
* The difference is that if this is the final close on the
* file, the finalisation happens immediately, rather than
* being handed over to a work queue, as is the case for
* filp_close().
* When a user-space process closes a file (even when using
* filp_close()), the finalisation happens before returning to
* userspace, so it is effectively synchronous. When a kernel thread
* uses filp_close(), on the other hand, the handling is completely
* asynchronous. This means that any cost imposed by that finalisation
* is not imposed on the nfsd thread, and nfsd could potentially
* close files more quickly than the work queue finalises the close,
* which would lead to unbounded growth in the queue.
*
* In some contexts it is not safe to wait synchronously for
* close finalisation (see the comment for __fput_sync()), but nfsd
* does not match those contexts. In particular it does not, at the
* time this function is called, hold any locks, and no finalisation
* of any file, socket, or device driver would have any cause to wait
* for nfsd to make progress.
*/
void nfsd_filp_close(struct file *fp)
{
get_file(fp);
filp_close(fp, NULL);
__fput_sync(fp);
}
/*
* Get file system stats
* N.B. After this call fhp needs an fh_put


@ -69,7 +69,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
struct nfsd_attrs *, const struct timespec64 *);
int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
@ -148,6 +148,8 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
void nfsd_filp_close(struct file *fp);
static inline int fh_want_write(struct svc_fh *fh)
{
int ret;


@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
struct svc_fh fh;
struct iattr attrs;
int check_guard;
struct timespec64 guardtime;
};
struct nfsd3_diropargs {


@ -54,3 +54,21 @@
#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
/*
* 1: CB_GETATTR opcode (32-bit)
* N: file_handle
* 1: number of entries in the attribute array (32-bit)
* 1: entry 0 in attribute array (32-bit)
*/
#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
cb_sequence_enc_sz + \
1 + enc_nfs4_fh_sz + 1 + 1)
/*
* 4: fattr_bitmap_maxsz
* 1: attribute array len
* 2: change attr (64-bit)
* 2: size (64-bit)
*/
#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)


@ -339,7 +339,6 @@ struct svc_program {
const struct svc_version **pg_vers; /* version array */
char * pg_name; /* service name */
char * pg_class; /* class name: services sharing authentication */
enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp);
__be32 (*pg_init_request)(struct svc_rqst *,
const struct svc_program *,
@ -411,7 +410,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp,
void svc_rqst_release_pages(struct svc_rqst *rqstp);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *prog,
struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data));
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_info *si, struct file *file);


@ -203,6 +203,30 @@ struct svc_rdma_recv_ctxt {
struct page *rc_pages[RPCSVC_MAXPAGES];
};
/*
* State for sending a Write chunk.
* - Tracks progress of writing one chunk over all its segments
* - Stores arguments for the SGL constructor functions
*/
struct svc_rdma_write_info {
struct svcxprt_rdma *wi_rdma;
struct list_head wi_list;
const struct svc_rdma_chunk *wi_chunk;
/* write state of this chunk */
unsigned int wi_seg_off;
unsigned int wi_seg_no;
/* SGL constructor arguments */
const struct xdr_buf *wi_xdr;
unsigned char *wi_base;
unsigned int wi_next_off;
struct svc_rdma_chunk_ctxt wi_cc;
struct work_struct wi_work;
};
struct svc_rdma_send_ctxt {
struct llist_node sc_node;
struct rpc_rdma_cid sc_cid;
@ -210,9 +234,15 @@ struct svc_rdma_send_ctxt {
struct svcxprt_rdma *sc_rdma;
struct ib_send_wr sc_send_wr;
struct ib_send_wr *sc_wr_chain;
int sc_sqecount;
struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
struct list_head sc_write_info_list;
struct svc_rdma_write_info sc_reply_info;
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
@ -236,18 +266,27 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */
extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc);
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir);
extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr);
extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
const struct svc_rdma_pcl *reply_pcl,
struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr);
extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head);
@ -258,8 +297,8 @@ extern struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_pcl *write_pcl,


@ -2118,6 +2118,10 @@ DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_write);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err);
DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_reply);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_flush);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_err);
TRACE_EVENT(svcrdma_qp_error,
TP_PROTO(
const struct ib_event *event,


@ -385,3 +385,37 @@ TRACE_DEFINE_ENUM(IOMODE_ANY);
{ SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
{ SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
{ SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
TRACE_DEFINE_ENUM(OP_CB_GETATTR);
TRACE_DEFINE_ENUM(OP_CB_RECALL);
TRACE_DEFINE_ENUM(OP_CB_LAYOUTRECALL);
TRACE_DEFINE_ENUM(OP_CB_NOTIFY);
TRACE_DEFINE_ENUM(OP_CB_PUSH_DELEG);
TRACE_DEFINE_ENUM(OP_CB_RECALL_ANY);
TRACE_DEFINE_ENUM(OP_CB_RECALLABLE_OBJ_AVAIL);
TRACE_DEFINE_ENUM(OP_CB_RECALL_SLOT);
TRACE_DEFINE_ENUM(OP_CB_SEQUENCE);
TRACE_DEFINE_ENUM(OP_CB_WANTS_CANCELLED);
TRACE_DEFINE_ENUM(OP_CB_NOTIFY_LOCK);
TRACE_DEFINE_ENUM(OP_CB_NOTIFY_DEVICEID);
TRACE_DEFINE_ENUM(OP_CB_OFFLOAD);
TRACE_DEFINE_ENUM(OP_CB_ILLEGAL);
#define show_nfs4_cb_op(x) \
__print_symbolic(x, \
{ 0, "CB_NULL" }, \
{ 1, "CB_COMPOUND" }, \
{ OP_CB_GETATTR, "CB_GETATTR" }, \
{ OP_CB_RECALL, "CB_RECALL" }, \
{ OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \
{ OP_CB_NOTIFY, "CB_NOTIFY" }, \
{ OP_CB_PUSH_DELEG, "CB_PUSH_DELEG" }, \
{ OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \
{ OP_CB_RECALLABLE_OBJ_AVAIL, "CB_RECALLABLE_OBJ_AVAIL" }, \
{ OP_CB_RECALL_SLOT, "CB_RECALL_SLOT" }, \
{ OP_CB_SEQUENCE, "CB_SEQUENCE" }, \
{ OP_CB_WANTS_CANCELLED, "CB_WANTS_CANCELLED" }, \
{ OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \
{ OP_CB_NOTIFY_DEVICEID, "CB_NOTIFY_DEVICEID" }, \
{ OP_CB_OFFLOAD, "CB_OFFLOAD" }, \
{ OP_CB_ILLEGAL, "CB_ILLEGAL" })


@ -921,6 +921,8 @@ out_err:
* Caller provides the truncation length of the output token (h) in
* cksumout.len.
*
* Note that for RPCSEC, the "initial cipher state" is always all zeroes.
*
* Return values:
* %GSS_S_COMPLETE: Digest computed, @cksumout filled in
* %GSS_S_FAILURE: Call failed
@ -931,22 +933,19 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
int body_offset, struct xdr_netobj *cksumout)
{
unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
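/* static storage duration means this array is zero-filled, so it
 * doubles as the all-zeroes initial cipher state without a kzalloc(). */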
struct ahash_request *req;
struct scatterlist sg[1];
	u8 *checksumdata;
	int err = -ENOMEM;
checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
goto out_free_cksumdata;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
err = crypto_ahash_init(req);
if (err)
@ -970,8 +969,7 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
out_free_ahash:
ahash_request_free(req);
out_free_cksumdata:
kfree_sensitive(checksumdata);
return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
}


@ -398,6 +398,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
u64 seq_send64;
int keylen;
u32 time32;
int ret;
p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
if (IS_ERR(p))
@ -450,8 +451,16 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
ret = gss_krb5_import_ctx_v2(ctx, gfp_mask);
if (ret) {
p = ERR_PTR(ret);
goto out_free;
}
return 0;
out_free:
kfree(ctx->mech_used.data);
out_err:
return PTR_ERR(p);
}


@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
if (!creds) {
kfree(oa->data);
err = -ENOMEM;
goto free_oa;
}
oa->data[0].option.data = CREDS_VALUE;
@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
/* option buffer */
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL)) {
err = -ENOSPC;
goto free_creds;
}
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
if (unlikely(p == NULL)) {
err = -ENOSPC;
goto free_creds;
}
if (length == sizeof(CREDS_VALUE) &&
memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
/* We have creds here. parse them */
err = gssx_dec_linux_creds(xdr, creds);
if (err)
goto free_creds;
oa->data[0].value.len = 1; /* presence */
} else {
/* consume uninteresting buffer */
err = gssx_dec_buffer(xdr, &dummy);
if (err)
goto free_creds;
}
}
return 0;
free_creds:
kfree(creds);
free_oa:
kfree(oa->data);
oa->data = NULL;
return err;
}
static int gssx_dec_status(struct xdr_stream *xdr,


@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry *
svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
{
return do_register(net, statp->program->pg_name, net, proc_ops);
}
EXPORT_SYMBOL_GPL(svc_proc_register);


@ -451,8 +451,8 @@ __svc_init_bc(struct svc_serv *serv)
* Create an RPC service
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, struct svc_stat *stats,
unsigned int bufsize, int npools, int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int vers;
@ -463,7 +463,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
serv->sv_stats = stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096;
@ -529,26 +529,28 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
int (*threadfn)(void *data))
{
return __svc_create(prog, NULL, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
/**
* svc_create_pooled - Create an RPC service with pooled threads
* @prog: the RPC program the new service will handle
* @stats: the stats struct if desired
* @bufsize: maximum message size for @prog
* @threadfn: a function to service RPC requests for @prog
*
* Returns an instantiated struct svc_serv object or NULL.
*/
struct svc_serv *svc_create_pooled(struct svc_program *prog,
struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
serv = __svc_create(prog, bufsize, npools, threadfn);
serv = __svc_create(prog, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
return serv;
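/* Illustrative call site (outside this diff): nfsd would now create
 * its service by passing the per-netns stats object, e.g.
 *
 *	serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats,
 *				 nfsd_max_blksize, nfsd);
 */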
@ -1375,7 +1377,8 @@ svc_process_common(struct svc_rqst *rqstp)
goto err_bad_proc;
/* Syntactic check complete */
if (serv->sv_stats)
serv->sv_stats->rpccnt++;
trace_svc_process(rqstp, progp->pg_name);
aoffset = xdr_stream_pos(xdr);
@ -1427,7 +1430,8 @@ err_short_len:
goto close_xprt;
err_bad_rpc:
if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
xdr_stream_encode_u32(xdr, RPC_MISMATCH);
/* Only RPCv2 supported */
@ -1438,7 +1442,8 @@ err_bad_rpc:
err_bad_auth:
dprintk("svc: authentication failed (%d)\n",
be32_to_cpu(rqstp->rq_auth_stat));
if (serv->sv_stats)
serv->sv_stats->rpcbadauth++;
/* Restore write pointer to location of reply status: */
xdr_truncate_encode(xdr, XDR_UNIT * 2);
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
@ -1448,7 +1453,8 @@ err_bad_auth:
err_bad_prog:
dprintk("svc: unknown program %d\n", rqstp->rq_prog);
if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_unavail;
goto sendit;
@ -1456,7 +1462,8 @@ err_bad_vers:
svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_mismatch;
/*
@ -1470,19 +1477,22 @@ err_bad_vers:
err_bad_proc:
svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
err_garbage_args:
svc_printk(rqstp, "failed to decode RPC header\n");
if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_garbage_args;
goto sendit;
err_system_err:
if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_system_err;
goto sendit;
}
@ -1534,7 +1544,8 @@ void svc_process(struct svc_rqst *rqstp)
out_baddir:
svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
be32_to_cpu(*p));
if (rqstp->rq_server->sv_stats)
rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
}
@ -1612,7 +1623,6 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
rpc_put_task(task);
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**


@ -90,7 +90,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
return svc_rdma_post_send(rdma, sctxt);
}
/* Server-side transport endpoint wants a whole page for its send


@ -197,28 +197,6 @@ void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
static struct svc_rdma_write_info *
svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
const struct svc_rdma_chunk *chunk)
@ -252,6 +230,71 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
queue_work(svcrdma_wq, &info->wi_work);
}
/**
* svc_rdma_write_chunk_release - Release Write chunk I/O resources
* @rdma: controlling transport
* @ctxt: Send context that is being released
*/
void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
while (!list_empty(&ctxt->sc_write_info_list)) {
info = list_first_entry(&ctxt->sc_write_info_list,
struct svc_rdma_write_info, wi_list);
list_del(&info->wi_list);
cc = &info->wi_cc;
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
svc_rdma_write_info_free(info);
}
}
/**
* svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
* @rdma: controlling transport
* @ctxt: Send context that is being released
*/
void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
{
struct svc_rdma_chunk_ctxt *cc = &ctxt->sc_reply_info.wi_cc;
if (!cc->cc_sqecount)
return;
svc_rdma_cc_release(rdma, cc, DMA_TO_DEVICE);
}
/**
* svc_rdma_reply_done - Reply chunk Write completion handler
* @cq: controlling Completion Queue
* @wc: Work Completion report
*
* Pages under I/O are released by a subsequent Send completion.
*/
static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
struct svcxprt_rdma *rdma = cq->cq_context;
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_reply(&cc->cc_cid);
return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_reply_flush(wc, &cc->cc_cid);
break;
default:
trace_svcrdma_wc_reply_err(wc, &cc->cc_cid);
}
svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
* svc_rdma_write_done - Write chunk completion
* @cq: controlling Completion Queue
@ -265,13 +308,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_write(&cc->cc_cid);
return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
break;
@ -279,12 +320,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
}
/* The RDMA Write has flushed, so the client won't get
* some of the outgoing RPC message. Signal the loss
* to the client by closing the connection.
*/
svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
@ -580,41 +620,54 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len;
}
/* Link Write WRs for @chunk onto @sctxt's WR chain.
*/
static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_chunk *chunk,
const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
struct ib_send_wr *first_wr;
struct xdr_buf payload;
struct list_head *pos;
struct ib_cqe *cqe;
int ret;
if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
chunk->ch_payload_length))
return -EMSGSIZE;
info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info)
return -ENOMEM;
cc = &info->wi_cc;
ret = svc_rdma_xb_write(&payload, info);
if (ret != payload.len)
goto out_err;
ret = -EINVAL;
if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
goto out_err;
first_wr = sctxt->sc_wr_chain;
cqe = &cc->cc_cqe;
list_for_each(pos, &cc->cc_rwctxts) {
struct svc_rdma_rw_ctxt *rwc;
rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, cqe, first_wr);
cqe = NULL;
}
sctxt->sc_wr_chain = first_wr;
sctxt->sc_sqecount += cc->cc_sqecount;
list_add(&info->wi_list, &sctxt->sc_write_info_list);
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
return 0;
out_err:
svc_rdma_write_info_free(info);
@ -622,9 +675,39 @@ out_err:
}
/**
* svc_rdma_prepare_write_list - Construct WR chain for sending Write list
* @rdma: controlling RDMA transport
* @write_pcl: Write list provisioned by the client
* @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply message
*
* Returns zero on success, or a negative errno if one or more
* Write chunks could not be sent.
*/
int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr)
{
struct svc_rdma_chunk *chunk;
int ret;
pcl_for_each_chunk(chunk, write_pcl) {
if (!chunk->ch_payload_length)
break;
ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
if (ret < 0)
return ret;
}
return 0;
}
/**
* svc_rdma_prepare_reply_chunk - Construct WR chain for writing the Reply chunk
* @rdma: controlling RDMA transport
* @write_pcl: Write chunk list provided by client
* @reply_pcl: Reply chunk provided by client
* @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply
*
* Returns a non-negative number of bytes the chunk consumed, or
@ -634,39 +717,45 @@ out_err:
* %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/
int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
const struct svc_rdma_pcl *reply_pcl,
struct svc_rdma_send_ctxt *sctxt,
const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info = &sctxt->sc_reply_info;
struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
struct ib_send_wr *first_wr;
struct list_head *pos;
struct ib_cqe *cqe;
int ret;
info->wi_rdma = rdma;
info->wi_chunk = pcl_first_chunk(reply_pcl);
info->wi_seg_off = 0;
info->wi_seg_no = 0;
info->wi_cc.cc_cqe.done = svc_rdma_reply_done;
ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_write, info);
if (ret < 0)
return ret;
first_wr = sctxt->sc_wr_chain;
cqe = &cc->cc_cqe;
list_for_each(pos, &cc->cc_rwctxts) {
struct svc_rdma_rw_ctxt *rwc;
rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, cqe, first_wr);
cqe = NULL;
}
sctxt->sc_wr_chain = first_wr;
sctxt->sc_sqecount += cc->cc_sqecount;
trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
return xdr->len;
}
/**


@ -142,6 +142,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send;
INIT_LIST_HEAD(&ctxt->sc_write_info_list);
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size);
@ -205,9 +206,13 @@ out:
xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
ctxt->sc_xprt_buf, NULL);
svc_rdma_cc_init(rdma, &ctxt->sc_reply_info.wi_cc);
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
ctxt->sc_page_count = 0;
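	/* A fresh ctxt's chain holds only the Send WR itself; Write and
	 * Reply chunk WRs are linked ahead of it by
	 * svc_rdma_prepare_write_list() and svc_rdma_prepare_reply_chunk(). */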
ctxt->sc_wr_chain = &ctxt->sc_send_wr;
ctxt->sc_sqecount = 1;
return ctxt;
out_empty:
@ -223,6 +228,9 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
svc_rdma_write_chunk_release(rdma, ctxt);
svc_rdma_reply_chunk_release(rdma, ctxt);
if (ctxt->sc_page_count)
release_pages(ctxt->sc_pages, ctxt->sc_page_count);
@ -293,7 +301,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_send_ctxt *ctxt =
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
@ -312,51 +320,76 @@ flushed:
}
/**
* svc_rdma_post_send - Post a WR chain to the Send Queue
* @rdma: transport context
* @ctxt: WR chain to post
*
* Copy fields in @ctxt to stack variables in order to guarantee
* that these values remain available after the ib_post_send() call.
* In some error flow cases, svc_rdma_wc_send() releases @ctxt.
*
* Note there is potential for starvation when the Send Queue is
* full because there is no order to when waiting threads are
* awoken. The transport is typically provisioned with a deep
* enough Send Queue that SQ exhaustion should be a rare event.
*
* Return values:
* %0: @ctxt's WR chain was posted successfully
* %-ENOTCONN: The connection was lost
*/
int svc_rdma_post_send(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
{
struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
const struct ib_send_wr *bad_wr = first_wr;
struct rpc_rdma_cid cid = ctxt->sc_cid;
int ret, sqecount = ctxt->sc_sqecount;
might_sleep();
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
send_wr->sg_list[0].addr,
send_wr->sg_list[0].length,
DMA_TO_DEVICE);
/* If the SQ is full, wait until an SQ entry is available */
while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
svc_rdma_wake_send_waiters(rdma, sqecount);
/* When the transport is torn down, assume
* ib_drain_sq() will trigger enough Send
* completions to wake us. The XPT_CLOSE test
* above should then cause the while loop to
* exit.
*/
percpu_counter_inc(&svcrdma_stat_sq_starve);
trace_svcrdma_sq_full(rdma, &cid);
wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > 0);
trace_svcrdma_sq_retry(rdma, &cid);
continue;
}
trace_svcrdma_post_send(ctxt);
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
if (ret) {
trace_svcrdma_sq_post_err(rdma, &cid, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
/* If even one WR was posted, there will be a
* Send completion that bumps sc_sq_avail.
*/
if (bad_wr == first_wr) {
svc_rdma_wake_send_waiters(rdma, sqecount);
break;
}
}
return 0;
}
return -ENOTCONN;
}
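In outline, the reply path later in this patch now builds one WR chain and posts once (a condensed, illustrative sketch; error handling omitted):

	sctxt = svc_rdma_send_ctxt_get(rdma);
	/* ... encode the transport header and map the reply ... */
	svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
				    &rqstp->rq_res);
	svc_rdma_prepare_reply_chunk(rdma, &rctxt->rc_write_pcl,
				     &rctxt->rc_reply_pcl, sctxt,
				     &rqstp->rq_res);
	svc_rdma_post_send(rdma, sctxt);	/* one ib_post_send() for the chain */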
/**
@ -839,16 +872,10 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
*
* Depending on whether a Write list or Reply chunk is present,
* the server may Send all, a portion of, or none of the xdr_buf.
* In the latter case, only the transport header (sc_sges[0]) is
* transmitted.
*
* Assumptions:
* - The Reply's transport header will never be larger than a page.
*/
@ -857,6 +884,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp)
{
struct ib_send_wr *send_wr = &sctxt->sc_send_wr;
int ret;
ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
@ -864,16 +892,19 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
/* Transfer pages involved in RDMA Writes to the sctxt's
* page array. Completion handling releases these pages.
*/
svc_rdma_save_io_pages(rqstp, sctxt);
if (rctxt->rc_inv_rkey) {
send_wr->opcode = IB_WR_SEND_WITH_INV;
send_wr->ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else {
send_wr->opcode = IB_WR_SEND;
}
return svc_rdma_post_send(rdma, sctxt);
}
/**
@ -937,7 +968,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.num_sge = 1;
sctxt->sc_send_wr.opcode = IB_WR_SEND;
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
if (svc_rdma_post_send(rdma, sctxt))
goto put_ctxt;
return;
@ -984,10 +1015,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p)
goto put_ctxt;
ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
&rqstp->rq_res);
if (ret < 0)
goto put_ctxt;
rc_size = 0;
if (!pcl_is_empty(&rctxt->rc_reply_pcl)) {
ret = svc_rdma_prepare_reply_chunk(rdma, &rctxt->rc_write_pcl,
&rctxt->rc_reply_pcl, sctxt,
&rqstp->rq_res);
if (ret < 0)
goto reply_chunk;
rc_size = ret;
}
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
@ -1030,45 +1071,33 @@ drop_connection:
/**
* svc_rdma_result_payload - special processing for a result payload
* @rqstp: RPC transaction context
* @offset: payload's byte offset in @rqstp->rq_res
* @length: size of payload, in bytes
*
* Assign the passed-in result payload to the current Write chunk,
* and advance to cur_result_payload to the next Write chunk, if
* there is one.
*
* Return values:
* %0 if successful or nothing needed to be done
* %-E2BIG if the payload was larger than the Write chunk
*/
int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length)
{
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
struct svc_rdma_chunk *chunk;
chunk = rctxt->rc_cur_result_payload;
if (!length || !chunk)
return 0;
rctxt->rc_cur_result_payload =
pcl_next_chunk(&rctxt->rc_write_pcl, chunk);
if (length > chunk->ch_length)
return -E2BIG;
chunk->ch_position = offset;
chunk->ch_payload_length = length;
return 0;
}


@ -415,15 +415,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
newxprt->sc_recv_batch + 1 /* drain */;
if (rq_depth > dev->attrs.max_qp_wr) {
rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
/* Arbitrarily estimate the number of rw_ctxs needed for
* this transport. This is enough rw_ctxs to make forward
* progress even if the client is using one rkey per page
* in each Read chunk.
*/
ctxts = 3 * RPCSVC_MAXPAGES;
newxprt->sc_sq_depth = rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
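	/* Illustrative arithmetic (assuming 4KB pages and the usual 1MB
	 * RPCSVC_MAXPAYLOAD, giving RPCSVC_MAXPAGES = 259): ctxts = 777,
	 * so sc_sq_depth = rq_depth + 777, capped at the device's
	 * max_qp_wr. */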
@ -460,12 +465,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
dprintk(" send CQ depth = %u, recv CQ depth = %u\n",
newxprt->sc_sq_depth, rq_depth);
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
newxprt->sc_max_send_sges = qp_attr.cap.max_send_sge;
newxprt->sc_qp = newxprt->sc_cm_id->qp;
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))


@ -2987,20 +2987,11 @@ static int bc_send_request(struct rpc_rqst *req)
return len;
}
/*
* The xprt destroy routine. Again, because this connection is client
* initiated, we do nothing
*/
static void bc_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: bc_destroy xprt %p\n", xprt);