linux-stable/fs/nfsd/nfs4callback.c

1466 lines
36 KiB
C
Raw Normal View History

/*
* Copyright (c) 2001 The Regents of the University of Michigan.
* All rights reserved.
*
* Kendrick Smith <kmsmith@umich.edu>
* Andy Adamson <andros@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/svc_xprt.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include "nfsd.h"
#include "state.h"
#include "netns.h"
#include "trace.h"
#include "xdr4cb.h"
#include "xdr4.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
/* Index of predefined Linux callback client operations */
struct nfs4_cb_compound_hdr {
/* args */
u32 ident; /* minorversion 0 only */
u32 nops;
__be32 *nops_p;
u32 minorversion;
/* res */
int status;
};
static __be32 *xdr_encode_empty_array(__be32 *p)
{
*p++ = xdr_zero;
return p;
}
/*
* Encode/decode NFSv4 CB basic data types
*
* Basic NFSv4 callback data types are defined in section 15 of RFC
* 3530: "Network File System (NFS) version 4 Protocol" and section
* 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version
* 1 Protocol"
*/
static void encode_uint32(struct xdr_stream *xdr, u32 n)
{
WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0);
}
static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
size_t len)
{
WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
}
/*
* nfs_cb_opnum4
*
* enum nfs_cb_opnum4 {
* OP_CB_GETATTR = 3,
* ...
* };
*/
enum nfs_cb_opnum4 {
OP_CB_GETATTR = 3,
OP_CB_RECALL = 4,
OP_CB_LAYOUTRECALL = 5,
OP_CB_NOTIFY = 6,
OP_CB_PUSH_DELEG = 7,
OP_CB_RECALL_ANY = 8,
OP_CB_RECALLABLE_OBJ_AVAIL = 9,
OP_CB_RECALL_SLOT = 10,
OP_CB_SEQUENCE = 11,
OP_CB_WANTS_CANCELLED = 12,
OP_CB_NOTIFY_LOCK = 13,
OP_CB_NOTIFY_DEVICEID = 14,
OP_CB_OFFLOAD = 15,
OP_CB_ILLEGAL = 10044
};
static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
*p = cpu_to_be32(op);
}
/*
* nfs_fh4
*
* typedef opaque nfs_fh4<NFS4_FHSIZE>;
*/
static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
{
u32 length = fh->fh_size;
__be32 *p;
BUG_ON(length > NFS4_FHSIZE);
p = xdr_reserve_space(xdr, 4 + length);
xdr_encode_opaque(p, &fh->fh_raw, length);
}
/*
* stateid4
*
* struct stateid4 {
* uint32_t seqid;
* opaque other[12];
* };
*/
static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
{
__be32 *p;
p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(sid->si_generation);
xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE);
}
/*
* sessionid4
*
* typedef opaque sessionid4[NFS4_SESSIONID_SIZE];
*/
static void encode_sessionid4(struct xdr_stream *xdr,
const struct nfsd4_session *session)
{
__be32 *p;
p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
xdr_encode_opaque_fixed(p, session->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
}
/*
* nfsstat4
*/
static const struct {
int stat;
int errno;
} nfs_cb_errtbl[] = {
{ NFS4_OK, 0 },
{ NFS4ERR_PERM, -EPERM },
{ NFS4ERR_NOENT, -ENOENT },
{ NFS4ERR_IO, -EIO },
{ NFS4ERR_NXIO, -ENXIO },
{ NFS4ERR_ACCESS, -EACCES },
{ NFS4ERR_EXIST, -EEXIST },
{ NFS4ERR_XDEV, -EXDEV },
{ NFS4ERR_NOTDIR, -ENOTDIR },
{ NFS4ERR_ISDIR, -EISDIR },
{ NFS4ERR_INVAL, -EINVAL },
{ NFS4ERR_FBIG, -EFBIG },
{ NFS4ERR_NOSPC, -ENOSPC },
{ NFS4ERR_ROFS, -EROFS },
{ NFS4ERR_MLINK, -EMLINK },
{ NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
{ NFS4ERR_NOTEMPTY, -ENOTEMPTY },
{ NFS4ERR_DQUOT, -EDQUOT },
{ NFS4ERR_STALE, -ESTALE },
{ NFS4ERR_BADHANDLE, -EBADHANDLE },
{ NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
{ NFS4ERR_NOTSUPP, -ENOTSUPP },
{ NFS4ERR_TOOSMALL, -ETOOSMALL },
{ NFS4ERR_SERVERFAULT, -ESERVERFAULT },
{ NFS4ERR_BADTYPE, -EBADTYPE },
{ NFS4ERR_LOCKED, -EAGAIN },
{ NFS4ERR_RESOURCE, -EREMOTEIO },
{ NFS4ERR_SYMLINK, -ELOOP },
{ NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
{ NFS4ERR_DEADLOCK, -EDEADLK },
{ -1, -EIO }
};
/*
* If we cannot translate the error, the recovery routines should
* handle it.
*
* Note: remaining NFSv4 error codes have values > 10000, so should
* not conflict with native Linux error codes.
*/
static int nfs_cb_stat_to_errno(int status)
{
int i;
for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
if (nfs_cb_errtbl[i].stat == status)
return nfs_cb_errtbl[i].errno;
}
dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status);
return -status;
}
static int decode_cb_op_status(struct xdr_stream *xdr,
enum nfs_cb_opnum4 expected, int *status)
{
__be32 *p;
u32 op;
p = xdr_inline_decode(xdr, 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
op = be32_to_cpup(p++);
if (unlikely(op != expected))
goto out_unexpected;
*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
return 0;
out_overflow:
return -EIO;
out_unexpected:
dprintk("NFSD: Callback server returned operation %d but "
"we issued a request for %d\n", op, expected);
return -EIO;
}
/*
* CB_COMPOUND4args
*
* struct CB_COMPOUND4args {
* utf8str_cs tag;
* uint32_t minorversion;
* uint32_t callback_ident;
* nfs_cb_argop4 argarray<>;
* };
*/
static void encode_cb_compound4args(struct xdr_stream *xdr,
struct nfs4_cb_compound_hdr *hdr)
{
__be32 * p;
p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
p = xdr_encode_empty_array(p); /* empty tag */
*p++ = cpu_to_be32(hdr->minorversion);
*p++ = cpu_to_be32(hdr->ident);
hdr->nops_p = p;
*p = cpu_to_be32(hdr->nops); /* argarray element count */
}
/*
* Update argarray element count
*/
static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
{
BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS);
*hdr->nops_p = cpu_to_be32(hdr->nops);
}
/*
* CB_COMPOUND4res
*
* struct CB_COMPOUND4res {
* nfsstat4 status;
* utf8str_cs tag;
* nfs_cb_resop4 resarray<>;
* };
*/
static int decode_cb_compound4res(struct xdr_stream *xdr,
struct nfs4_cb_compound_hdr *hdr)
{
u32 length;
__be32 *p;
p = xdr_inline_decode(xdr, 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
hdr->status = be32_to_cpup(p++);
/* Ignore the tag */
length = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, length + 4);
if (unlikely(p == NULL))
goto out_overflow;
p += XDR_QUADLEN(length);
hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
return -EIO;
}
/*
* CB_RECALL4args
*
* struct CB_RECALL4args {
* stateid4 stateid;
* bool truncate;
* nfs_fh4 fh;
* };
*/
static void encode_cb_recall4args(struct xdr_stream *xdr,
const struct nfs4_delegation *dp,
struct nfs4_cb_compound_hdr *hdr)
{
__be32 *p;
encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
encode_stateid4(xdr, &dp->dl_stid.sc_stateid);
p = xdr_reserve_space(xdr, 4);
*p++ = xdr_zero; /* truncate */
encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle);
hdr->nops++;
}
/*
* CB_RECALLANY4args
*
* struct CB_RECALLANY4args {
* uint32_t craa_objects_to_keep;
* bitmap4 craa_type_mask;
* };
*/
static void
encode_cb_recallany4args(struct xdr_stream *xdr,
struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
{
encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
encode_uint32(xdr, ra->ra_keep);
encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval));
hdr->nops++;
}
/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
* sessionid4 csa_sessionid;
* sequenceid4 csa_sequenceid;
* slotid4 csa_slotid;
* slotid4 csa_highest_slotid;
* bool csa_cachethis;
* referring_call_list4 csa_referring_call_lists<>;
* };
*/
static void encode_cb_sequence4args(struct xdr_stream *xdr,
const struct nfsd4_callback *cb,
struct nfs4_cb_compound_hdr *hdr)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
__be32 *p;
if (hdr->minorversion == 0)
return;
encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
encode_sessionid4(xdr, session);
p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
*p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
*p++ = xdr_zero; /* csa_slotid */
*p++ = xdr_zero; /* csa_highest_slotid */
*p++ = xdr_zero; /* csa_cachethis */
xdr_encode_empty_array(p); /* csa_referring_call_lists */
hdr->nops++;
}
/*
* CB_SEQUENCE4resok
*
* struct CB_SEQUENCE4resok {
* sessionid4 csr_sessionid;
* sequenceid4 csr_sequenceid;
* slotid4 csr_slotid;
* slotid4 csr_highest_slotid;
* slotid4 csr_target_highest_slotid;
* };
*
* union CB_SEQUENCE4res switch (nfsstat4 csr_status) {
* case NFS4_OK:
* CB_SEQUENCE4resok csr_resok4;
* default:
* void;
* };
*
* Our current back channel implmentation supports a single backchannel
* with a single slot.
*/
static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_callback *cb)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
int status = -ESERVERFAULT;
__be32 *p;
u32 dummy;
/*
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
dprintk("NFS: %s Invalid session id\n", __func__);
goto out;
}
p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
dummy = be32_to_cpup(p++);
if (dummy != session->se_cb_seq_nr) {
dprintk("NFS: %s Invalid sequence number\n", __func__);
goto out;
}
dummy = be32_to_cpup(p++);
if (dummy != 0) {
dprintk("NFS: %s Invalid slotid\n", __func__);
goto out;
}
/*
* FIXME: process highest slotid and target highest slotid
*/
status = 0;
out:
cb->cb_seq_status = status;
return status;
out_overflow:
status = -EIO;
goto out;
}
static int decode_cb_sequence4res(struct xdr_stream *xdr,
struct nfsd4_callback *cb)
{
int status;
if (cb->cb_clp->cl_minorversion == 0)
return 0;
status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_sequence4resok(xdr, cb);
}
/*
* NFSv4.0 and NFSv4.1 XDR encode functions
*
* NFSv4.0 callback argument types are defined in section 15 of RFC
* 3530: "Network File System (NFS) version 4 Protocol" and section 20
* of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
* Protocol".
*/
/*
* NB: Without this zero space reservation, callbacks over krb5p fail
*/
static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
const void *__unused)
{
xdr_reserve_space(xdr, 0);
}
/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
const void *data)
{
const struct nfsd4_callback *cb = data;
const struct nfs4_delegation *dp = cb_to_delegation(cb);
struct nfs4_cb_compound_hdr hdr = {
.ident = cb->cb_clp->cl_cb_ident,
.minorversion = cb->cb_clp->cl_minorversion,
};
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_recall4args(xdr, dp, &hdr);
encode_cb_nops(&hdr);
}
/*
* 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
*/
static void
nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
struct xdr_stream *xdr, const void *data)
{
const struct nfsd4_callback *cb = data;
struct nfsd4_cb_recall_any *ra;
struct nfs4_cb_compound_hdr hdr = {
.ident = cb->cb_clp->cl_cb_ident,
.minorversion = cb->cb_clp->cl_minorversion,
};
ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_recallany4args(xdr, &hdr, ra);
encode_cb_nops(&hdr);
}
/*
* NFSv4.0 and NFSv4.1 XDR decode functions
*
* NFSv4.0 callback result types are defined in section 15 of RFC
* 3530: "Network File System (NFS) version 4 Protocol" and section 20
* of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
* Protocol".
*/
static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
void *__unused)
{
return 0;
}
/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
void *data)
{
struct nfsd4_callback *cb = data;
struct nfs4_cb_compound_hdr hdr;
int status;
status = decode_cb_compound4res(xdr, &hdr);
if (unlikely(status))
return status;
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
/*
* 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
*/
static int
nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
void *data)
{
struct nfsd4_callback *cb = data;
struct nfs4_cb_compound_hdr hdr;
int status;
status = decode_cb_compound4res(xdr, &hdr);
if (unlikely(status))
return status;
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
return status;
}
#ifdef CONFIG_NFSD_PNFS
/*
* CB_LAYOUTRECALL4args
*
* struct layoutrecall_file4 {
* nfs_fh4 lor_fh;
* offset4 lor_offset;
* length4 lor_length;
* stateid4 lor_stateid;
* };
*
* union layoutrecall4 switch(layoutrecall_type4 lor_recalltype) {
* case LAYOUTRECALL4_FILE:
* layoutrecall_file4 lor_layout;
* case LAYOUTRECALL4_FSID:
* fsid4 lor_fsid;
* case LAYOUTRECALL4_ALL:
* void;
* };
*
* struct CB_LAYOUTRECALL4args {
* layouttype4 clora_type;
* layoutiomode4 clora_iomode;
* bool clora_changed;
* layoutrecall4 clora_recall;
* };
*/
static void encode_cb_layout4args(struct xdr_stream *xdr,
const struct nfs4_layout_stateid *ls,
struct nfs4_cb_compound_hdr *hdr)
{
__be32 *p;
BUG_ON(hdr->minorversion == 0);
p = xdr_reserve_space(xdr, 5 * 4);
*p++ = cpu_to_be32(OP_CB_LAYOUTRECALL);
*p++ = cpu_to_be32(ls->ls_layout_type);
*p++ = cpu_to_be32(IOMODE_ANY);
*p++ = cpu_to_be32(1);
*p = cpu_to_be32(RETURN_FILE);
encode_nfs_fh4(xdr, &ls->ls_stid.sc_file->fi_fhandle);
p = xdr_reserve_space(xdr, 2 * 8);
p = xdr_encode_hyper(p, 0);
xdr_encode_hyper(p, NFS4_MAX_UINT64);
encode_stateid4(xdr, &ls->ls_recall_sid);
hdr->nops++;
}
static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
struct xdr_stream *xdr,
const void *data)
{
const struct nfsd4_callback *cb = data;
const struct nfs4_layout_stateid *ls =
container_of(cb, struct nfs4_layout_stateid, ls_recall);
struct nfs4_cb_compound_hdr hdr = {
.ident = 0,
.minorversion = cb->cb_clp->cl_minorversion,
};
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_layout4args(xdr, ls, &hdr);
encode_cb_nops(&hdr);
}
static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
void *data)
{
struct nfsd4_callback *cb = data;
struct nfs4_cb_compound_hdr hdr;
int status;
status = decode_cb_compound4res(xdr, &hdr);
if (unlikely(status))
return status;
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
}
#endif /* CONFIG_NFSD_PNFS */
static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so)
{
__be32 *p;
p = xdr_reserve_space(xdr, 8 + 4 + so->so_owner.len);
p = xdr_encode_opaque_fixed(p, &so->so_client->cl_clientid, 8);
xdr_encode_opaque(p, so->so_owner.data, so->so_owner.len);
}
static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
struct xdr_stream *xdr,
const void *data)
{
const struct nfsd4_callback *cb = data;
const struct nfsd4_blocked_lock *nbl =
container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
struct nfs4_cb_compound_hdr hdr = {
.ident = 0,
.minorversion = cb->cb_clp->cl_minorversion,
};
__be32 *p;
BUG_ON(hdr.minorversion == 0);
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
p = xdr_reserve_space(xdr, 4);
*p = cpu_to_be32(OP_CB_NOTIFY_LOCK);
encode_nfs_fh4(xdr, &nbl->nbl_fh);
encode_stateowner(xdr, &lo->lo_owner);
hdr.nops++;
encode_cb_nops(&hdr);
}
static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
void *data)
{
struct nfsd4_callback *cb = data;
struct nfs4_cb_compound_hdr hdr;
int status;
status = decode_cb_compound4res(xdr, &hdr);
if (unlikely(status))
return status;
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
}
/*
* struct write_response4 {
* stateid4 wr_callback_id<1>;
* length4 wr_count;
* stable_how4 wr_committed;
* verifier4 wr_writeverf;
* };
* union offload_info4 switch (nfsstat4 coa_status) {
* case NFS4_OK:
* write_response4 coa_resok4;
* default:
* length4 coa_bytes_copied;
* };
* struct CB_OFFLOAD4args {
* nfs_fh4 coa_fh;
* stateid4 coa_stateid;
* offload_info4 coa_offload_info;
* };
*/
static void encode_offload_info4(struct xdr_stream *xdr,
const struct nfsd4_cb_offload *cbo)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
*p = cbo->co_nfserr;
switch (cbo->co_nfserr) {
case nfs_ok:
p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
p = xdr_encode_empty_array(p);
p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written);
*p++ = cpu_to_be32(cbo->co_res.wr_stable_how);
p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data,
NFS4_VERIFIER_SIZE);
break;
default:
p = xdr_reserve_space(xdr, 8);
/* We always return success if bytes were written */
p = xdr_encode_hyper(p, 0);
}
}
static void encode_cb_offload4args(struct xdr_stream *xdr,
const struct nfsd4_cb_offload *cbo,
struct nfs4_cb_compound_hdr *hdr)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
*p = cpu_to_be32(OP_CB_OFFLOAD);
encode_nfs_fh4(xdr, &cbo->co_fh);
encode_stateid4(xdr, &cbo->co_res.cb_stateid);
encode_offload_info4(xdr, cbo);
hdr->nops++;
}
static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
struct xdr_stream *xdr,
const void *data)
{
const struct nfsd4_callback *cb = data;
const struct nfsd4_cb_offload *cbo =
container_of(cb, struct nfsd4_cb_offload, co_cb);
struct nfs4_cb_compound_hdr hdr = {
.ident = 0,
.minorversion = cb->cb_clp->cl_minorversion,
};
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_offload4args(xdr, cbo, &hdr);
encode_cb_nops(&hdr);
}
static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
void *data)
{
struct nfsd4_callback *cb = data;
struct nfs4_cb_compound_hdr hdr;
int status;
status = decode_cb_compound4res(xdr, &hdr);
if (unlikely(status))
return status;
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
}
/*
* RPC procedure tables
*/
#define PROC(proc, call, argtype, restype) \
[NFSPROC4_CLNT_##proc] = { \
.p_proc = NFSPROC4_CB_##call, \
.p_encode = nfs4_xdr_enc_##argtype, \
.p_decode = nfs4_xdr_dec_##restype, \
.p_arglen = NFS4_enc_##argtype##_sz, \
.p_replen = NFS4_dec_##restype##_sz, \
.p_statidx = NFSPROC4_CB_##call, \
.p_name = #proc, \
}
static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_NULL, NULL, cb_null, cb_null),
PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall),
#ifdef CONFIG_NFSD_PNFS
PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout),
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
static const struct rpc_version nfs_cb_version4 = {
/*
* Note on the callback rpc program version number: despite language in rfc
* 5661 section 18.36.3 requiring servers to use 4 in this field, the
* official xdr descriptions for both 4.0 and 4.1 specify version 1, and
* in practice that appears to be what implementations use. The section
* 18.36.3 language is expected to be fixed in an erratum.
*/
.number = 1,
.nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
.procs = nfs4_cb_procedures,
.counts = nfs4_cb_counts,
};
static const struct rpc_version *nfs_cb_version[2] = {
[1] = &nfs_cb_version4,
};
static const struct rpc_program cb_program;
static struct rpc_stat cb_stats = {
.program = &cb_program
};
#define NFS4_CALLBACK 0x40000000
static const struct rpc_program cb_program = {
.name = "nfs4_cb",
.number = NFS4_CALLBACK,
.nrvers = ARRAY_SIZE(nfs_cb_version),
.version = nfs_cb_version,
.stats = &cb_stats,
.pipe_dir_name = "nfsd4_cb",
};
static int max_cb_time(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
/*
* nfsd4_lease is set to at most one hour in __nfsd4_write_time,
* so we can use 32-bit math on it. Warn if that assumption
* ever stops being true.
*/
if (WARN_ON_ONCE(nn->nfsd4_lease > 3600))
return 360 * HZ;
return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ;
}
static struct workqueue_struct *callback_wq;
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
return queue_work(callback_wq, &cb->cb_work);
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
{
atomic_inc(&clp->cl_cb_inflight);
}
static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
{
if (atomic_dec_and_test(&clp->cl_cb_inflight))
wake_up_var(&clp->cl_cb_inflight);
}
static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
{
wait_var_event(&clp->cl_cb_inflight,
!atomic_read(&clp->cl_cb_inflight));
}
static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
client->cl_principal = clp->cl_cred.cr_targ_princ ?
clp->cl_cred.cr_targ_princ : "nfs";
return get_cred(rpc_machine_cred());
} else {
struct cred *kcred;
cred: Do not default to init_cred in prepare_kernel_cred() A common exploit pattern for ROP attacks is to abuse prepare_kernel_cred() in order to construct escalated privileges[1]. Instead of providing a short-hand argument (NULL) to the "daemon" argument to indicate using init_cred as the base cred, require that "daemon" is always set to an actual task. Replace all existing callers that were passing NULL with &init_task. Future attacks will need to have sufficiently powerful read/write primitives to have found an appropriately privileged task and written it to the ROP stack as an argument to succeed, which is similarly difficult to the prior effort needed to escalate privileges before struct cred existed: locate the current cred and overwrite the uid member. This has the added benefit of meaning that prepare_kernel_cred() can no longer exceed the privileges of the init task, which may have changed from the original init_cred (e.g. dropping capabilities from the bounding set). [1] https://google.com/search?q=commit_creds(prepare_kernel_cred(0)) Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: David Howells <dhowells@redhat.com> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Steve French <sfrench@samba.org> Cc: Ronnie Sahlberg <lsahlber@redhat.com> Cc: Shyam Prasad N <sprasad@microsoft.com> Cc: Tom Talpey <tom@talpey.com> Cc: Namjae Jeon <linkinjeon@kernel.org> Cc: Trond Myklebust <trond.myklebust@hammerspace.com> Cc: Anna Schumaker <anna@kernel.org> Cc: Chuck Lever <chuck.lever@oracle.com> Cc: Jeff Layton <jlayton@kernel.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Eric Dumazet <edumazet@google.com> Cc: Jakub Kicinski <kuba@kernel.org> Cc: Paolo Abeni <pabeni@redhat.com> Cc: "Michal Koutný" <mkoutny@suse.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: linux-nfs@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook <keescook@chromium.org> Acked-by: Luis Chamberlain <mcgrof@kernel.org> Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> Acked-by: Russ Weight <russell.h.weight@intel.com> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Paulo Alcantara (SUSE) <pc@cjr.nz> Link: https://lore.kernel.org/r/20221026232943.never.775-kees@kernel.org
2022-10-26 23:31:11 +00:00
kcred = prepare_kernel_cred(&init_task);
if (!kcred)
return NULL;
kcred->fsuid = ses->se_cb_sec.uid;
kcred->fsgid = ses->se_cb_sec.gid;
return kcred;
}
}
static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
int maxtime = max_cb_time(clp->net);
struct rpc_timeout timeparms = {
.to_initval = maxtime,
.to_retries = 0,
.to_maxval = maxtime,
};
struct rpc_create_args args = {
.net = clp->net,
.address = (struct sockaddr *) &conn->cb_addr,
.addrsize = conn->cb_addrlen,
.saddress = (struct sockaddr *) &conn->cb_saddr,
.timeout = &timeparms,
.program = &cb_program,
.version = 1,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
.cred = current_cred(),
};
struct rpc_clnt *client;
const struct cred *cred;
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
(clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) {
trace_nfsd_cb_setup_err(clp, -EINVAL);
return -EINVAL;
}
args.client_name = clp->cl_cred.cr_principal;
args.prognumber = conn->cb_prog;
args.protocol = XPRT_TRANSPORT_TCP;
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
if (!conn->cb_xprt)
return -EINVAL;
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
XPRT_TRANSPORT_BC;
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
client = rpc_create(&args);
if (IS_ERR(client)) {
trace_nfsd_cb_setup_err(clp, PTR_ERR(client));
return PTR_ERR(client);
}
cred = get_backchannel_cred(clp, client, ses);
if (!cred) {
trace_nfsd_cb_setup_err(clp, -ENOMEM);
rpc_shutdown_client(client);
return -ENOMEM;
}
nfsd: under NFSv4.1, fix double svc_xprt_put on rpc_create failure On error situation `clp->cl_cb_conn.cb_xprt` should not be given a reference to the xprt otherwise both client cleanup and the error handling path of the caller call to put it. Better to delay handing over the reference to a later branch. [ 72.530665] refcount_t: underflow; use-after-free. [ 72.531933] WARNING: CPU: 0 PID: 173 at lib/refcount.c:28 refcount_warn_saturate+0xcf/0x120 [ 72.533075] Modules linked in: nfsd(OE) nfsv4(OE) nfsv3(OE) nfs(OE) lockd(OE) compat_nfs_ssc(OE) nfs_acl(OE) rpcsec_gss_krb5(OE) auth_rpcgss(OE) rpcrdma(OE) dns_resolver fscache netfs grace rdma_cm iw_cm ib_cm sunrpc(OE) mlx5_ib mlx5_core mlxfw pci_hyperv_intf ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nft_counter xt_addrtype nft_compat br_netfilter bridge stp llc nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set overlay nf_tables nfnetlink crct10dif_pclmul crc32_pclmul ghash_clmulni_intel xfs serio_raw virtio_net virtio_blk net_failover failover fuse [last unloaded: sunrpc] [ 72.540389] CPU: 0 PID: 173 Comm: kworker/u16:5 Tainted: G OE 5.15.82-dan #1 [ 72.541511] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.16.0-3.module+el8.7.0+1084+97b81f61 04/01/2014 [ 72.542717] Workqueue: nfsd4_callbacks nfsd4_run_cb_work [nfsd] [ 72.543575] RIP: 0010:refcount_warn_saturate+0xcf/0x120 [ 72.544299] Code: 55 00 0f 0b 5d e9 01 50 98 00 80 3d 75 9e 39 08 00 0f 85 74 ff ff ff 48 c7 c7 e8 d1 60 8e c6 05 61 9e 39 08 01 e8 f6 51 55 00 <0f> 0b 5d e9 d9 4f 98 00 80 3d 4b 9e 39 08 00 0f 85 4c ff ff ff 48 [ 72.546666] RSP: 0018:ffffb3f841157cf0 EFLAGS: 00010286 [ 72.547393] RAX: 0000000000000026 RBX: ffff89ac6231d478 RCX: 0000000000000000 [ 72.548324] RDX: ffff89adb7c2c2c0 RSI: ffff89adb7c205c0 RDI: ffff89adb7c205c0 [ 72.549271] RBP: ffffb3f841157cf0 R08: 0000000000000000 R09: c0000000ffefffff [ 72.550209] R10: 0000000000000001 R11: ffffb3f841157ad0 R12: ffff89ac6231d180 [ 72.551142] R13: ffff89ac6231d478 R14: ffff89ac40c06180 R15: ffff89ac6231d4b0 [ 72.552089] FS: 0000000000000000(0000) GS:ffff89adb7c00000(0000) knlGS:0000000000000000 [ 72.553175] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 72.553934] CR2: 0000563a310506a8 CR3: 0000000109a66000 CR4: 0000000000350ef0 [ 72.554874] Call Trace: [ 72.555278] <TASK> [ 72.555614] svc_xprt_put+0xaf/0xe0 [sunrpc] [ 72.556276] nfsd4_process_cb_update.isra.11+0xb7/0x410 [nfsd] [ 72.557087] ? update_load_avg+0x82/0x610 [ 72.557652] ? cpuacct_charge+0x60/0x70 [ 72.558212] ? dequeue_entity+0xdb/0x3e0 [ 72.558765] ? queued_spin_unlock+0x9/0x20 [ 72.559358] nfsd4_run_cb_work+0xfc/0x270 [nfsd] [ 72.560031] process_one_work+0x1df/0x390 [ 72.560600] worker_thread+0x37/0x3b0 [ 72.561644] ? process_one_work+0x390/0x390 [ 72.562247] kthread+0x12f/0x150 [ 72.562710] ? set_kthread_struct+0x50/0x50 [ 72.563309] ret_from_fork+0x22/0x30 [ 72.563818] </TASK> [ 72.564189] ---[ end trace 031117b1c72ec616 ]--- [ 72.566019] list_add corruption. next->prev should be prev (ffff89ac4977e538), but was ffff89ac4763e018. (next=ffff89ac4763e018). [ 72.567647] ------------[ cut here ]------------ Fixes: a4abc6b12eb1 ("nfsd: Fix svc_xprt refcnt leak when setup callback client failed") Cc: Xiyu Yang <xiyuyang19@fudan.edu.cn> Cc: J. Bruce Fields <bfields@redhat.com> Signed-off-by: Dan Aloni <dan.aloni@vastdata.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2022-12-12 11:11:06 +00:00
if (clp->cl_minorversion != 0)
clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
clp->cl_cb_client = client;
clp->cl_cb_cred = cred;
rcu_read_lock();
trace_nfsd_cb_setup(clp, rpc_peeraddr2str(client, RPC_DISPLAY_NETID),
args.authflavor);
rcu_read_unlock();
return 0;
}
static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate)
{
if (clp->cl_cb_state != newstate) {
clp->cl_cb_state = newstate;
trace_nfsd_cb_state(clp);
}
}
static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN);
}
static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
nfsd4_mark_cb_state(clp, NFSD4_CB_FAULT);
}
static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
{
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
if (task->tk_status)
nfsd4_mark_cb_down(clp, task->tk_status);
else
nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
}
static void nfsd4_cb_probe_release(void *calldata)
{
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
nfsd41_cb_inflight_end(clp);
}
static const struct rpc_call_ops nfsd4_cb_probe_ops = {
/* XXX: release method to ensure we set the cb channel down if
* necessary on early failure? */
.rpc_call_done = nfsd4_cb_probe_done,
.rpc_release = nfsd4_cb_probe_release,
};
/*
* Poke the callback thread to process any updates to the callback
* parameters, and send a null probe.
*/
void nfsd4_probe_callback(struct nfs4_client *clp)
{
trace_nfsd_cb_probe(clp);
nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN);
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
nfsd4_run_cb(&clp->cl_cb_null);
}
void nfsd4_probe_callback_sync(struct nfs4_client *clp)
{
nfsd4_probe_callback(clp);
flush_workqueue(callback_wq);
}
void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
{
nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN);
spin_lock(&clp->cl_lock);
memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
spin_unlock(&clp->cl_lock);
}
/*
* There's currently a single callback channel slot.
* If the slot is available, then mark it busy. Otherwise, set the
* thread for sleeping on the callback RPC wait queue.
*/
static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_client *clp = cb->cb_clp;
if (!cb->cb_holds_slot &&
test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
/* Race breaker */
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
dprintk("%s slot is busy\n", __func__);
return false;
}
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
cb->cb_holds_slot = true;
return true;
}
static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
if (cb->cb_holds_slot) {
cb->cb_holds_slot = false;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
}
}
static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
nfsd41_cb_release_slot(cb);
if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
nfsd41_cb_inflight_end(clp);
}
/*
* TODO: cb_sequence should support referring call lists, cachethis, multiple
* slots, and mark callback channel down on communication errors.
*/
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
{
struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp;
u32 minorversion = clp->cl_minorversion;
/*
* cb_seq_status is only set in decode_cb_sequence4res,
* and so will remain 1 if an rpc level failure occurs.
*/
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion && !nfsd41_cb_get_slot(cb, task))
return;
rpc_call_start(task);
}
static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
struct nfsd4_session *session = clp->cl_cb_session;
bool ret = true;
if (!clp->cl_minorversion) {
/*
* If the backchannel connection was shut down while this
* task was queued, we need to resubmit it after setting up
* a new backchannel connection.
*
* Note that if we lost our callback connection permanently
* the submission code will error out, so we don't need to
* handle that case here.
*/
if (RPC_SIGNALLED(task))
goto need_restart;
return true;
}
if (!cb->cb_holds_slot)
goto need_restart;
switch (cb->cb_seq_status) {
case 0:
/*
* No need for lock, access serialized in nfsd4_cb_prepare
*
* RFC5661 20.9.3
* If CB_SEQUENCE returns an error, then the state of the slot
* (sequence ID, cached reply) MUST NOT change.
*/
++session->se_cb_seq_nr;
break;
case -ESERVERFAULT:
++session->se_cb_seq_nr;
fallthrough;
case 1:
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
ret = false;
break;
case -NFS4ERR_DELAY:
if (!rpc_restart_call(task))
goto out;
rpc_delay(task, 2 * HZ);
return false;
case -NFS4ERR_BADSLOT:
goto retry_nowait;
case -NFS4ERR_SEQ_MISORDERED:
if (session->se_cb_seq_nr != 1) {
session->se_cb_seq_nr = 1;
goto retry_nowait;
}
break;
default:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
dprintk("%s: unprocessed error %d\n", __func__,
cb->cb_seq_status);
}
nfsd41_cb_release_slot(cb);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
if (RPC_SIGNALLED(task))
goto need_restart;
out:
return ret;
retry_nowait:
if (rpc_restart_call_prepare(task))
ret = false;
goto out;
need_restart:
if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
task->tk_status = 0;
cb->cb_need_restart = true;
}
return false;
}
static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
{
struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp;
if (!nfsd4_cb_sequence_done(task, cb))
return;
if (cb->cb_status) {
WARN_ON_ONCE(task->tk_status);
task->tk_status = cb->cb_status;
}
switch (cb->cb_ops->done(cb, task)) {
case 0:
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
case 1:
switch (task->tk_status) {
case -EIO:
case -ETIMEDOUT:
case -EACCES:
nfsd4_mark_cb_down(clp, task->tk_status);
}
break;
default:
BUG();
}
}
static void nfsd4_cb_release(void *calldata)
{
struct nfsd4_callback *cb = calldata;
if (cb->cb_need_restart)
nfsd4_queue_cb(cb);
else
nfsd41_destroy_cb(cb);
}
static const struct rpc_call_ops nfsd4_cb_ops = {
.rpc_call_prepare = nfsd4_cb_prepare,
.rpc_call_done = nfsd4_cb_done,
.rpc_release = nfsd4_cb_release,
};
int nfsd4_create_callback_queue(void)
{
callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
if (!callback_wq)
return -ENOMEM;
return 0;
}
void nfsd4_destroy_callback_queue(void)
{
destroy_workqueue(callback_wq);
}
/* must be called under the state lock */
void nfsd4_shutdown_callback(struct nfs4_client *clp)
{
if (clp->cl_cb_state != NFSD4_CB_UNKNOWN)
trace_nfsd_cb_shutdown(clp);
set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
/*
* Note this won't actually result in a null callback;
* instead, nfsd4_run_cb_null() will detect the killed
* client, destroy the rpc client, and stop:
*/
nfsd4_run_cb(&clp->cl_cb_null);
flush_workqueue(callback_wq);
nfsd41_cb_inflight_wait_complete(clp);
}
/* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{
struct nfsd4_session *s;
struct nfsd4_conn *c;
list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
list_for_each_entry(c, &s->se_conns, cn_persession) {
if (c->cn_flags & NFS4_CDFC4_BACK)
return c;
}
}
return NULL;
}
/*
* Note there isn't a lot of locking in this code; instead we depend on
* the fact that it is run from the callback_wq, which won't run two
* work items at once. So, for example, callback_wq handles all access
* of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
*/
static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
{
struct nfs4_cb_conn conn;
struct nfs4_client *clp = cb->cb_clp;
struct nfsd4_session *ses = NULL;
struct nfsd4_conn *c;
int err;
/*
* This is either an update, or the client dying; in either case,
* kill the old client:
*/
if (clp->cl_cb_client) {
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
put_cred(clp->cl_cb_cred);
clp->cl_cb_cred = NULL;
}
if (clp->cl_cb_conn.cb_xprt) {
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
clp->cl_cb_conn.cb_xprt = NULL;
}
if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
return;
spin_lock(&clp->cl_lock);
/*
* Only serialized callback code is allowed to clear these
* flags; main nfsd code can only set them:
*/
BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
c = __nfsd4_find_backchannel(clp);
if (c) {
svc_xprt_get(c->cn_xprt);
conn.cb_xprt = c->cn_xprt;
ses = c->cn_session;
}
spin_unlock(&clp->cl_lock);
err = setup_callback_client(clp, &conn, ses);
if (err) {
nfsd4_mark_cb_down(clp, err);
if (c)
svc_xprt_put(c->cn_xprt);
return;
}
}
static void
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
int flags;
if (cb->cb_need_restart) {
cb->cb_need_restart = false;
} else {
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
}
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
if (!clnt) {
/* Callback channel broken, or client killed; give up: */
nfsd41_destroy_cb(cb);
return;
}
/*
* Don't send probe messages for 4.1 or later.
*/
if (!cb->cb_ops && clp->cl_minorversion) {
nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
nfsd41_destroy_cb(cb);
return;
}
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
}
void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
{
cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_seq_status = 1;
cb->cb_status = 0;
cb->cb_need_restart = false;
cb->cb_holds_slot = false;
}
/**
* nfsd4_run_cb - queue up a callback job to run
* @cb: callback to queue
*
* Kick off a callback to do its thing. Returns false if it was already
* on a queue, true otherwise.
*/
bool nfsd4_run_cb(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
bool queued;
nfsd41_cb_inflight_begin(clp);
queued = nfsd4_queue_cb(cb);
if (!queued)
nfsd41_cb_inflight_end(clp);
return queued;
}