Merge branch 'for-linus' of git://linux-nfs.org/~bfields/linux

* 'for-linus' of git://linux-nfs.org/~bfields/linux: (100 commits)
  SUNRPC: RPC program information is stored in unsigned integers
  SUNRPC: Move exported symbol definitions after function declaration part 2
  NLM: tear down RPC clients in nlm_shutdown_hosts
  SUNRPC: spin svc_rqst initialization to its own function
  nfsd: more careful input validation in nfsctl write methods
  lockd: minor log message fix
  knfsd: don't bother mapping putrootfh enoent to eperm
  rdma: makefile
  rdma: ONCRPC RDMA protocol marshalling
  rdma: SVCRDMA sendto
  rdma: SVCRDMA recvfrom
  rdma: SVCRDMA Core Transport Services
  rdma: SVCRDMA Transport Module
  rdma: SVCRMDA Header File
  svc: Add svc_xprt_names service to replace svc_sock_names
  knfsd: Support adding transports by writing portlist file
  svc: Add svc API that queries for a transport instance
  svc: Add /proc/sys/sunrpc/transport files
  svc: Add transport hdr size for defer/revisit
  svc: Move the xprt independent code to the svc_xprt.c file
  ...
This commit is contained in:
Linus Torvalds 2008-02-02 14:31:28 +11:00
commit 63e9b66e29
60 changed files with 5449 additions and 1663 deletions

View file

@ -2247,7 +2247,7 @@ P: J. Bruce Fields
M: bfields@fieldses.org
P: Neil Brown
M: neilb@suse.de
L: nfs@lists.sourceforge.net
L: linux-nfs@vger.kernel.org
W: http://nfs.sourceforge.net/
S: Supported

View file

@ -1674,6 +1674,8 @@ config NFSD
select CRYPTO_MD5 if NFSD_V4
select CRYPTO if NFSD_V4
select FS_POSIX_ACL if NFSD_V4
select PROC_FS if NFSD_V4
select PROC_FS if SUNRPC_GSS
help
If you want your Linux box to act as an NFS *server*, so that other
computers on your local network which support NFS can access certain

View file

@ -34,10 +34,10 @@ static DEFINE_MUTEX(nlm_host_mutex);
static void nlm_gc_hosts(void);
static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
const char *, int, int);
const char *, unsigned int, int);
static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
const char *hostname,
int hostname_len);
unsigned int hostname_len);
/*
* Common host lookup routine for server & client
@ -45,7 +45,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
static struct nlm_host *
nlm_lookup_host(int server, const struct sockaddr_in *sin,
int proto, int version, const char *hostname,
int hostname_len, const struct sockaddr_in *ssin)
unsigned int hostname_len,
const struct sockaddr_in *ssin)
{
struct hlist_head *chain;
struct hlist_node *pos;
@ -176,7 +177,7 @@ nlm_destroy_host(struct nlm_host *host)
*/
struct nlm_host *
nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
const char *hostname, int hostname_len)
const char *hostname, unsigned int hostname_len)
{
struct sockaddr_in ssin = {0};
@ -189,7 +190,7 @@ nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
*/
struct nlm_host *
nlmsvc_lookup_host(struct svc_rqst *rqstp,
const char *hostname, int hostname_len)
const char *hostname, unsigned int hostname_len)
{
struct sockaddr_in ssin = {0};
@ -307,7 +308,8 @@ void nlm_release_host(struct nlm_host *host)
* Release all resources held by that peer.
*/
void nlm_host_rebooted(const struct sockaddr_in *sin,
const char *hostname, int hostname_len,
const char *hostname,
unsigned int hostname_len,
u32 new_state)
{
struct hlist_head *chain;
@ -377,8 +379,13 @@ nlm_shutdown_hosts(void)
/* First, make all hosts eligible for gc */
dprintk("lockd: nuking all hosts...\n");
for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
hlist_for_each_entry(host, pos, chain, h_hash)
hlist_for_each_entry(host, pos, chain, h_hash) {
host->h_expires = jiffies - 1;
if (host->h_rpcclnt) {
rpc_shutdown_client(host->h_rpcclnt);
host->h_rpcclnt = NULL;
}
}
}
/* Then, perform a garbage collection pass */
@ -449,7 +456,7 @@ static DEFINE_MUTEX(nsm_mutex);
static struct nsm_handle *
__nsm_find(const struct sockaddr_in *sin,
const char *hostname, int hostname_len,
const char *hostname, unsigned int hostname_len,
int create)
{
struct nsm_handle *nsm = NULL;
@ -503,7 +510,8 @@ __nsm_find(const struct sockaddr_in *sin,
}
static struct nsm_handle *
nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
nsm_find(const struct sockaddr_in *sin, const char *hostname,
unsigned int hostname_len)
{
return __nsm_find(sin, hostname, hostname_len, 1);
}

View file

@ -219,19 +219,6 @@ lockd(struct svc_rqst *rqstp)
module_put_and_exit(0);
}
static int find_socket(struct svc_serv *serv, int proto)
{
struct svc_sock *svsk;
int found = 0;
list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
if (svsk->sk_sk->sk_protocol == proto) {
found = 1;
break;
}
return found;
}
/*
* Make any sockets that are needed but not present.
* If nlm_udpport or nlm_tcpport were set as module
@ -240,17 +227,25 @@ static int find_socket(struct svc_serv *serv, int proto)
static int make_socks(struct svc_serv *serv, int proto)
{
static int warned;
struct svc_xprt *xprt;
int err = 0;
if (proto == IPPROTO_UDP || nlm_udpport)
if (!find_socket(serv, IPPROTO_UDP))
err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport,
SVC_SOCK_DEFAULTS);
if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport))
if (!find_socket(serv, IPPROTO_TCP))
err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport,
SVC_SOCK_DEFAULTS);
if (proto == IPPROTO_UDP || nlm_udpport) {
xprt = svc_find_xprt(serv, "udp", 0, 0);
if (!xprt)
err = svc_create_xprt(serv, "udp", nlm_udpport,
SVC_SOCK_DEFAULTS);
else
svc_xprt_put(xprt);
}
if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) {
xprt = svc_find_xprt(serv, "tcp", 0, 0);
if (!xprt)
err = svc_create_xprt(serv, "tcp", nlm_tcpport,
SVC_SOCK_DEFAULTS);
else
svc_xprt_put(xprt);
}
if (err >= 0) {
warned = 0;
err = 0;

View file

@ -84,6 +84,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
int rc = rpc_success;
dprintk("lockd: TEST4 called\n");
resp->cookie = argp->cookie;
@ -91,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept test requests during grace period */
if (nlmsvc_grace_period) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
return rc;
}
/* Obtain client and file */
@ -101,12 +102,13 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Now check for conflicting locks */
resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
if (resp->status == nlm_drop_reply)
return rpc_drop_reply;
rc = rpc_drop_reply;
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
nlm_release_host(host);
nlm_release_file(file);
return rpc_success;
return rc;
}
static __be32
@ -115,6 +117,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
int rc = rpc_success;
dprintk("lockd: LOCK called\n");
@ -123,7 +126,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept new lock requests during grace period */
if (nlmsvc_grace_period && !argp->reclaim) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
return rc;
}
/* Obtain client and file */
@ -146,12 +149,13 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
argp->block, &argp->cookie);
if (resp->status == nlm_drop_reply)
return rpc_drop_reply;
rc = rpc_drop_reply;
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
nlm_release_host(host);
nlm_release_file(file);
return rpc_success;
return rc;
}
static __be32

View file

@ -501,25 +501,29 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
block, block->b_flags, block->b_fl);
if (block->b_flags & B_TIMED_OUT) {
nlmsvc_unlink_block(block);
return nlm_lck_denied;
ret = nlm_lck_denied;
goto out;
}
if (block->b_flags & B_GOT_CALLBACK) {
nlmsvc_unlink_block(block);
if (block->b_fl != NULL
&& block->b_fl->fl_type != F_UNLCK) {
lock->fl = *block->b_fl;
goto conf_lock;
}
else {
nlmsvc_unlink_block(block);
return nlm_granted;
} else {
ret = nlm_granted;
goto out;
}
}
return nlm_drop_reply;
ret = nlm_drop_reply;
goto out;
}
error = vfs_test_lock(file->f_file, &lock->fl);
if (error == -EINPROGRESS)
return nlmsvc_defer_lock_rqst(rqstp, block);
if (error == -EINPROGRESS) {
ret = nlmsvc_defer_lock_rqst(rqstp, block);
goto out;
}
if (error) {
ret = nlm_lck_denied_nolocks;
goto out;

View file

@ -113,6 +113,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
int rc = rpc_success;
dprintk("lockd: TEST called\n");
resp->cookie = argp->cookie;
@ -120,7 +121,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept test requests during grace period */
if (nlmsvc_grace_period) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
return rc;
}
/* Obtain client and file */
@ -130,13 +131,14 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Now check for conflicting locks */
resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
if (resp->status == nlm_drop_reply)
return rpc_drop_reply;
rc = rpc_drop_reply;
else
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
nlm_release_host(host);
nlm_release_file(file);
return rpc_success;
return rc;
}
static __be32
@ -145,6 +147,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
int rc = rpc_success;
dprintk("lockd: LOCK called\n");
@ -153,7 +156,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Don't accept new lock requests during grace period */
if (nlmsvc_grace_period && !argp->reclaim) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
return rc;
}
/* Obtain client and file */
@ -176,12 +179,13 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
argp->block, &argp->cookie));
if (resp->status == nlm_drop_reply)
return rpc_drop_reply;
rc = rpc_drop_reply;
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
nlm_release_host(host);
nlm_release_file(file);
return rpc_success;
return rc;
}
static __be32

View file

@ -87,7 +87,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
unsigned int hash;
__be32 nfserr;
nlm_debug_print_fh("nlm_file_lookup", f);
nlm_debug_print_fh("nlm_lookup_file", f);
hash = file_hash(f);

View file

@ -119,8 +119,8 @@ int nfs_callback_up(void)
if (!serv)
goto out_err;
ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport,
SVC_SOCK_ANONYMOUS);
ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport,
SVC_SOCK_ANONYMOUS);
if (ret <= 0)
goto out_destroy;
nfs_callback_tcpport = ret;

View file

@ -1,6 +1,4 @@
/*
* include/linux/nfsd/auth.h
*
* nfsd-specific authentication stuff.
* uid/gid mapping not yet implemented.
*
@ -10,8 +8,6 @@
#ifndef LINUX_NFSD_AUTH_H
#define LINUX_NFSD_AUTH_H
#ifdef __KERNEL__
#define nfsd_luid(rq, uid) ((u32)(uid))
#define nfsd_lgid(rq, gid) ((u32)(gid))
#define nfsd_ruid(rq, uid) ((u32)(uid))
@ -23,5 +19,4 @@
*/
int nfsd_setuser(struct svc_rqst *, struct svc_export *);
#endif /* __KERNEL__ */
#endif /* LINUX_NFSD_AUTH_H */

View file

@ -1357,8 +1357,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
if (PTR_ERR(exp) == -ENOENT)
return nfserr_perm;
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
@ -1637,13 +1635,19 @@ exp_verify_string(char *cp, int max)
/*
* Initialize the exports module.
*/
void
int
nfsd_export_init(void)
{
int rv;
dprintk("nfsd: initializing export module.\n");
cache_register(&svc_export_cache);
cache_register(&svc_expkey_cache);
rv = cache_register(&svc_export_cache);
if (rv)
return rv;
rv = cache_register(&svc_expkey_cache);
if (rv)
cache_unregister(&svc_export_cache);
return rv;
}
@ -1670,10 +1674,8 @@ nfsd_export_shutdown(void)
exp_writelock();
if (cache_unregister(&svc_expkey_cache))
printk(KERN_ERR "nfsd: failed to unregister expkey cache\n");
if (cache_unregister(&svc_export_cache))
printk(KERN_ERR "nfsd: failed to unregister export cache\n");
cache_unregister(&svc_expkey_cache);
cache_unregister(&svc_export_cache);
svcauth_unix_purge();
exp_writeunlock();

View file

@ -221,12 +221,17 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclres *resp)
{
struct dentry *dentry = resp->fh.fh_dentry;
struct inode *inode = dentry->d_inode;
struct inode *inode;
struct kvec *head = rqstp->rq_res.head;
unsigned int base;
int n;
int w;
/*
* Since this is version 2, the check for nfserr in
* nfsd_dispatch actually ensures the following cannot happen.
* However, it seems fragile to depend on that.
*/
if (dentry == NULL || dentry->d_inode == NULL)
return 0;
inode = dentry->d_inode;

View file

@ -21,6 +21,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr3.h>
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR
@ -88,10 +89,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
* no slashes or null bytes.
*/
static __be32 *
decode_filename(__be32 *p, char **namp, int *lenp)
decode_filename(__be32 *p, char **namp, unsigned int *lenp)
{
char *name;
int i;
unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) {
@ -452,8 +453,7 @@ int
nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_symlinkargs *args)
{
unsigned int len;
int avail;
unsigned int len, avail;
char *old, *new;
struct kvec *vec;
@ -486,7 +486,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
/* now copy next page if there is one */
if (len && !avail && rqstp->rq_arg.page_len) {
avail = rqstp->rq_arg.page_len;
if (avail > PAGE_SIZE) avail = PAGE_SIZE;
if (avail > PAGE_SIZE)
avail = PAGE_SIZE;
old = page_address(rqstp->rq_arg.pages[0]);
}
while (len && avail && *old) {
@ -816,11 +817,11 @@ static __be32 *
encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
struct svc_fh *fhp)
{
p = encode_post_op_attr(cd->rqstp, p, fhp);
*p++ = xdr_one; /* yes, a file handle follows */
p = encode_fh(p, fhp);
fh_put(fhp);
return p;
p = encode_post_op_attr(cd->rqstp, p, fhp);
*p++ = xdr_one; /* yes, a file handle follows */
p = encode_fh(p, fhp);
fh_put(fhp);
return p;
}
static int

View file

@ -350,30 +350,6 @@ static struct rpc_version * nfs_cb_version[] = {
static int do_probe_callback(void *data)
{
struct nfs4_client *clp = data;
struct nfs4_callback *cb = &clp->cl_callback;
struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
.rpc_argp = clp,
};
int status;
status = rpc_call_sync(cb->cb_client, &msg, RPC_TASK_SOFT);
if (status) {
rpc_shutdown_client(cb->cb_client);
cb->cb_client = NULL;
} else
atomic_set(&cb->cb_set, 1);
put_nfs4_client(clp);
return 0;
}
/*
* Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
*/
void
nfsd4_probe_callback(struct nfs4_client *clp)
{
struct sockaddr_in addr;
struct nfs4_callback *cb = &clp->cl_callback;
struct rpc_timeout timeparms = {
@ -390,13 +366,15 @@ nfsd4_probe_callback(struct nfs4_client *clp)
.timeout = &timeparms,
.program = program,
.version = nfs_cb_version[1]->number,
.authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
.authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
.flags = (RPC_CLNT_CREATE_NOPING),
};
struct task_struct *t;
if (atomic_read(&cb->cb_set))
return;
struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
.rpc_argp = clp,
};
struct rpc_clnt *client;
int status;
/* Initialize address */
memset(&addr, 0, sizeof(addr));
@ -416,29 +394,50 @@ nfsd4_probe_callback(struct nfs4_client *clp)
program->stats->program = program;
/* Create RPC client */
cb->cb_client = rpc_create(&args);
if (IS_ERR(cb->cb_client)) {
client = rpc_create(&args);
if (IS_ERR(client)) {
dprintk("NFSD: couldn't create callback client\n");
status = PTR_ERR(client);
goto out_err;
}
status = rpc_call_sync(client, &msg, RPC_TASK_SOFT);
if (status)
goto out_release_client;
cb->cb_client = client;
atomic_set(&cb->cb_set, 1);
put_nfs4_client(clp);
return 0;
out_release_client:
rpc_shutdown_client(client);
out_err:
put_nfs4_client(clp);
dprintk("NFSD: warning: no callback path to client %.*s\n",
(int)clp->cl_name.len, clp->cl_name.data);
return status;
}
/*
* Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
*/
void
nfsd4_probe_callback(struct nfs4_client *clp)
{
struct task_struct *t;
BUG_ON(atomic_read(&clp->cl_callback.cb_set));
/* the task holds a reference to the nfs4_client struct */
atomic_inc(&clp->cl_count);
t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe");
if (IS_ERR(t))
goto out_release_clp;
atomic_dec(&clp->cl_count);
return;
out_release_clp:
atomic_dec(&clp->cl_count);
rpc_shutdown_client(cb->cb_client);
out_err:
cb->cb_client = NULL;
dprintk("NFSD: warning: no callback path to client %.*s\n",
(int)clp->cl_name.len, clp->cl_name.data);
}
/*
@ -458,9 +457,6 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
int retries = 1;
int status = 0;
if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
return;
cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
cbr->cbr_dp = dp;
@ -469,6 +465,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
switch (status) {
case -EIO:
/* Network partition? */
atomic_set(&clp->cl_callback.cb_set, 0);
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/* Race: client probably got cb_recall
@ -481,11 +478,10 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
}
out_put_cred:
if (status == -EIO)
atomic_set(&clp->cl_callback.cb_set, 0);
/* Success or failure, now we're either waiting for lease expiration
* or deleg_return. */
dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count));
/*
* Success or failure, now we're either waiting for lease expiration
* or deleg_return.
*/
put_nfs4_client(clp);
nfs4_put_delegation(dp);
return;

View file

@ -255,13 +255,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
goto out;
if (len == 0)
set_bit(CACHE_NEGATIVE, &ent.h.flags);
else {
if (error >= IDMAP_NAMESZ) {
error = -EINVAL;
goto out;
}
else if (len >= IDMAP_NAMESZ)
goto out;
else
memcpy(ent.name, buf1, sizeof(ent.name));
}
error = -ENOMEM;
res = idtoname_update(&ent, res);
if (res == NULL)
@ -467,20 +464,25 @@ nametoid_update(struct ent *new, struct ent *old)
* Exported API
*/
void
int
nfsd_idmap_init(void)
{
cache_register(&idtoname_cache);
cache_register(&nametoid_cache);
int rv;
rv = cache_register(&idtoname_cache);
if (rv)
return rv;
rv = cache_register(&nametoid_cache);
if (rv)
cache_unregister(&idtoname_cache);
return rv;
}
void
nfsd_idmap_shutdown(void)
{
if (cache_unregister(&idtoname_cache))
printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n");
if (cache_unregister(&nametoid_cache))
printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n");
cache_unregister(&idtoname_cache);
cache_unregister(&nametoid_cache);
}
/*

View file

@ -750,7 +750,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_export,
cstate->current_fh.fh_dentry, buf,
&count, verify->ve_bmval,
rqstp);
rqstp, 0);
/* this means that nfsd4_encode_fattr() ran out of space */
if (status == nfserr_resource && count == 0)

View file

@ -61,7 +61,6 @@ static time_t lease_time = 90; /* default lease time */
static time_t user_lease_time = 90;
static time_t boot_time;
static int in_grace = 1;
static u32 current_clientid = 1;
static u32 current_ownerid = 1;
static u32 current_fileid = 1;
static u32 current_delegid = 1;
@ -340,21 +339,20 @@ STALE_CLIENTID(clientid_t *clid)
* This type of memory management is somewhat inefficient, but we use it
* anyway since SETCLIENTID is not a common operation.
*/
static inline struct nfs4_client *
alloc_client(struct xdr_netobj name)
static struct nfs4_client *alloc_client(struct xdr_netobj name)
{
struct nfs4_client *clp;
if ((clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) {
if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) {
memcpy(clp->cl_name.data, name.data, name.len);
clp->cl_name.len = name.len;
}
else {
kfree(clp);
clp = NULL;
}
clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
if (clp == NULL)
return NULL;
clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
if (clp->cl_name.data == NULL) {
kfree(clp);
return NULL;
}
memcpy(clp->cl_name.data, name.data, name.len);
clp->cl_name.len = name.len;
return clp;
}
@ -363,8 +361,11 @@ shutdown_callback_client(struct nfs4_client *clp)
{
struct rpc_clnt *clnt = clp->cl_callback.cb_client;
/* shutdown rpc client, ending any outstanding recall rpcs */
if (clnt) {
/*
* Callback threads take a reference on the client, so there
* should be no outstanding callbacks at this point.
*/
clp->cl_callback.cb_client = NULL;
rpc_shutdown_client(clnt);
}
@ -422,12 +423,13 @@ expire_client(struct nfs4_client *clp)
put_nfs4_client(clp);
}
static struct nfs4_client *
create_client(struct xdr_netobj name, char *recdir) {
static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
{
struct nfs4_client *clp;
if (!(clp = alloc_client(name)))
goto out;
clp = alloc_client(name);
if (clp == NULL)
return NULL;
memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
atomic_set(&clp->cl_count, 1);
atomic_set(&clp->cl_callback.cb_set, 0);
@ -436,32 +438,30 @@ create_client(struct xdr_netobj name, char *recdir) {
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
INIT_LIST_HEAD(&clp->cl_lru);
out:
return clp;
}
static void
copy_verf(struct nfs4_client *target, nfs4_verifier *source) {
memcpy(target->cl_verifier.data, source->data, sizeof(target->cl_verifier.data));
static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
{
memcpy(target->cl_verifier.data, source->data,
sizeof(target->cl_verifier.data));
}
static void
copy_clid(struct nfs4_client *target, struct nfs4_client *source) {
static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
{
target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
target->cl_clientid.cl_id = source->cl_clientid.cl_id;
}
static void
copy_cred(struct svc_cred *target, struct svc_cred *source) {
static void copy_cred(struct svc_cred *target, struct svc_cred *source)
{
target->cr_uid = source->cr_uid;
target->cr_gid = source->cr_gid;
target->cr_group_info = source->cr_group_info;
get_group_info(target->cr_group_info);
}
static inline int
same_name(const char *n1, const char *n2)
static int same_name(const char *n1, const char *n2)
{
return 0 == memcmp(n1, n2, HEXDIR_LEN);
}
@ -485,26 +485,26 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
return cr1->cr_uid == cr2->cr_uid;
}
static void
gen_clid(struct nfs4_client *clp) {
static void gen_clid(struct nfs4_client *clp)
{
static u32 current_clientid = 1;
clp->cl_clientid.cl_boot = boot_time;
clp->cl_clientid.cl_id = current_clientid++;
}
static void
gen_confirm(struct nfs4_client *clp) {
struct timespec tv;
u32 * p;
static void gen_confirm(struct nfs4_client *clp)
{
static u32 i;
u32 *p;
tv = CURRENT_TIME;
p = (u32 *)clp->cl_confirm.data;
*p++ = tv.tv_sec;
*p++ = tv.tv_nsec;
*p++ = get_seconds();
*p++ = i++;
}
static int
check_name(struct xdr_netobj name) {
static int check_name(struct xdr_netobj name)
{
if (name.len == 0)
return 0;
if (name.len > NFS4_OPAQUE_LIMIT) {
@ -683,39 +683,6 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
return;
}
/*
* RFC 3010 has a complex implmentation description of processing a
* SETCLIENTID request consisting of 5 bullets, labeled as
* CASE0 - CASE4 below.
*
* NOTES:
* callback information will be processed in a future patch
*
* an unconfirmed record is added when:
* NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record.
* CASE 1: confirmed record found with matching name, principal,
* verifier, and clientid.
* CASE 2: confirmed record found with matching name, principal,
* and there is no unconfirmed record with matching
* name and principal
*
* an unconfirmed record is replaced when:
* CASE 3: confirmed record found with matching name, principal,
* and an unconfirmed record is found with matching
* name, principal, and with clientid and
* confirm that does not match the confirmed record.
* CASE 4: there is no confirmed record with matching name and
* principal. there is an unconfirmed record with
* matching name, principal.
*
* an unconfirmed record is deleted when:
* CASE 1: an unconfirmed record that matches input name, verifier,
* and confirmed clientid.
* CASE 4: any unconfirmed records with matching name and principal
* that exist after an unconfirmed record has been replaced
* as described above.
*
*/
__be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid *setclid)
@ -748,11 +715,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_lock_state();
conf = find_confirmed_client_by_str(dname, strhashval);
if (conf) {
/*
* CASE 0:
* clname match, confirmed, different principal
* or different ip_address
*/
/* RFC 3530 14.2.33 CASE 0: */
status = nfserr_clid_inuse;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)
|| conf->cl_addr != sin->sin_addr.s_addr) {
@ -761,12 +724,17 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
}
/*
* section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION")
* has a description of SETCLIENTID request processing consisting
* of 5 bullet points, labeled as CASE0 - CASE4 below.
*/
unconf = find_unconfirmed_client_by_str(dname, strhashval);
status = nfserr_resource;
if (!conf) {
/*
* CASE 4:
* placed first, because it is the normal case.
/*
* RFC 3530 14.2.33 CASE 4:
* placed first, because it is the normal case
*/
if (unconf)
expire_client(unconf);
@ -776,17 +744,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
gen_clid(new);
} else if (same_verf(&conf->cl_verifier, &clverifier)) {
/*
* CASE 1:
* cl_name match, confirmed, principal match
* verifier match: probable callback update
*
* remove any unconfirmed nfs4_client with
* matching cl_name, cl_verifier, and cl_clientid
*
* create and insert an unconfirmed nfs4_client with same
* cl_name, cl_verifier, and cl_clientid as existing
* nfs4_client, but with the new callback info and a
* new cl_confirm
* RFC 3530 14.2.33 CASE 1:
* probable callback update
*/
if (unconf) {
/* Note this is removing unconfirmed {*x***},
@ -802,43 +761,25 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
copy_clid(new, conf);
} else if (!unconf) {
/*
* CASE 2:
* clname match, confirmed, principal match
* verfier does not match
* no unconfirmed. create a new unconfirmed nfs4_client
* using input clverifier, clname, and callback info
* and generate a new cl_clientid and cl_confirm.
* RFC 3530 14.2.33 CASE 2:
* probable client reboot; state will be removed if
* confirmed.
*/
new = create_client(clname, dname);
if (new == NULL)
goto out;
gen_clid(new);
} else if (!same_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
/*
* CASE3:
* confirmed found (name, principal match)
* confirmed verifier does not match input clverifier
*
* unconfirmed found (name match)
* confirmed->cl_confirm != unconfirmed->cl_confirm
*
* remove unconfirmed.
*
* create an unconfirmed nfs4_client
* with same cl_name as existing confirmed nfs4_client,
* but with new callback info, new cl_clientid,
* new cl_verifier and a new cl_confirm
} else {
/*
* RFC 3530 14.2.33 CASE 3:
* probable client reboot; state will be removed if
* confirmed.
*/
expire_client(unconf);
new = create_client(clname, dname);
if (new == NULL)
goto out;
gen_clid(new);
} else {
/* No cases hit !!! */
status = nfserr_inval;
goto out;
}
copy_verf(new, &clverifier);
new->cl_addr = sin->sin_addr.s_addr;
@ -857,11 +798,9 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/*
* RFC 3010 has a complex implmentation description of processing a
* SETCLIENTID_CONFIRM request consisting of 4 bullets describing
* processing on a DRC miss, labeled as CASE1 - CASE4 below.
*
* NOTE: callback information will be processed here in a future patch
* Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has
* a description of SETCLIENTID_CONFIRM request processing consisting of 4
* bullets, labeled as CASE1 - CASE4 below.
*/
__be32
nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
@ -892,16 +831,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
if (unconf && unconf->cl_addr != sin->sin_addr.s_addr)
goto out;
if ((conf && unconf) &&
(same_verf(&unconf->cl_confirm, &confirm)) &&
(same_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
(same_name(conf->cl_recdir,unconf->cl_recdir)) &&
(!same_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
/* CASE 1:
* unconf record that matches input clientid and input confirm.
* conf record that matches input clientid.
* conf and unconf records match names, verifiers
*/
/*
* section 14.2.34 of RFC 3530 has a description of
* SETCLIENTID_CONFIRM request processing consisting
* of 4 bullet points, labeled as CASE1 - CASE4 below.
*/
if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) {
/*
* RFC 3530 14.2.34 CASE 1:
* callback update
*/
if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
status = nfserr_clid_inuse;
else {
@ -914,15 +853,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfs_ok;
}
} else if ((conf && !unconf) ||
((conf && unconf) &&
(!same_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
!same_name(conf->cl_recdir, unconf->cl_recdir)))) {
/* CASE 2:
* conf record that matches input clientid.
* if unconf record matches input clientid, then
* unconf->cl_name or unconf->cl_verifier don't match the
* conf record.
} else if (conf && !unconf) {
/*
* RFC 3530 14.2.34 CASE 2:
* probable retransmitted request; play it safe and
* do nothing.
*/
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
status = nfserr_clid_inuse;
@ -930,10 +865,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfs_ok;
} else if (!conf && unconf
&& same_verf(&unconf->cl_confirm, &confirm)) {
/* CASE 3:
* conf record not found.
* unconf record found.
* unconf->cl_confirm matches input confirm
/*
* RFC 3530 14.2.34 CASE 3:
* Normal case; new or rebooted client:
*/
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
status = nfserr_clid_inuse;
@ -948,16 +882,15 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
}
move_to_confirmed(unconf);
conf = unconf;
nfsd4_probe_callback(conf);
status = nfs_ok;
}
} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
&& (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
&confirm)))) {
/* CASE 4:
* conf record not found, or if conf, conf->cl_confirm does not
* match input confirm.
* unconf record not found, or if unconf, unconf->cl_confirm
* does not match input confirm.
/*
* RFC 3530 14.2.34 CASE 4:
* Client probably hasn't noticed that we rebooted yet.
*/
status = nfserr_stale_clientid;
} else {
@ -965,8 +898,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfserr_clid_inuse;
}
out:
if (!status)
nfsd4_probe_callback(conf);
nfs4_unlock_state();
return status;
}
@ -1226,14 +1157,19 @@ find_file(struct inode *ino)
return NULL;
}
static int access_valid(u32 x)
static inline int access_valid(u32 x)
{
return (x > 0 && x < 4);
if (x < NFS4_SHARE_ACCESS_READ)
return 0;
if (x > NFS4_SHARE_ACCESS_BOTH)
return 0;
return 1;
}
static int deny_valid(u32 x)
static inline int deny_valid(u32 x)
{
return (x >= 0 && x < 5);
/* Note: unlike access bits, deny bits may be zero. */
return x <= NFS4_SHARE_DENY_BOTH;
}
static void
@ -2162,8 +2098,10 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
goto check_replay;
}
*stpp = stp;
*sopp = sop = stp->st_stateowner;
if (lock) {
struct nfs4_stateowner *sop = stp->st_stateowner;
clientid_t *lockclid = &lock->v.new.clientid;
struct nfs4_client *clp = sop->so_client;
int lkflg = 0;
@ -2193,9 +2131,6 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
return nfserr_bad_stateid;
}
*stpp = stp;
*sopp = sop = stp->st_stateowner;
/*
* We now validate the seqid and stateid generation numbers.
* For the moment, we ignore the possibility of

View file

@ -148,12 +148,12 @@ xdr_error: \
} \
} while (0)
static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
{
/* We want more bytes than seem to be available.
* Maybe we need a new page, maybe we have just run out
*/
int avail = (char*)argp->end - (char*)argp->p;
unsigned int avail = (char *)argp->end - (char *)argp->p;
__be32 *p;
if (avail + argp->pagelen < nbytes)
return NULL;
@ -169,6 +169,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
return NULL;
}
/*
* The following memcpy is safe because read_buf is always
* called with nbytes > avail, and the two cases above both
* guarantee p points to at least nbytes bytes.
*/
memcpy(p, argp->p, avail);
/* step to next page */
argp->p = page_address(argp->pagelist[0]);
@ -1448,7 +1453,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
__be32
nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval,
struct svc_rqst *rqstp)
struct svc_rqst *rqstp, int ignore_crossmnt)
{
u32 bmval0 = bmval[0];
u32 bmval1 = bmval[1];
@ -1828,7 +1833,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
if ((buflen -= 8) < 0)
goto out_resource;
if (exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) {
/*
* Get parent's attributes if not ignoring crossmount
* and this is the root of a cross-mounted filesystem.
*/
if (ignore_crossmnt == 0 &&
exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) {
err = vfs_getattr(exp->ex_mnt->mnt_parent,
exp->ex_mnt->mnt_mountpoint, &stat);
if (err)
@ -1864,13 +1874,25 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
struct svc_export *exp = cd->rd_fhp->fh_export;
struct dentry *dentry;
__be32 nfserr;
int ignore_crossmnt = 0;
dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
exp_get(exp);
if (d_mountpoint(dentry)) {
/*
* In the case of a mountpoint, the client may be asking for
* attributes that are only properties of the underlying filesystem
* as opposed to the cross-mounted file system. In such a case,
* we will not follow the cross mount and will fill the attribtutes
* directly from the mountpoint dentry.
*/
if (d_mountpoint(dentry) &&
(cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 &&
(cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0)
ignore_crossmnt = 1;
else if (d_mountpoint(dentry)) {
int err;
/*
@ -1889,7 +1911,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
}
nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
cd->rd_rqstp);
cd->rd_rqstp, ignore_crossmnt);
out_put:
dput(dentry);
exp_put(exp);
@ -2043,7 +2065,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
resp->p, &buflen, getattr->ga_bmval,
resp->rqstp);
resp->rqstp, 0);
if (!nfserr)
resp->p += buflen;
return nfserr;

View file

@ -44,17 +44,17 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
*/
static DEFINE_SPINLOCK(cache_lock);
void
nfsd_cache_init(void)
int nfsd_reply_cache_init(void)
{
struct svc_cacherep *rp;
int i;
INIT_LIST_HEAD(&lru_head);
i = CACHESIZE;
while(i) {
while (i) {
rp = kmalloc(sizeof(*rp), GFP_KERNEL);
if (!rp) break;
if (!rp)
goto out_nomem;
list_add(&rp->c_lru, &lru_head);
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
@ -62,23 +62,19 @@ nfsd_cache_init(void)
i--;
}
if (i)
printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n",
CACHESIZE, CACHESIZE-i);
hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
if (!hash_list) {
nfsd_cache_shutdown();
printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n",
HASHSIZE * sizeof(struct hlist_head));
return;
}
if (!hash_list)
goto out_nomem;
cache_disabled = 0;
return 0;
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
nfsd_reply_cache_shutdown();
return -ENOMEM;
}
void
nfsd_cache_shutdown(void)
void nfsd_reply_cache_shutdown(void)
{
struct svc_cacherep *rp;

View file

@ -304,6 +304,9 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
struct auth_domain *dom;
struct knfsd_fh fh;
if (size == 0)
return -EINVAL;
if (buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
@ -503,7 +506,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int len = 0;
lock_kernel();
if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, NULL);
len = svc_xprt_names(nfsd_serv, buf, 0);
unlock_kernel();
return len;
}
@ -540,7 +543,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
}
return err < 0 ? err : 0;
}
if (buf[0] == '-') {
if (buf[0] == '-' && isdigit(buf[1])) {
char *toclose = kstrdup(buf+1, GFP_KERNEL);
int len = 0;
if (!toclose)
@ -554,6 +557,53 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
kfree(toclose);
return len;
}
/*
* Add a transport listener by writing it's transport name
*/
if (isalpha(buf[0])) {
int err;
char transport[16];
int port;
if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
err = nfsd_create_serv();
if (!err) {
err = svc_create_xprt(nfsd_serv,
transport, port,
SVC_SOCK_ANONYMOUS);
if (err == -ENOENT)
/* Give a reasonable perror msg for
* bad transport string */
err = -EPROTONOSUPPORT;
}
return err < 0 ? err : 0;
}
}
/*
* Remove a transport by writing it's transport name and port number
*/
if (buf[0] == '-' && isalpha(buf[1])) {
struct svc_xprt *xprt;
int err = -EINVAL;
char transport[16];
int port;
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
if (port == 0)
return -EINVAL;
lock_kernel();
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
AF_UNSPEC, port);
if (xprt) {
svc_close_xprt(xprt);
svc_xprt_put(xprt);
err = 0;
} else
err = -ENOTCONN;
}
unlock_kernel();
return err < 0 ? err : 0;
}
}
return -EINVAL;
}
@ -616,7 +666,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
char *recdir;
int len, status;
if (size > PATH_MAX || buf[size-1] != '\n')
if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
@ -674,6 +724,27 @@ static struct file_system_type nfsd_fs_type = {
.kill_sb = kill_litter_super,
};
#ifdef CONFIG_PROC_FS
static int create_proc_exports_entry(void)
{
struct proc_dir_entry *entry;
entry = proc_mkdir("fs/nfs", NULL);
if (!entry)
return -ENOMEM;
entry = create_proc_entry("fs/nfs/exports", 0, NULL);
if (!entry)
return -ENOMEM;
entry->proc_fops = &exports_operations;
return 0;
}
#else /* CONFIG_PROC_FS */
static int create_proc_exports_entry(void)
{
return 0;
}
#endif
static int __init init_nfsd(void)
{
int retval;
@ -683,32 +754,43 @@ static int __init init_nfsd(void)
if (retval)
return retval;
nfsd_stat_init(); /* Statistics */
nfsd_cache_init(); /* RPC reply cache */
nfsd_export_init(); /* Exports table */
retval = nfsd_reply_cache_init();
if (retval)
goto out_free_stat;
retval = nfsd_export_init();
if (retval)
goto out_free_cache;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
nfsd_idmap_init(); /* Name to ID mapping */
if (proc_mkdir("fs/nfs", NULL)) {
struct proc_dir_entry *entry;
entry = create_proc_entry("fs/nfs/exports", 0, NULL);
if (entry)
entry->proc_fops = &exports_operations;
}
retval = nfsd_idmap_init();
if (retval)
goto out_free_lockd;
retval = create_proc_exports_entry();
if (retval)
goto out_free_idmap;
retval = register_filesystem(&nfsd_fs_type);
if (retval) {
nfsd_export_shutdown();
nfsd_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
nfsd_lockd_shutdown();
}
if (retval)
goto out_free_all;
return 0;
out_free_all:
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
out_free_idmap:
nfsd_idmap_shutdown();
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_export_shutdown();
out_free_cache:
nfsd_reply_cache_shutdown();
out_free_stat:
nfsd_stat_shutdown();
nfsd4_free_slabs();
return retval;
}
static void __exit exit_nfsd(void)
{
nfsd_export_shutdown();
nfsd_cache_shutdown();
nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();

View file

@ -22,6 +22,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/nfsd/nfsd.h>
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_FH

View file

@ -155,8 +155,8 @@ static int killsig; /* signal that was used to kill last nfsd */
static void nfsd_last_thread(struct svc_serv *serv)
{
/* When last nfsd thread exits we need to do some clean-up */
struct svc_sock *svsk;
list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
struct svc_xprt *xprt;
list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
lockd_down();
nfsd_serv = NULL;
nfsd_racache_shutdown();
@ -236,7 +236,7 @@ static int nfsd_init_socks(int port)
error = lockd_up(IPPROTO_UDP);
if (error >= 0) {
error = svc_makesock(nfsd_serv, IPPROTO_UDP, port,
error = svc_create_xprt(nfsd_serv, "udp", port,
SVC_SOCK_DEFAULTS);
if (error < 0)
lockd_down();
@ -247,7 +247,7 @@ static int nfsd_init_socks(int port)
#ifdef CONFIG_NFSD_TCP
error = lockd_up(IPPROTO_TCP);
if (error >= 0) {
error = svc_makesock(nfsd_serv, IPPROTO_TCP, port,
error = svc_create_xprt(nfsd_serv, "tcp", port,
SVC_SOCK_DEFAULTS);
if (error < 0)
lockd_down();

View file

@ -15,6 +15,7 @@
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr.h>
#include <linux/mm.h>
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR
@ -62,10 +63,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
* no slashes or null bytes.
*/
static __be32 *
decode_filename(__be32 *p, char **namp, int *lenp)
decode_filename(__be32 *p, char **namp, unsigned int *lenp)
{
char *name;
int i;
unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) {
@ -78,10 +79,10 @@ decode_filename(__be32 *p, char **namp, int *lenp)
}
static __be32 *
decode_pathname(__be32 *p, char **namp, int *lenp)
decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
{
char *name;
int i;
unsigned int i;
if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
for (i = 0, name = *namp; i < *lenp; i++, name++) {

View file

@ -132,7 +132,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
__be32
nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
const char *name, int len,
const char *name, unsigned int len,
struct svc_export **exp_ret, struct dentry **dentry_ret)
{
struct svc_export *exp;
@ -226,7 +226,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
*/
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
int len, struct svc_fh *resfh)
unsigned int len, struct svc_fh *resfh)
{
struct svc_export *exp;
struct dentry *dentry;
@ -1151,6 +1151,26 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif /* CONFIG_NFSD_V3 */
__be32
nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
struct iattr *iap)
{
/*
* Mode has already been set earlier in create:
*/
iap->ia_valid &= ~ATTR_MODE;
/*
* Setting uid/gid works only for root. Irix appears to
* send along the gid on create when it tries to implement
* setgid directories via NFS:
*/
if (current->fsuid != 0)
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
return 0;
}
/*
* Create a file (regular, directory, device, fifo); UNIX sockets
* not yet implemented.
@ -1167,6 +1187,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
__be32 err2;
int host_err;
err = nfserr_perm;
@ -1257,16 +1278,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
/* Set file attributes. Mode has already been set and
* setting uid/gid works only for root. Irix appears to
* send along the gid when it tries to implement setgid
* directories via NFS.
*/
if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
if (err2)
err = err2;
}
err2 = nfsd_create_setattr(rqstp, resfhp, iap);
if (err2)
err = err2;
/*
* Update the file handle to get the new inode info.
*/
@ -1295,6 +1309,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
__be32 err2;
int host_err;
__u32 v_mtime=0, v_atime=0;
@ -1399,16 +1414,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_atime.tv_nsec = 0;
}
/* Set file attributes.
* Irix appears to send along the gid when it tries to
* implement setgid directories via NFS. Clear out all that cruft.
*/
set_attr:
if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
if (err2)
err = err2;
}
err2 = nfsd_create_setattr(rqstp, resfhp, iap);
if (err2)
err = err2;
/*
* Update the filehandle to get the new inode info.

View file

@ -173,14 +173,17 @@ void nlmclnt_next_cookie(struct nlm_cookie *);
/*
* Host cache
*/
struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *, int, int, const char *, int);
struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *, const char *, int);
struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *, int, int,
const char *, unsigned int);
struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *,
unsigned int);
struct rpc_clnt * nlm_bind_host(struct nlm_host *);
void nlm_rebind_host(struct nlm_host *);
struct nlm_host * nlm_get_host(struct nlm_host *);
void nlm_release_host(struct nlm_host *);
void nlm_shutdown_hosts(void);
extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int, u32);
extern void nlm_host_rebooted(const struct sockaddr_in *, const char *,
unsigned int, u32);
void nsm_release(struct nsm_handle *);

View file

@ -29,7 +29,7 @@ struct svc_rqst;
/* Lock info passed via NLM */
struct nlm_lock {
char * caller;
int len; /* length of "caller" */
unsigned int len; /* length of "caller" */
struct nfs_fh fh;
struct xdr_netobj oh;
u32 svid;
@ -78,7 +78,7 @@ struct nlm_res {
*/
struct nlm_reboot {
char * mon;
int len;
unsigned int len;
u32 state;
__be32 addr;
__be32 vers;

View file

@ -4,4 +4,3 @@ unifdef-y += stats.h
unifdef-y += syscall.h
unifdef-y += nfsfh.h
unifdef-y += debug.h
unifdef-y += auth.h

View file

@ -72,8 +72,8 @@ enum {
*/
#define RC_DELAY (HZ/5)
void nfsd_cache_init(void);
void nfsd_cache_shutdown(void);
int nfsd_reply_cache_init(void);
void nfsd_reply_cache_shutdown(void);
int nfsd_cache_lookup(struct svc_rqst *, int);
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);

View file

@ -122,7 +122,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
/*
* Function declarations
*/
void nfsd_export_init(void);
int nfsd_export_init(void);
void nfsd_export_shutdown(void);
void nfsd_export_flush(void);
void exp_readlock(void);

View file

@ -20,7 +20,6 @@
#include <linux/nfsd/debug.h>
#include <linux/nfsd/nfsfh.h>
#include <linux/nfsd/export.h>
#include <linux/nfsd/auth.h>
#include <linux/nfsd/stats.h>
/*
* nfsd version
@ -70,9 +69,9 @@ void nfsd_racache_shutdown(void);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
const char *, int, struct svc_fh *);
const char *, unsigned int, struct svc_fh *);
__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, int,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
struct iattr *, int, time_t);

View file

@ -18,7 +18,6 @@
#include <linux/nfsd/const.h>
#include <linux/nfsd/export.h>
#include <linux/nfsd/nfsfh.h>
#include <linux/nfsd/auth.h>
/*
* Version of the syscall interface

View file

@ -23,7 +23,7 @@ struct nfsd_sattrargs {
struct nfsd_diropargs {
struct svc_fh fh;
char * name;
int len;
unsigned int len;
};
struct nfsd_readargs {
@ -43,17 +43,17 @@ struct nfsd_writeargs {
struct nfsd_createargs {
struct svc_fh fh;
char * name;
int len;
unsigned int len;
struct iattr attrs;
};
struct nfsd_renameargs {
struct svc_fh ffh;
char * fname;
int flen;
unsigned int flen;
struct svc_fh tfh;
char * tname;
int tlen;
unsigned int tlen;
};
struct nfsd_readlinkargs {
@ -65,15 +65,15 @@ struct nfsd_linkargs {
struct svc_fh ffh;
struct svc_fh tfh;
char * tname;
int tlen;
unsigned int tlen;
};
struct nfsd_symlinkargs {
struct svc_fh ffh;
char * fname;
int flen;
unsigned int flen;
char * tname;
int tlen;
unsigned int tlen;
struct iattr attrs;
};

View file

@ -21,7 +21,7 @@ struct nfsd3_sattrargs {
struct nfsd3_diropargs {
struct svc_fh fh;
char * name;
int len;
unsigned int len;
};
struct nfsd3_accessargs {
@ -48,7 +48,7 @@ struct nfsd3_writeargs {
struct nfsd3_createargs {
struct svc_fh fh;
char * name;
int len;
unsigned int len;
int createmode;
struct iattr attrs;
__be32 * verf;
@ -57,7 +57,7 @@ struct nfsd3_createargs {
struct nfsd3_mknodargs {
struct svc_fh fh;
char * name;
int len;
unsigned int len;
__u32 ftype;
__u32 major, minor;
struct iattr attrs;
@ -66,10 +66,10 @@ struct nfsd3_mknodargs {
struct nfsd3_renameargs {
struct svc_fh ffh;
char * fname;
int flen;
unsigned int flen;
struct svc_fh tfh;
char * tname;
int tlen;
unsigned int tlen;
};
struct nfsd3_readlinkargs {
@ -81,15 +81,15 @@ struct nfsd3_linkargs {
struct svc_fh ffh;
struct svc_fh tfh;
char * tname;
int tlen;
unsigned int tlen;
};
struct nfsd3_symlinkargs {
struct svc_fh ffh;
char * fname;
int flen;
unsigned int flen;
char * tname;
int tlen;
unsigned int tlen;
struct iattr attrs;
};

View file

@ -441,7 +441,7 @@ void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, __be32 *buffer, int *countp,
u32 *bmval, struct svc_rqst *);
u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *,
struct nfsd4_setclientid *setclid);

View file

@ -44,11 +44,16 @@
#define IDMAP_NAMESZ 128
#ifdef CONFIG_NFSD_V4
void nfsd_idmap_init(void);
int nfsd_idmap_init(void);
void nfsd_idmap_shutdown(void);
#else
static inline void nfsd_idmap_init(void) {};
static inline void nfsd_idmap_shutdown(void) {};
static inline int nfsd_idmap_init(void)
{
return 0;
}
static inline void nfsd_idmap_shutdown(void)
{
}
#endif
int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *);

View file

@ -169,8 +169,8 @@ extern int cache_check(struct cache_detail *detail,
extern void cache_flush(void);
extern void cache_purge(struct cache_detail *detail);
#define NEVER (0x7FFFFFFF)
extern void cache_register(struct cache_detail *cd);
extern int cache_unregister(struct cache_detail *cd);
extern int cache_register(struct cache_detail *cd);
extern void cache_unregister(struct cache_detail *cd);
extern void qword_add(char **bpp, int *lp, char *str);
extern void qword_addhex(char **bpp, int *lp, char *buf, int blen);

View file

@ -20,7 +20,7 @@
#define RPCDBG_BIND 0x0020
#define RPCDBG_SCHED 0x0040
#define RPCDBG_TRANS 0x0080
#define RPCDBG_SVCSOCK 0x0100
#define RPCDBG_SVCXPRT 0x0100
#define RPCDBG_SVCDSP 0x0200
#define RPCDBG_MISC 0x0400
#define RPCDBG_CACHE 0x0800

View file

@ -204,7 +204,7 @@ union svc_addr_u {
struct svc_rqst {
struct list_head rq_list; /* idle list */
struct list_head rq_all; /* all threads list */
struct svc_sock * rq_sock; /* socket */
struct svc_xprt * rq_xprt; /* transport ptr */
struct sockaddr_storage rq_addr; /* peer address */
size_t rq_addrlen;
@ -214,9 +214,10 @@ struct svc_rqst {
struct auth_ops * rq_authop; /* authentication flavour */
u32 rq_flavor; /* pseudoflavor */
struct svc_cred rq_cred; /* auth info */
struct sk_buff * rq_skbuff; /* fast recv inet buffer */
void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
struct xdr_buf rq_res;
struct page * rq_pages[RPCSVC_MAXPAGES];
@ -317,11 +318,12 @@ static inline void svc_free_res_pages(struct svc_rqst *rqstp)
struct svc_deferred_req {
u32 prot; /* protocol (UDP or TCP) */
struct svc_sock *svsk;
struct svc_xprt *xprt;
struct sockaddr_storage addr; /* where reply must go */
size_t addrlen;
union svc_addr_u daddr; /* where reply must come from */
struct cache_deferred_req handle;
size_t xprt_hlen;
int argslen;
__be32 args[0];
};
@ -382,6 +384,8 @@ struct svc_procedure {
*/
struct svc_serv * svc_create(struct svc_program *, unsigned int,
void (*shutdown)(struct svc_serv*));
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool);
int svc_create_thread(svc_thread_fn, struct svc_serv *);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,

View file

@ -0,0 +1,262 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#ifndef SVC_RDMA_H
#define SVC_RDMA_H
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#define SVCRDMA_DEBUG
/* RPC/RDMA parameters and stats */
extern unsigned int svcrdma_ord;
extern unsigned int svcrdma_max_requests;
extern unsigned int svcrdma_max_req_size;
extern atomic_t rdma_stat_recv;
extern atomic_t rdma_stat_read;
extern atomic_t rdma_stat_write;
extern atomic_t rdma_stat_sq_starve;
extern atomic_t rdma_stat_rq_starve;
extern atomic_t rdma_stat_rq_poll;
extern atomic_t rdma_stat_rq_prod;
extern atomic_t rdma_stat_sq_poll;
extern atomic_t rdma_stat_sq_prod;
#define RPCRDMA_VERSION 1
/*
* Contexts are built when an RDMA request is created and are a
* record of the resources that can be recovered when the request
* completes.
*/
struct svc_rdma_op_ctxt {
struct svc_rdma_op_ctxt *next;
struct xdr_buf arg;
struct list_head dto_q;
enum ib_wr_opcode wr_op;
enum ib_wc_status wc_status;
u32 byte_len;
struct svcxprt_rdma *xprt;
unsigned long flags;
enum dma_data_direction direction;
int count;
struct ib_sge sge[RPCSVC_MAXPAGES];
struct page *pages[RPCSVC_MAXPAGES];
};
#define RDMACTXT_F_READ_DONE 1
#define RDMACTXT_F_LAST_CTXT 2
struct svcxprt_rdma {
struct svc_xprt sc_xprt; /* SVC transport structure */
struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
struct list_head sc_accept_q; /* Conn. waiting accept */
int sc_ord; /* RDMA read limit */
wait_queue_head_t sc_read_wait;
int sc_max_sge;
int sc_sq_depth; /* Depth of SQ */
atomic_t sc_sq_count; /* Number of SQ WR on queue */
int sc_max_requests; /* Depth of RQ */
int sc_max_req_size; /* Size of each RQ WR buf */
struct ib_pd *sc_pd;
struct svc_rdma_op_ctxt *sc_ctxt_head;
int sc_ctxt_cnt;
int sc_ctxt_bump;
int sc_ctxt_max;
spinlock_t sc_ctxt_lock;
struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp;
struct ib_cq *sc_rq_cq;
struct ib_cq *sc_sq_cq;
struct ib_mr *sc_phys_mr; /* MR for server memory */
spinlock_t sc_lock; /* transport lock */
wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */
unsigned long sc_flags;
struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */
struct list_head sc_read_complete_q;
spinlock_t sc_read_complete_lock;
};
/* sc_flags */
#define RDMAXPRT_RQ_PENDING 1
#define RDMAXPRT_SQ_PENDING 2
#define RDMAXPRT_CONN_PENDING 3
#define RPCRDMA_LISTEN_BACKLOG 10
/* The default ORD value is based on two outstanding full-size writes with a
* page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
#define RPCRDMA_ORD (64/4)
#define RPCRDMA_SQ_DEPTH_MULT 8
#define RPCRDMA_MAX_THREADS 16
#define RPCRDMA_MAX_REQUESTS 16
#define RPCRDMA_MAX_REQ_SIZE 4096
/* svc_rdma_marshal.c */
extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *,
int *, int *);
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
struct rpcrdma_msg *,
enum rpcrdma_errcode, u32 *);
extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
u32, u64, u32);
extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
struct rpcrdma_msg *,
struct rpcrdma_msg *,
enum rpcrdma_proc);
extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
/* svc_rdma_recvfrom.c */
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_sendto.c */
extern int svc_rdma_sendto(struct svc_rqst *);
/* svc_rdma_transport.c */
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
enum rpcrdma_errcode);
struct page *svc_rdma_get_page(void);
extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern struct svc_xprt_class svc_rdma_class;
extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
/* svc_rdma.c */
extern int svc_rdma_init(void);
extern void svc_rdma_cleanup(void);
/*
* Returns the address of the first read chunk or <nul> if no read chunk is
* present
*/
static inline struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
{
struct rpcrdma_read_chunk *ch =
(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
if (ch->rc_discrim == 0)
return NULL;
return ch;
}
/*
* Returns the address of the first read write array element or <nul> if no
* write array list is present
*/
static inline struct rpcrdma_write_array *
svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
{
if (rmsgp->rm_body.rm_chunks[0] != 0
|| rmsgp->rm_body.rm_chunks[1] == 0)
return NULL;
return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
}
/*
* Returns the address of the first reply array element or <nul> if no
* reply array is present
*/
static inline struct rpcrdma_write_array *
svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
{
struct rpcrdma_read_chunk *rch;
struct rpcrdma_write_array *wr_ary;
struct rpcrdma_write_array *rp_ary;
/* XXX: Need to fix when reply list may occur with read-list and/or
* write list */
if (rmsgp->rm_body.rm_chunks[0] != 0 ||
rmsgp->rm_body.rm_chunks[1] != 0)
return NULL;
rch = svc_rdma_get_read_chunk(rmsgp);
if (rch) {
while (rch->rc_discrim)
rch++;
/* The reply list follows an empty write array located
* at 'rc_position' here. The reply array is at rc_target.
*/
rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
goto found_it;
}
wr_ary = svc_rdma_get_write_array(rmsgp);
if (wr_ary) {
rp_ary = (struct rpcrdma_write_array *)
&wr_ary->
wc_array[wr_ary->wc_nchunks].wc_target.rs_length;
goto found_it;
}
/* No read list, no write list */
rp_ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[2];
found_it:
if (rp_ary->wc_discrim == 0)
return NULL;
return rp_ary;
}
#endif

View file

@ -0,0 +1,159 @@
/*
* linux/include/linux/sunrpc/svc_xprt.h
*
* RPC server transport I/O
*/
#ifndef SUNRPC_SVC_XPRT_H
#define SUNRPC_SVC_XPRT_H
#include <linux/sunrpc/svc.h>
#include <linux/module.h>
struct svc_xprt_ops {
struct svc_xprt *(*xpo_create)(struct svc_serv *,
struct sockaddr *, int,
int);
struct svc_xprt *(*xpo_accept)(struct svc_xprt *);
int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *);
void (*xpo_prep_reply_hdr)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
};
struct svc_xprt_class {
const char *xcl_name;
struct module *xcl_owner;
struct svc_xprt_ops *xcl_ops;
struct list_head xcl_list;
u32 xcl_max_payload;
};
struct svc_xprt {
struct svc_xprt_class *xpt_class;
struct svc_xprt_ops *xpt_ops;
struct kref xpt_ref;
struct list_head xpt_list;
struct list_head xpt_ready;
unsigned long xpt_flags;
#define XPT_BUSY 0 /* enqueued/receiving */
#define XPT_CONN 1 /* conn pending */
#define XPT_CLOSE 2 /* dead or dying */
#define XPT_DATA 3 /* data pending */
#define XPT_TEMP 4 /* connected transport */
#define XPT_DEAD 6 /* transport closed */
#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
#define XPT_DEFERRED 8 /* deferred request pending */
#define XPT_OLD 9 /* used for xprt aging mark+sweep */
#define XPT_DETACHED 10 /* detached from tempsocks list */
#define XPT_LISTENER 11 /* listening endpoint */
#define XPT_CACHE_AUTH 12 /* cache auth info */
struct svc_pool *xpt_pool; /* current pool iff queued */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
struct mutex xpt_mutex; /* to serialize sending data */
spinlock_t xpt_lock; /* protects sk_deferred
* and xpt_auth_cache */
void *xpt_auth_cache;/* auth cache */
struct list_head xpt_deferred; /* deferred requests that need
* to be revisted */
struct sockaddr_storage xpt_local; /* local address */
size_t xpt_locallen; /* length of address */
struct sockaddr_storage xpt_remote; /* remote peer's address */
size_t xpt_remotelen; /* length of address */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
struct svc_serv *);
int svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
void svc_xprt_enqueue(struct svc_xprt *xprt);
void svc_xprt_received(struct svc_xprt *);
void svc_xprt_put(struct svc_xprt *xprt);
void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
void svc_close_xprt(struct svc_xprt *xprt);
void svc_delete_xprt(struct svc_xprt *xprt);
int svc_port_is_privileged(struct sockaddr *sin);
int svc_print_xprts(char *buf, int maxlen);
struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int);
int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen);
static inline void svc_xprt_get(struct svc_xprt *xprt)
{
kref_get(&xprt->xpt_ref);
}
static inline void svc_xprt_set_local(struct svc_xprt *xprt,
struct sockaddr *sa, int salen)
{
memcpy(&xprt->xpt_local, sa, salen);
xprt->xpt_locallen = salen;
}
static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
struct sockaddr *sa, int salen)
{
memcpy(&xprt->xpt_remote, sa, salen);
xprt->xpt_remotelen = salen;
}
static inline unsigned short svc_addr_port(struct sockaddr *sa)
{
unsigned short ret = 0;
switch (sa->sa_family) {
case AF_INET:
ret = ntohs(((struct sockaddr_in *)sa)->sin_port);
break;
case AF_INET6:
ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
break;
}
return ret;
}
static inline size_t svc_addr_len(struct sockaddr *sa)
{
switch (sa->sa_family) {
case AF_INET:
return sizeof(struct sockaddr_in);
case AF_INET6:
return sizeof(struct sockaddr_in6);
}
return -EAFNOSUPPORT;
}
static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt)
{
return svc_addr_port((struct sockaddr *)&xprt->xpt_local);
}
static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt)
{
return svc_addr_port((struct sockaddr *)&xprt->xpt_remote);
}
static inline char *__svc_print_addr(struct sockaddr *addr,
char *buf, size_t len)
{
switch (addr->sa_family) {
case AF_INET:
snprintf(buf, len, "%u.%u.%u.%u, port=%u",
NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
ntohs(((struct sockaddr_in *) addr)->sin_port));
break;
case AF_INET6:
snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
break;
default:
snprintf(buf, len, "unknown address type: %d", addr->sa_family);
break;
}
return buf;
}
#endif /* SUNRPC_SVC_XPRT_H */

View file

@ -10,42 +10,16 @@
#define SUNRPC_SVCSOCK_H
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
/*
* RPC server socket.
*/
struct svc_sock {
struct list_head sk_ready; /* list of ready sockets */
struct list_head sk_list; /* list of all sockets */
struct svc_xprt sk_xprt;
struct socket * sk_sock; /* berkeley socket layer */
struct sock * sk_sk; /* INET layer */
struct svc_pool * sk_pool; /* current pool iff queued */
struct svc_serv * sk_server; /* service for this socket */
atomic_t sk_inuse; /* use count */
unsigned long sk_flags;
#define SK_BUSY 0 /* enqueued/receiving */
#define SK_CONN 1 /* conn pending */
#define SK_CLOSE 2 /* dead or dying */
#define SK_DATA 3 /* data pending */
#define SK_TEMP 4 /* temp (TCP) socket */
#define SK_DEAD 6 /* socket closed */
#define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */
#define SK_DEFERRED 8 /* request on sk_deferred */
#define SK_OLD 9 /* used for temp socket aging mark+sweep */
#define SK_DETACHED 10 /* detached from tempsocks list */
atomic_t sk_reserved; /* space on outq that is reserved */
spinlock_t sk_lock; /* protects sk_deferred and
* sk_info_authunix */
struct list_head sk_deferred; /* deferred requests that need to
* be revisted */
struct mutex sk_mutex; /* to serialize sending data */
int (*sk_recvfrom)(struct svc_rqst *rqstp);
int (*sk_sendto)(struct svc_rqst *rqstp);
/* We keep the old state_change and data_ready CB's here */
void (*sk_ostate)(struct sock *);
void (*sk_odata)(struct sock *, int bytes);
@ -54,21 +28,12 @@ struct svc_sock {
/* private TCP part */
int sk_reclen; /* length of record */
int sk_tcplen; /* current read length */
time_t sk_lastrecv; /* time of last received request */
/* cache of various info for TCP sockets */
void *sk_info_authunix;
struct sockaddr_storage sk_local; /* local address */
struct sockaddr_storage sk_remote; /* remote peer's address */
int sk_remotelen; /* length of address */
};
/*
* Function prototypes.
*/
int svc_makesock(struct svc_serv *, int, unsigned short, int flags);
void svc_force_close_socket(struct svc_sock *);
void svc_close_all(struct list_head *);
int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
@ -78,6 +43,8 @@ int svc_addsock(struct svc_serv *serv,
int fd,
char *name_return,
int *proto);
void svc_init_xprt_sock(void);
void svc_cleanup_xprt_sock(void);
/*
* svc_makesock socket characteristics

View file

@ -112,7 +112,8 @@ struct xdr_buf {
__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len);
__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len);
__be32 *xdr_encode_string(__be32 *p, const char *s);
__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen);
__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp,
unsigned int maxlen);
__be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *);
__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *);

View file

@ -11,6 +11,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o

View file

@ -224,38 +224,34 @@ static int rsi_parse(struct cache_detail *cd,
/* major/minor */
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
goto out;
rsii.major_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
goto out;
rsii.minor_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
/* out_handle */
len = qword_get(&mesg, buf, mlen);
if (len < 0)
goto out;
if (len == 0) {
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_handle, buf, len))
goto out;
} else {
rsii.major_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
goto out;
rsii.minor_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
/* out_handle */
len = qword_get(&mesg, buf, mlen);
if (len < 0)
goto out;
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_handle, buf, len))
goto out;
/* out_token */
len = qword_get(&mesg, buf, mlen);
status = -EINVAL;
if (len < 0)
goto out;
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_token, buf, len))
goto out;
}
/* out_token */
len = qword_get(&mesg, buf, mlen);
status = -EINVAL;
if (len < 0)
goto out;
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_token, buf, len))
goto out;
rsii.h.expiry_time = expiry;
rsip = rsi_update(&rsii, rsip);
status = 0;
@ -975,6 +971,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
struct kvec *resv = &rqstp->rq_res.head[0];
struct xdr_netobj tmpobj;
struct rsi *rsip, rsikey;
int ret;
/* Read the verifier; should be NULL: */
*authp = rpc_autherr_badverf;
@ -1014,23 +1011,27 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
/* No upcall result: */
return SVC_DROP;
case 0:
ret = SVC_DROP;
/* Got an answer to the upcall; use it: */
if (gss_write_init_verf(rqstp, rsip))
return SVC_DROP;
goto out;
if (resv->iov_len + 4 > PAGE_SIZE)
return SVC_DROP;
goto out;
svc_putnl(resv, RPC_SUCCESS);
if (svc_safe_putnetobj(resv, &rsip->out_handle))
return SVC_DROP;
goto out;
if (resv->iov_len + 3 * 4 > PAGE_SIZE)
return SVC_DROP;
goto out;
svc_putnl(resv, rsip->major_status);
svc_putnl(resv, rsip->minor_status);
svc_putnl(resv, GSS_SEQ_WIN);
if (svc_safe_putnetobj(resv, &rsip->out_token))
return SVC_DROP;
goto out;
}
return SVC_COMPLETE;
ret = SVC_COMPLETE;
out:
cache_put(&rsip->h, &rsi_cache);
return ret;
}
/*
@ -1125,6 +1126,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
case RPC_GSS_PROC_DESTROY:
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
rsci->h.expiry_time = get_seconds();
set_bit(CACHE_NEGATIVE, &rsci->h.flags);
if (resv->iov_len + 4 > PAGE_SIZE)
goto drop;
@ -1386,19 +1388,26 @@ int
gss_svc_init(void)
{
int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
if (rv == 0) {
cache_register(&rsc_cache);
cache_register(&rsi_cache);
}
if (rv)
return rv;
rv = cache_register(&rsc_cache);
if (rv)
goto out1;
rv = cache_register(&rsi_cache);
if (rv)
goto out2;
return 0;
out2:
cache_unregister(&rsc_cache);
out1:
svc_auth_unregister(RPC_AUTH_GSS);
return rv;
}
void
gss_svc_shutdown(void)
{
if (cache_unregister(&rsc_cache))
printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n");
if (cache_unregister(&rsi_cache))
printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n");
cache_unregister(&rsc_cache);
cache_unregister(&rsi_cache);
svc_auth_unregister(RPC_AUTH_GSS);
}

View file

@ -245,6 +245,7 @@ int cache_check(struct cache_detail *detail,
cache_put(h, detail);
return rv;
}
EXPORT_SYMBOL(cache_check);
/*
* caches need to be periodically cleaned.
@ -290,44 +291,78 @@ static const struct file_operations cache_flush_operations;
static void do_cache_clean(struct work_struct *work);
static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
void cache_register(struct cache_detail *cd)
static void remove_cache_proc_entries(struct cache_detail *cd)
{
if (cd->proc_ent == NULL)
return;
if (cd->flush_ent)
remove_proc_entry("flush", cd->proc_ent);
if (cd->channel_ent)
remove_proc_entry("channel", cd->proc_ent);
if (cd->content_ent)
remove_proc_entry("content", cd->proc_ent);
cd->proc_ent = NULL;
remove_proc_entry(cd->name, proc_net_rpc);
}
#ifdef CONFIG_PROC_FS
static int create_cache_proc_entries(struct cache_detail *cd)
{
struct proc_dir_entry *p;
cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
if (cd->proc_ent) {
struct proc_dir_entry *p;
cd->proc_ent->owner = cd->owner;
cd->channel_ent = cd->content_ent = NULL;
if (cd->proc_ent == NULL)
goto out_nomem;
cd->proc_ent->owner = cd->owner;
cd->channel_ent = cd->content_ent = NULL;
p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR,
p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent);
cd->flush_ent = p;
if (p == NULL)
goto out_nomem;
p->proc_fops = &cache_flush_operations;
p->owner = cd->owner;
p->data = cd;
if (cd->cache_request || cd->cache_parse) {
p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->flush_ent = p;
if (p) {
p->proc_fops = &cache_flush_operations;
p->owner = cd->owner;
p->data = cd;
}
if (cd->cache_request || cd->cache_parse) {
p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->channel_ent = p;
if (p) {
p->proc_fops = &cache_file_operations;
p->owner = cd->owner;
p->data = cd;
}
}
if (cd->cache_show) {
p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->content_ent = p;
if (p) {
p->proc_fops = &content_file_operations;
p->owner = cd->owner;
p->data = cd;
}
}
cd->channel_ent = p;
if (p == NULL)
goto out_nomem;
p->proc_fops = &cache_file_operations;
p->owner = cd->owner;
p->data = cd;
}
if (cd->cache_show) {
p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->content_ent = p;
if (p == NULL)
goto out_nomem;
p->proc_fops = &content_file_operations;
p->owner = cd->owner;
p->data = cd;
}
return 0;
out_nomem:
remove_cache_proc_entries(cd);
return -ENOMEM;
}
#else /* CONFIG_PROC_FS */
static int create_cache_proc_entries(struct cache_detail *cd)
{
return 0;
}
#endif
int cache_register(struct cache_detail *cd)
{
int ret;
ret = create_cache_proc_entries(cd);
if (ret)
return ret;
rwlock_init(&cd->hash_lock);
INIT_LIST_HEAD(&cd->queue);
spin_lock(&cache_list_lock);
@ -341,9 +376,11 @@ void cache_register(struct cache_detail *cd)
/* start the cleaning process */
schedule_delayed_work(&cache_cleaner, 0);
return 0;
}
EXPORT_SYMBOL(cache_register);
int cache_unregister(struct cache_detail *cd)
void cache_unregister(struct cache_detail *cd)
{
cache_purge(cd);
spin_lock(&cache_list_lock);
@ -351,30 +388,23 @@ int cache_unregister(struct cache_detail *cd)
if (cd->entries || atomic_read(&cd->inuse)) {
write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
return -EBUSY;
goto out;
}
if (current_detail == cd)
current_detail = NULL;
list_del_init(&cd->others);
write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
if (cd->proc_ent) {
if (cd->flush_ent)
remove_proc_entry("flush", cd->proc_ent);
if (cd->channel_ent)
remove_proc_entry("channel", cd->proc_ent);
if (cd->content_ent)
remove_proc_entry("content", cd->proc_ent);
cd->proc_ent = NULL;
remove_proc_entry(cd->name, proc_net_rpc);
}
remove_cache_proc_entries(cd);
if (list_empty(&cache_list)) {
/* module must be being unloaded so its safe to kill the worker */
cancel_delayed_work_sync(&cache_cleaner);
}
return 0;
return;
out:
printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
}
EXPORT_SYMBOL(cache_unregister);
/* clean cache tries to find something to clean
* and cleans it.
@ -489,6 +519,7 @@ void cache_flush(void)
while (cache_clean() != -1)
cond_resched();
}
EXPORT_SYMBOL(cache_flush);
void cache_purge(struct cache_detail *detail)
{
@ -497,7 +528,7 @@ void cache_purge(struct cache_detail *detail)
cache_flush();
detail->flush_time = 1;
}
EXPORT_SYMBOL(cache_purge);
/*
@ -634,13 +665,13 @@ void cache_clean_deferred(void *owner)
/*
* communicate with user-space
*
* We have a magic /proc file - /proc/sunrpc/cache
* On read, you get a full request, or block
* On write, an update request is processed
* Poll works if anything to read, and always allows write
* We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
* On read, you get a full request, or block.
* On write, an update request is processed.
* Poll works if anything to read, and always allows write.
*
* Implemented by linked list of requests. Each open file has
* a ->private that also exists in this list. New request are added
* a ->private that also exists in this list. New requests are added
* to the end and may wakeup and preceding readers.
* New readers are added to the head. If, on read, an item is found with
* CACHE_UPCALLING clear, we free it from the list.
@ -963,6 +994,7 @@ void qword_add(char **bpp, int *lp, char *str)
*bpp = bp;
*lp = len;
}
EXPORT_SYMBOL(qword_add);
void qword_addhex(char **bpp, int *lp, char *buf, int blen)
{
@ -991,6 +1023,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen)
*bpp = bp;
*lp = len;
}
EXPORT_SYMBOL(qword_addhex);
static void warn_no_listener(struct cache_detail *detail)
{
@ -1113,6 +1146,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
*dest = '\0';
return len;
}
EXPORT_SYMBOL(qword_get);
/*
@ -1244,18 +1278,18 @@ static ssize_t read_flush(struct file *file, char __user *buf,
struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data;
char tbuf[20];
unsigned long p = *ppos;
int len;
size_t len;
sprintf(tbuf, "%lu\n", cd->flush_time);
len = strlen(tbuf);
if (p >= len)
return 0;
len -= p;
if (len > count) len = count;
if (len > count)
len = count;
if (copy_to_user(buf, (void*)(tbuf+p), len))
len = -EFAULT;
else
*ppos += len;
return -EFAULT;
*ppos += len;
return len;
}

View file

@ -33,7 +33,7 @@ struct proc_dir_entry *proc_net_rpc = NULL;
static int rpc_proc_show(struct seq_file *seq, void *v) {
const struct rpc_stat *statp = seq->private;
const struct rpc_program *prog = statp->program;
int i, j;
unsigned int i, j;
seq_printf(seq,
"net %u %u %u %u\n",
@ -81,7 +81,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
const struct svc_program *prog = statp->program;
const struct svc_procedure *proc;
const struct svc_version *vers;
int i, j;
unsigned int i, j;
seq_printf(seq,
"net %u %u %u %u\n",
@ -106,6 +106,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
seq_putc(seq, '\n');
}
}
EXPORT_SYMBOL(svc_seq_show);
/**
* rpc_alloc_iostats - allocate an rpc_iostats structure
@ -255,12 +256,14 @@ svc_proc_register(struct svc_stat *statp, const struct file_operations *fops)
{
return do_register(statp->program->pg_name, statp, fops);
}
EXPORT_SYMBOL(svc_proc_register);
void
svc_proc_unregister(const char *name)
{
remove_proc_entry(name, proc_net_rpc);
}
EXPORT_SYMBOL(svc_proc_unregister);
void
rpc_proc_init(void)

View file

@ -22,48 +22,6 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/xprtsock.h>
/* RPC server stuff */
EXPORT_SYMBOL(svc_create);
EXPORT_SYMBOL(svc_create_thread);
EXPORT_SYMBOL(svc_create_pooled);
EXPORT_SYMBOL(svc_set_num_threads);
EXPORT_SYMBOL(svc_exit_thread);
EXPORT_SYMBOL(svc_destroy);
EXPORT_SYMBOL(svc_drop);
EXPORT_SYMBOL(svc_process);
EXPORT_SYMBOL(svc_recv);
EXPORT_SYMBOL(svc_wake_up);
EXPORT_SYMBOL(svc_makesock);
EXPORT_SYMBOL(svc_reserve);
EXPORT_SYMBOL(svc_auth_register);
EXPORT_SYMBOL(auth_domain_lookup);
EXPORT_SYMBOL(svc_authenticate);
EXPORT_SYMBOL(svc_set_client);
/* RPC statistics */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(svc_proc_register);
EXPORT_SYMBOL(svc_proc_unregister);
EXPORT_SYMBOL(svc_seq_show);
#endif
/* caching... */
EXPORT_SYMBOL(auth_domain_find);
EXPORT_SYMBOL(auth_domain_put);
EXPORT_SYMBOL(auth_unix_add_addr);
EXPORT_SYMBOL(auth_unix_forget_old);
EXPORT_SYMBOL(auth_unix_lookup);
EXPORT_SYMBOL(cache_check);
EXPORT_SYMBOL(cache_flush);
EXPORT_SYMBOL(cache_purge);
EXPORT_SYMBOL(cache_register);
EXPORT_SYMBOL(cache_unregister);
EXPORT_SYMBOL(qword_add);
EXPORT_SYMBOL(qword_addhex);
EXPORT_SYMBOL(qword_get);
EXPORT_SYMBOL(svcauth_unix_purge);
EXPORT_SYMBOL(unix_domain_find);
extern struct cache_detail ip_map_cache, unix_gid_cache;
static int __init
@ -85,7 +43,8 @@ init_sunrpc(void)
#endif
cache_register(&ip_map_cache);
cache_register(&unix_gid_cache);
init_socket_xprt();
svc_init_xprt_sock(); /* svc sock transport */
init_socket_xprt(); /* clnt sock transport */
rpcauth_init_module();
out:
return err;
@ -96,12 +55,11 @@ cleanup_sunrpc(void)
{
rpcauth_remove_module();
cleanup_socket_xprt();
svc_cleanup_xprt_sock();
unregister_rpc_pipefs();
rpc_destroy_mempool();
if (cache_unregister(&ip_map_cache))
printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n");
if (cache_unregister(&unix_gid_cache))
printk(KERN_ERR "sunrpc: failed to unregister unix_gid cache\n");
cache_unregister(&ip_map_cache);
cache_unregister(&unix_gid_cache);
#ifdef RPC_DEBUG
rpc_unregister_sysctl();
#endif

View file

@ -364,7 +364,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
int vers;
unsigned int vers;
unsigned int xdrsize;
unsigned int i;
@ -433,6 +433,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
{
return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
@ -452,6 +453,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
return serv;
}
EXPORT_SYMBOL(svc_create_pooled);
/*
* Destroy an RPC service. Should be called with the BKL held
@ -459,9 +461,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
void
svc_destroy(struct svc_serv *serv)
{
struct svc_sock *svsk;
struct svc_sock *tmp;
dprintk("svc: svc_destroy(%s, %d)\n",
serv->sv_program->pg_name,
serv->sv_nrthreads);
@ -476,14 +475,12 @@ svc_destroy(struct svc_serv *serv)
del_timer_sync(&serv->sv_temptimer);
list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
svc_force_close_socket(svsk);
svc_close_all(&serv->sv_tempsocks);
if (serv->sv_shutdown)
serv->sv_shutdown(serv);
list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
svc_force_close_socket(svsk);
svc_close_all(&serv->sv_permsocks);
BUG_ON(!list_empty(&serv->sv_permsocks));
BUG_ON(!list_empty(&serv->sv_tempsocks));
@ -498,6 +495,7 @@ svc_destroy(struct svc_serv *serv)
kfree(serv->sv_pools);
kfree(serv);
}
EXPORT_SYMBOL(svc_destroy);
/*
* Allocate an RPC server's buffer space.
@ -536,6 +534,44 @@ svc_release_buffer(struct svc_rqst *rqstp)
put_page(rqstp->rq_pages[i]);
}
struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
{
struct svc_rqst *rqstp;
rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
if (!rqstp)
goto out_enomem;
init_waitqueue_head(&rqstp->rq_wait);
serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
list_add(&rqstp->rq_all, &pool->sp_all_threads);
spin_unlock_bh(&pool->sp_lock);
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
if (!rqstp->rq_argp)
goto out_thread;
rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
if (!rqstp->rq_resp)
goto out_thread;
if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
goto out_thread;
return rqstp;
out_thread:
svc_exit_thread(rqstp);
out_enomem:
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(svc_prepare_thread);
/*
* Create a thread in the given pool. Caller must hold BKL.
* On a NUMA or SMP machine, with a multi-pool serv, the thread
@ -550,24 +586,11 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
int have_oldmask = 0;
cpumask_t oldmask;
rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
if (!rqstp)
rqstp = svc_prepare_thread(serv, pool);
if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
goto out;
init_waitqueue_head(&rqstp->rq_wait);
if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !svc_init_buffer(rqstp, serv->sv_max_mesg))
goto out_thread;
serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
list_add(&rqstp->rq_all, &pool->sp_all_threads);
spin_unlock_bh(&pool->sp_lock);
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
}
if (serv->sv_nrpools > 1)
have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
@ -597,6 +620,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
{
return __svc_create_thread(func, serv, &serv->sv_pools[0]);
}
EXPORT_SYMBOL(svc_create_thread);
/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
@ -700,6 +724,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return error;
}
EXPORT_SYMBOL(svc_set_num_threads);
/*
* Called from a server thread as it's exiting. Caller must hold BKL.
@ -726,6 +751,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
if (serv)
svc_destroy(serv);
}
EXPORT_SYMBOL(svc_exit_thread);
/*
* Register an RPC service with the local portmapper.
@ -737,7 +763,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
{
struct svc_program *progp;
unsigned long flags;
int i, error = 0, dummy;
unsigned int i;
int error = 0, dummy;
if (!port)
clear_thread_flag(TIF_SIGPENDING);
@ -840,9 +867,9 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_len = 0;
/* Will be turned off only in gss privacy case: */
rqstp->rq_splice_ok = 1;
/* tcp needs a space for the record length... */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_putnl(resv, 0);
/* Setup reply header */
rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
rqstp->rq_xid = svc_getu32(argv);
svc_putu32(resv, rqstp->rq_xid);
@ -1049,16 +1076,15 @@ svc_process(struct svc_rqst *rqstp)
svc_putnl(resv, ntohl(rpc_stat));
goto sendit;
}
EXPORT_SYMBOL(svc_process);
/*
* Return (transport-specific) limit on the rpc payload.
*/
u32 svc_max_payload(const struct svc_rqst *rqstp)
{
int max = RPCSVC_MAXPAYLOAD_TCP;
u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
max = RPCSVC_MAXPAYLOAD_UDP;
if (rqstp->rq_server->sv_max_payload < max)
max = rqstp->rq_server->sv_max_payload;
return max;

1055
net/sunrpc/svc_xprt.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -57,11 +57,13 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
}
EXPORT_SYMBOL(svc_authenticate);
int svc_set_client(struct svc_rqst *rqstp)
{
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL(svc_set_client);
/* A request, which was authenticated, has now executed.
* Time to finalise the credentials and verifier
@ -93,6 +95,7 @@ svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
spin_unlock(&authtab_lock);
return rv;
}
EXPORT_SYMBOL(svc_auth_register);
void
svc_auth_unregister(rpc_authflavor_t flavor)
@ -129,6 +132,7 @@ void auth_domain_put(struct auth_domain *dom)
spin_unlock(&auth_domain_lock);
}
}
EXPORT_SYMBOL(auth_domain_put);
struct auth_domain *
auth_domain_lookup(char *name, struct auth_domain *new)
@ -153,8 +157,10 @@ auth_domain_lookup(char *name, struct auth_domain *new)
spin_unlock(&auth_domain_lock);
return new;
}
EXPORT_SYMBOL(auth_domain_lookup);
struct auth_domain *auth_domain_find(char *name)
{
return auth_domain_lookup(name, NULL);
}
EXPORT_SYMBOL(auth_domain_find);

View file

@ -63,6 +63,7 @@ struct auth_domain *unix_domain_find(char *name)
rv = auth_domain_lookup(name, &new->h);
}
}
EXPORT_SYMBOL(unix_domain_find);
static void svcauth_unix_domain_release(struct auth_domain *dom)
{
@ -340,6 +341,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
else
return -ENOMEM;
}
EXPORT_SYMBOL(auth_unix_add_addr);
int auth_unix_forget_old(struct auth_domain *dom)
{
@ -351,6 +353,7 @@ int auth_unix_forget_old(struct auth_domain *dom)
udom->addr_changes++;
return 0;
}
EXPORT_SYMBOL(auth_unix_forget_old);
struct auth_domain *auth_unix_lookup(struct in_addr addr)
{
@ -375,50 +378,56 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr)
cache_put(&ipm->h, &ip_map_cache);
return rv;
}
EXPORT_SYMBOL(auth_unix_lookup);
void svcauth_unix_purge(void)
{
cache_purge(&ip_map_cache);
}
EXPORT_SYMBOL(svcauth_unix_purge);
static inline struct ip_map *
ip_map_cached_get(struct svc_rqst *rqstp)
{
struct ip_map *ipm;
struct svc_sock *svsk = rqstp->rq_sock;
spin_lock(&svsk->sk_lock);
ipm = svsk->sk_info_authunix;
if (ipm != NULL) {
if (!cache_valid(&ipm->h)) {
/*
* The entry has been invalidated since it was
* remembered, e.g. by a second mount from the
* same IP address.
*/
svsk->sk_info_authunix = NULL;
spin_unlock(&svsk->sk_lock);
cache_put(&ipm->h, &ip_map_cache);
return NULL;
struct ip_map *ipm = NULL;
struct svc_xprt *xprt = rqstp->rq_xprt;
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
ipm = xprt->xpt_auth_cache;
if (ipm != NULL) {
if (!cache_valid(&ipm->h)) {
/*
* The entry has been invalidated since it was
* remembered, e.g. by a second mount from the
* same IP address.
*/
xprt->xpt_auth_cache = NULL;
spin_unlock(&xprt->xpt_lock);
cache_put(&ipm->h, &ip_map_cache);
return NULL;
}
cache_get(&ipm->h);
}
cache_get(&ipm->h);
spin_unlock(&xprt->xpt_lock);
}
spin_unlock(&svsk->sk_lock);
return ipm;
}
static inline void
ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
{
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_xprt *xprt = rqstp->rq_xprt;
spin_lock(&svsk->sk_lock);
if (svsk->sk_sock->type == SOCK_STREAM &&
svsk->sk_info_authunix == NULL) {
/* newly cached, keep the reference */
svsk->sk_info_authunix = ipm;
ipm = NULL;
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
if (xprt->xpt_auth_cache == NULL) {
/* newly cached, keep the reference */
xprt->xpt_auth_cache = ipm;
ipm = NULL;
}
spin_unlock(&xprt->xpt_lock);
}
spin_unlock(&svsk->sk_lock);
if (ipm)
cache_put(&ipm->h, &ip_map_cache);
}

File diff suppressed because it is too large Load diff

View file

@ -18,6 +18,7 @@
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
/*
* Declare the debug flags here
@ -55,6 +56,30 @@ rpc_unregister_sysctl(void)
}
}
static int proc_do_xprt(ctl_table *table, int write, struct file *file,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
int len;
if ((*ppos && !write) || !*lenp) {
*lenp = 0;
return 0;
}
if (write)
return -EINVAL;
else {
len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
if (!access_ok(VERIFY_WRITE, buffer, len))
return -EFAULT;
if (__copy_to_user(buffer, tmpbuf, len))
return -EFAULT;
}
*lenp -= len;
*ppos += len;
return 0;
}
static int
proc_dodebug(ctl_table *table, int write, struct file *file,
void __user *buffer, size_t *lenp, loff_t *ppos)
@ -147,6 +172,12 @@ static ctl_table debug_table[] = {
.mode = 0644,
.proc_handler = &proc_dodebug
},
{
.procname = "transports",
.maxlen = 256,
.mode = 0444,
.proc_handler = &proc_do_xprt,
},
{ .ctl_name = 0 }
};

View file

@ -96,11 +96,13 @@ xdr_encode_string(__be32 *p, const char *string)
EXPORT_SYMBOL(xdr_encode_string);
__be32 *
xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen)
xdr_decode_string_inplace(__be32 *p, char **sp,
unsigned int *lenp, unsigned int maxlen)
{
unsigned int len;
u32 len;
if ((len = ntohl(*p++)) > maxlen)
len = ntohl(*p++);
if (len > maxlen)
return NULL;
*lenp = len;
*sp = (char *) p;

View file

@ -1,3 +1,8 @@
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
xprtrdma-y := transport.o rpc_rdma.o verbs.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o
svcrdma-y := svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o

View file

@ -0,0 +1,266 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* RPC/RDMA parameters */
unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
static unsigned int min_max_inline = 4096;
static unsigned int max_max_inline = 65536;
atomic_t rdma_stat_recv;
atomic_t rdma_stat_read;
atomic_t rdma_stat_write;
atomic_t rdma_stat_sq_starve;
atomic_t rdma_stat_rq_starve;
atomic_t rdma_stat_rq_poll;
atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
* resets the associated statistic to zero. Any read returns it's
* current value.
*/
static int read_reset_stat(ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos)
{
atomic_t *stat = (atomic_t *)table->data;
if (!stat)
return -EINVAL;
if (write)
atomic_set(stat, 0);
else {
char str_buf[32];
char *data;
int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
if (len >= 32)
return -EFAULT;
len = strlen(str_buf);
if (*ppos > len) {
*lenp = 0;
return 0;
}
data = &str_buf[*ppos];
len -= *ppos;
if (len > *lenp)
len = *lenp;
if (len && copy_to_user(buffer, str_buf, len))
return -EFAULT;
*lenp = len;
*ppos += len;
}
return 0;
}
static struct ctl_table_header *svcrdma_table_header;
static ctl_table svcrdma_parm_table[] = {
{
.procname = "max_requests",
.data = &svcrdma_max_requests,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_max_requests,
.extra2 = &max_max_requests
},
{
.procname = "max_req_size",
.data = &svcrdma_max_req_size,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_max_inline,
.extra2 = &max_max_inline
},
{
.procname = "max_outbound_read_requests",
.data = &svcrdma_ord,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_ord,
.extra2 = &max_ord,
},
{
.procname = "rdma_stat_read",
.data = &rdma_stat_read,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_recv",
.data = &rdma_stat_recv,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_write",
.data = &rdma_stat_write,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_starve",
.data = &rdma_stat_sq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_starve",
.data = &rdma_stat_rq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_poll",
.data = &rdma_stat_rq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_prod",
.data = &rdma_stat_rq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_poll",
.data = &rdma_stat_sq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_prod",
.data = &rdma_stat_sq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.ctl_name = 0,
},
};
static ctl_table svcrdma_table[] = {
{
.procname = "svc_rdma",
.mode = 0555,
.child = svcrdma_parm_table
},
{
.ctl_name = 0,
},
};
static ctl_table svcrdma_root_table[] = {
{
.ctl_name = CTL_SUNRPC,
.procname = "sunrpc",
.mode = 0555,
.child = svcrdma_table
},
{
.ctl_name = 0,
},
};
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
svc_unreg_xprt_class(&svc_rdma_class);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
dprintk("\tsq_depth : %d\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
if (!svcrdma_table_header)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
return 0;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
module_init(svc_rdma_init);
module_exit(svc_rdma_cleanup);

View file

@ -0,0 +1,412 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <asm/unaligned.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
* position : u32 offset into XDR stream
* handle : u32 RKEY
* . . .
* end-of-list: xdr_zero
*/
static u32 *decode_read_list(u32 *va, u32 *vaend)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
while (ch->rc_discrim != xdr_zero) {
u64 ch_offset;
if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
(unsigned long)vaend) {
dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
return NULL;
}
ch->rc_discrim = ntohl(ch->rc_discrim);
ch->rc_position = ntohl(ch->rc_position);
ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle);
ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length);
va = (u32 *)&ch->rc_target.rs_offset;
xdr_decode_hyper(va, &ch_offset);
put_unaligned(ch_offset, (u64 *)va);
ch++;
}
return (u32 *)&ch->rc_position;
}
/*
* Determine number of chunks and total bytes in chunk list. The chunk
* list has already been verified to fit within the RPCRDMA header.
*/
void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
int *ch_count, int *byte_count)
{
/* compute the number of bytes represented by read chunks */
*byte_count = 0;
*ch_count = 0;
for (; ch->rc_discrim != 0; ch++) {
*byte_count = *byte_count + ch->rc_target.rs_length;
*ch_count = *ch_count + 1;
}
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
* handle : u32 RKEY ---+
* length : u32 <len of segment> |
* offset : remove va + <count>
* . . . |
* ---+
*/
static u32 *decode_write_list(u32 *va, u32 *vaend)
{
int ch_no;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
ary->wc_discrim = ntohl(ary->wc_discrim);
ary->wc_nchunks = ntohl(ary->wc_nchunks);
if (((unsigned long)&ary->wc_array[0] +
(sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, ary->wc_nchunks, vaend);
return NULL;
}
for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
u64 ch_offset;
ary->wc_array[ch_no].wc_target.rs_handle =
ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
ary->wc_array[ch_no].wc_target.rs_length =
ntohl(ary->wc_array[ch_no].wc_target.rs_length);
va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
xdr_decode_hyper(va, &ch_offset);
put_unaligned(ch_offset, (u64 *)va);
}
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length;
}
static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
int ch_no;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
ary->wc_discrim = ntohl(ary->wc_discrim);
ary->wc_nchunks = ntohl(ary->wc_nchunks);
if (((unsigned long)&ary->wc_array[0] +
(sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, ary->wc_nchunks, vaend);
return NULL;
}
for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
u64 ch_offset;
ary->wc_array[ch_no].wc_target.rs_handle =
ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
ary->wc_array[ch_no].wc_target.rs_length =
ntohl(ary->wc_array[ch_no].wc_target.rs_length);
va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
xdr_decode_hyper(va, &ch_offset);
put_unaligned(ch_offset, (u64 *)va);
}
return (u32 *)&ary->wc_array[ch_no];
}
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
u32 *va;
u32 *vaend;
u32 hdr_len;
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Verify that there's enough bytes for header + something */
if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
dprintk("svcrdma: header too short = %d\n",
rqstp->rq_arg.len);
return -EINVAL;
}
/* Decode the header */
rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
rmsgp->rm_type = ntohl(rmsgp->rm_type);
if (rmsgp->rm_vers != RPCRDMA_VERSION)
return -ENOSYS;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
int hdrlen;
rmsgp->rm_body.rm_padded.rm_align =
ntohl(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
if (hdrlen > rqstp->rq_arg.len)
return -EINVAL;
return hdrlen;
}
/* The chunk list may contain either a read chunk list or a write
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
va = decode_read_list(va, vaend);
if (!va)
return -EINVAL;
va = decode_write_list(va, vaend);
if (!va)
return -EINVAL;
va = decode_reply_array(va, vaend);
if (!va)
return -EINVAL;
rqstp->rq_arg.head[0].iov_base = va;
hdr_len = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdr_len;
*rdma_req = rmsgp;
return hdr_len;
}
int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
struct rpcrdma_read_chunk *ch;
struct rpcrdma_write_array *ary;
u32 *va;
u32 hdrlen;
dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
rqstp);
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
/*
* Skip all chunks to find RPC msg. These were previously processed
*/
va = &rmsgp->rm_body.rm_chunks[0];
/* Skip read-list */
for (ch = (struct rpcrdma_read_chunk *)va;
ch->rc_discrim != xdr_zero; ch++);
va = (u32 *)&ch->rc_position;
/* Skip write-list */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
/* Skip reply-array */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
va = (u32 *)&ary->wc_array[ary->wc_nchunks];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
enum rpcrdma_errcode err, u32 *va)
{
u32 *startp = va;
*va++ = htonl(rmsgp->rm_xid);
*va++ = htonl(rmsgp->rm_vers);
*va++ = htonl(xprt->sc_max_requests);
*va++ = htonl(RDMA_ERROR);
*va++ = htonl(err);
if (err == ERR_VERS) {
*va++ = htonl(RPCRDMA_VERSION);
*va++ = htonl(RPCRDMA_VERSION);
}
return (int)((unsigned long)va - (unsigned long)startp);
}
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
{
struct rpcrdma_write_array *wr_ary;
/* There is no read-list in a reply */
/* skip write list */
wr_ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
return (unsigned long) wr_ary - (unsigned long) rmsgp;
}
void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
{
struct rpcrdma_write_array *ary;
/* no read-list */
rmsgp->rm_body.rm_chunks[0] = xdr_zero;
/* write-array discrim */
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
/* reply-array discriminator */
ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
}
void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
int chunks)
{
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
}
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
int chunk_no,
u32 rs_handle, u64 rs_offset,
u32 write_len)
{
struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
seg->rs_handle = htonl(rs_handle);
seg->rs_length = htonl(write_len);
xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
rdma_resp->rm_type = htonl(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
}

View file

@ -0,0 +1,586 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Replace the pages in the rq_argpages array with the pages from the SGE in
* the RDMA_RECV completion. The SGL should contain full pages up until the
* last one.
*/
static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *ctxt,
u32 byte_count)
{
struct page *page;
u32 bc;
int sge_no;
/* Swap the page in the SGE with the page in argpages */
page = ctxt->pages[0];
put_page(rqstp->rq_pages[0]);
rqstp->rq_pages[0] = page;
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count;
rqstp->rq_arg.buflen = byte_count;
/* Compute bytes past head in the SGL */
bc = byte_count - rqstp->rq_arg.head[0].iov_len;
/* If data remains, store it in the pagelist */
rqstp->rq_arg.page_len = bc;
rqstp->rq_arg.page_base = 0;
rqstp->rq_arg.pages = &rqstp->rq_pages[1];
sge_no = 1;
while (bc && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
bc -= min(bc, ctxt->sge[sge_no].length);
rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
/* We should never run out of SGE because the limit is defined to
* support the max allowed RPC data length
*/
BUG_ON(bc && (sge_no == ctxt->count));
BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
!= byte_count);
BUG_ON(rqstp->rq_arg.len != byte_count);
/* If not all pages were used from the SGL, free the remaining ones */
bc = sge_no;
while (sge_no < ctxt->count) {
page = ctxt->pages[sge_no++];
put_page(page);
}
ctxt->count = bc;
/* Set up tail */
rqstp->rq_arg.tail[0].iov_base = NULL;
rqstp->rq_arg.tail[0].iov_len = 0;
}
struct chunk_sge {
int start; /* sge no for this chunk */
int count; /* sge count for this chunk */
};
/* Encode a read-chunk-list as an array of IB SGE
*
* Assumptions:
* - chunk[0]->position points to pages[0] at an offset of 0
* - pages[] is not physically or virtually contigous and consists of
* PAGE_SIZE elements.
*
* Output:
* - sge array pointing into pages[] array.
* - chunk_sge array specifying sge index and count for each
* chunk in the read list
*
*/
static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
struct rpcrdma_msg *rmsgp,
struct ib_sge *sge,
struct chunk_sge *ch_sge_ary,
int ch_count,
int byte_count)
{
int sge_no;
int sge_bytes;
int page_off;
int page_no;
int ch_bytes;
int ch_no;
struct rpcrdma_read_chunk *ch;
sge_no = 0;
page_no = 0;
page_off = 0;
ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch_no = 0;
ch_bytes = ch->rc_target.rs_length;
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
head->sge[0].length = head->count; /* save count of hdr pages */
head->arg.page_base = 0;
head->arg.page_len = ch_bytes;
head->arg.len = rqstp->rq_arg.len + ch_bytes;
head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
head->count++;
ch_sge_ary[0].start = 0;
while (byte_count) {
sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
sge[sge_no].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
rqstp->rq_arg.pages[page_no],
page_off, sge_bytes,
DMA_FROM_DEVICE);
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
/*
* Don't bump head->count here because the same page
* may be used by multiple SGE.
*/
head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
byte_count -= sge_bytes;
ch_bytes -= sge_bytes;
sge_no++;
/*
* If all bytes for this chunk have been mapped to an
* SGE, move to the next SGE
*/
if (ch_bytes == 0) {
ch_sge_ary[ch_no].count =
sge_no - ch_sge_ary[ch_no].start;
ch_no++;
ch++;
ch_sge_ary[ch_no].start = sge_no;
ch_bytes = ch->rc_target.rs_length;
/* If bytes remaining account for next chunk */
if (byte_count) {
head->arg.page_len += ch_bytes;
head->arg.len += ch_bytes;
head->arg.buflen += ch_bytes;
}
}
/*
* If this SGE consumed all of the page, move to the
* next page
*/
if ((sge_bytes + page_off) == PAGE_SIZE) {
page_no++;
page_off = 0;
/*
* If there are still bytes left to map, bump
* the page count
*/
if (byte_count)
head->count++;
} else
page_off += sge_bytes;
}
BUG_ON(byte_count != 0);
return sge_no;
}
static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt,
struct ib_sge *sge,
u64 *sgl_offset,
int count)
{
int i;
ctxt->count = count;
for (i = 0; i < count; i++) {
ctxt->sge[i].addr = sge[i].addr;
ctxt->sge[i].length = sge[i].length;
*sgl_offset = *sgl_offset + sge[i].length;
}
}
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
#ifdef RDMA_TRANSPORT_IWARP
if ((RDMA_TRANSPORT_IWARP ==
rdma_node_get_transport(xprt->sc_cm_id->
device->node_type))
&& sge_count > 1)
return 1;
else
#endif
return min_t(int, sge_count, xprt->sc_max_sge);
}
/*
* Use RDMA_READ to read data from the advertised client buffer into the
* XDR stream starting at rq_arg.head[0].iov_base.
* Each chunk in the array
* contains the following fields:
* discrim - '1', This isn't used for data placement
* position - The xdr stream offset (the same for every chunk)
* handle - RMR for client memory region
* length - data transfer length
* offset - 64 bit tagged offset in remote memory region
*
* On our side, we need to read into a pagelist. The first page immediately
* follows the RPC header.
*
* This function returns 1 to indicate success. The data is not yet in
* the pagelist and therefore the RPC request must be deferred. The
* I/O completion will enqueue the transport again and
* svc_rdma_recvfrom will complete the request.
*
* NOTE: The ctxt must not be touched after the last WR has been posted
* because the I/O completion processing may occur on another
* processor and free / modify the context. Ne touche pas!
*/
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *hdr_ctxt)
{
struct ib_send_wr read_wr;
int err = 0;
int ch_no;
struct ib_sge *sge;
int ch_count;
int byte_count;
int sge_count;
u64 sgl_offset;
struct rpcrdma_read_chunk *ch;
struct svc_rdma_op_ctxt *ctxt = NULL;
struct svc_rdma_op_ctxt *head;
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct svc_rdma_op_ctxt *tmp_ch_ctxt;
struct chunk_sge *ch_sge_ary;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
/* Allocate temporary contexts to keep SGE */
BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
tmp_sge_ctxt = svc_rdma_get_context(xprt);
sge = tmp_sge_ctxt->sge;
tmp_ch_ctxt = svc_rdma_get_context(xprt);
ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
sge, ch_sge_ary,
ch_count, byte_count);
head = svc_rdma_get_context(xprt);
sgl_offset = 0;
ch_no = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch->rc_discrim != 0; ch++, ch_no++) {
next_sge:
if (!ctxt)
ctxt = head;
else {
ctxt->next = svc_rdma_get_context(xprt);
ctxt = ctxt->next;
}
ctxt->next = NULL;
ctxt->direction = DMA_FROM_DEVICE;
clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
if ((ch+1)->rc_discrim == 0) {
/*
* Checked in sq_cq_reap to see if we need to
* be enqueued
*/
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
ctxt->next = hdr_ctxt;
hdr_ctxt->next = head;
}
/* Prepare READ WR */
memset(&read_wr, 0, sizeof read_wr);
ctxt->wr_op = IB_WR_RDMA_READ;
read_wr.wr_id = (unsigned long)ctxt;
read_wr.opcode = IB_WR_RDMA_READ;
read_wr.send_flags = IB_SEND_SIGNALED;
read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
read_wr.wr.rdma.remote_addr =
get_unaligned(&(ch->rc_target.rs_offset)) +
sgl_offset;
read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
read_wr.num_sge =
rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
&sgl_offset,
read_wr.num_sge);
/* Post the read */
err = svc_rdma_send(xprt, &read_wr);
if (err) {
printk(KERN_ERR "svcrdma: Error posting send = %d\n",
err);
/*
* Break the circular list so free knows when
* to stop if the error happened to occur on
* the last read
*/
ctxt->next = NULL;
goto out;
}
atomic_inc(&rdma_stat_read);
if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
ch_sge_ary[ch_no].count -= read_wr.num_sge;
ch_sge_ary[ch_no].start += read_wr.num_sge;
goto next_sge;
}
sgl_offset = 0;
err = 0;
}
out:
svc_rdma_put_context(tmp_sge_ctxt, 0);
svc_rdma_put_context(tmp_ch_ctxt, 0);
/* Detach arg pages. svc_recv will replenish them */
for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
rqstp->rq_pages[ch_no] = NULL;
/*
* Detach res pages. svc_release must see a resused count of
* zero or it will attempt to put them.
*/
while (rqstp->rq_resused)
rqstp->rq_respages[--rqstp->rq_resused] = NULL;
if (err) {
printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
/* Free the linked list of read contexts */
while (head != NULL) {
ctxt = head->next;
svc_rdma_put_context(head, 1);
head = ctxt;
}
return 0;
}
return 1;
}
static int rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *data)
{
struct svc_rdma_op_ctxt *head = data->next;
int page_no;
int ret;
BUG_ON(!head);
/* Copy RPC pages */
for (page_no = 0; page_no < head->count; page_no++) {
put_page(rqstp->rq_pages[page_no]);
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
rqstp->rq_resused = 0;
/* Rebuild rq_arg head and tail. */
rqstp->rq_arg.head[0] = head->arg.head[0];
rqstp->rq_arg.tail[0] = head->arg.tail[0];
rqstp->rq_arg.len = head->arg.len;
rqstp->rq_arg.buflen = head->arg.buflen;
/* XXX: What should this be? */
rqstp->rq_prot = IPPROTO_MAX;
/*
* Free the contexts we used to build the RDMA_READ. We have
* to be careful here because the context list uses the same
* next pointer used to chain the contexts associated with the
* RDMA_READ
*/
data->next = NULL; /* terminate circular list */
do {
data = head->next;
svc_rdma_put_context(head, 0);
head = data;
} while (head != NULL);
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
/* Indicate that we've consumed an RQ credit */
rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
svc_xprt_received(rqstp->rq_xprt);
return ret;
}
/*
* Set up the rqstp thread context to point to the RQ buffer. If
* necessary, pull additional data from the client with an RDMA_READ
* request.
*/
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma_xprt =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct svc_rdma_op_ctxt *ctxt = NULL;
struct rpcrdma_msg *rmsgp;
int ret = 0;
int len;
dprintk("svcrdma: rqstp=%p\n", rqstp);
/*
* The rq_xprt_ctxt indicates if we've consumed an RQ credit
* or not. It is used in the rdma xpo_release_rqst function to
* determine whether or not to return an RQ WQE to the RQ.
*/
rqstp->rq_xprt_ctxt = NULL;
spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
}
spin_unlock_bh(&rdma_xprt->sc_read_complete_lock);
if (ctxt)
return rdma_read_complete(rqstp, ctxt);
spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
} else {
atomic_inc(&rdma_stat_rq_starve);
clear_bit(XPT_DATA, &xprt->xpt_flags);
ctxt = NULL;
}
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
if (!ctxt) {
/* This is the EAGAIN path. The svc_recv routine will
* return -EAGAIN, the nfsd thread will go to call into
* svc_recv again and we shouldn't be on the active
* transport list
*/
if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
goto close_out;
BUG_ON(ret);
goto out;
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
ctxt, rdma_xprt, rqstp, ctxt->wc_status);
BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */
rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
/* Decode the RDMA header. */
len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
rqstp->rq_xprt_hlen = len;
/* If the request is invalid, reply with an error */
if (len < 0) {
if (len == -ENOSYS)
(void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
goto close_out;
}
/* Read read-list data. If we would need to wait, defer
* it. Not that in this case, we don't return the RQ credit
* until after the read completes.
*/
if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
svc_xprt_received(xprt);
return 0;
}
/* Indicate we've consumed an RQ credit */
rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
svc_rdma_put_context(ctxt, 0);
out:
dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
ret, rqstp->rq_arg.len,
rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
svc_xprt_received(xprt);
return ret;
close_out:
if (ctxt) {
svc_rdma_put_context(ctxt, 1);
/* Indicate we've consumed an RQ credit */
rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
}
dprintk("svcrdma: transport %p is closing\n", xprt);
/*
* Set the close bit and enqueue it. svc_recv will see the
* close bit and call svc_xprt_delete
*/
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_received(xprt);
return 0;
}

View file

@ -0,0 +1,520 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* Encode an XDR as an array of IB SGE
*
* Assumptions:
* - head[0] is physically contiguous.
* - tail[0] is physically contiguous.
* - pages[] is not physically or virtually contigous and consists of
* PAGE_SIZE elements.
*
* Output:
* SGE[0] reserved for RCPRDMA header
* SGE[1] data from xdr->head[]
* SGE[2..sge_count-2] data from xdr->pages[]
* SGE[sge_count-1] data from xdr->tail.
*
*/
static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct ib_sge *sge,
int *sge_count)
{
/* Max we need is the length of the XDR / pagesize + one for
* head + one for tail + one for RPCRDMA header
*/
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no;
u32 byte_count = xdr->len;
u32 sge_bytes;
u32 page_bytes;
int page_off;
int page_no;
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
/* Head SGE */
sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device,
xdr->head[0].iov_base,
xdr->head[0].iov_len,
DMA_TO_DEVICE);
sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
byte_count -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++;
/* pages SGE */
page_no = 0;
page_bytes = xdr->page_len;
page_off = xdr->page_base;
while (byte_count && page_bytes) {
sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off));
sge[sge_no].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
xdr->pages[page_no], page_off,
sge_bytes, DMA_TO_DEVICE);
sge_bytes = min(sge_bytes, page_bytes);
byte_count -= sge_bytes;
page_bytes -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++;
page_no++;
page_off = 0; /* reset for next time through loop */
}
/* Tail SGE */
if (byte_count && xdr->tail[0].iov_len) {
sge[sge_no].addr =
ib_dma_map_single(xprt->sc_cm_id->device,
xdr->tail[0].iov_base,
xdr->tail[0].iov_len,
DMA_TO_DEVICE);
sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
byte_count -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_no++;
}
BUG_ON(sge_no > sge_max);
BUG_ON(byte_count != 0);
*sge_count = sge_no;
return sge;
}
/* Assumptions:
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 rmr, u64 to,
u32 xdr_off, int write_len,
struct ib_sge *xdr_sge, int sge_count)
{
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct ib_send_wr write_wr;
struct ib_sge *sge;
int xdr_sge_no;
int sge_no;
int sge_bytes;
int sge_off;
int bc;
struct svc_rdma_op_ctxt *ctxt;
int ret = 0;
BUG_ON(sge_count >= 32);
dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
"write_len=%d, xdr_sge=%p, sge_count=%d\n",
rmr, to, xdr_off, write_len, xdr_sge, sge_count);
ctxt = svc_rdma_get_context(xprt);
ctxt->count = 0;
tmp_sge_ctxt = svc_rdma_get_context(xprt);
sge = tmp_sge_ctxt->sge;
/* Find the SGE associated with xdr_off */
for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count;
xdr_sge_no++) {
if (xdr_sge[xdr_sge_no].length > bc)
break;
bc -= xdr_sge[xdr_sge_no].length;
}
sge_off = bc;
bc = write_len;
sge_no = 0;
/* Copy the remaining SGE */
while (bc != 0 && xdr_sge_no < sge_count) {
sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off;
sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
sge_bytes = min((size_t)bc,
(size_t)(xdr_sge[xdr_sge_no].length-sge_off));
sge[sge_no].length = sge_bytes;
sge_off = 0;
sge_no++;
xdr_sge_no++;
bc -= sge_bytes;
}
BUG_ON(bc != 0);
BUG_ON(xdr_sge_no > sge_count);
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
ctxt->wr_op = IB_WR_RDMA_WRITE;
write_wr.wr_id = (unsigned long)ctxt;
write_wr.sg_list = &sge[0];
write_wr.num_sge = sge_no;
write_wr.opcode = IB_WR_RDMA_WRITE;
write_wr.send_flags = IB_SEND_SIGNALED;
write_wr.wr.rdma.rkey = rmr;
write_wr.wr.rdma.remote_addr = to;
/* Post It */
atomic_inc(&rdma_stat_write);
if (svc_rdma_send(xprt, &write_wr)) {
svc_rdma_put_context(ctxt, 1);
/* Fatal error, close transport */
ret = -EIO;
}
svc_rdma_put_context(tmp_sge_ctxt, 0);
return ret;
}
static int send_write_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
struct ib_sge *sge,
int sge_count)
{
u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
int write_len;
int max_write;
u32 xdr_off;
int chunk_off;
int chunk_no;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
arg_ary = svc_rdma_get_write_array(rdma_argp);
if (!arg_ary)
return 0;
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
max_write = xprt->sc_max_sge * PAGE_SIZE;
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
xfer_len && chunk_no < arg_ary->wc_nchunks;
chunk_no++) {
struct rpcrdma_segment *arg_ch;
u64 rs_offset;
arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, arg_ch->rs_length);
/* Prepare the response chunk given the length actually
* written */
rs_offset = get_unaligned(&(arg_ch->rs_offset));
svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
arg_ch->rs_handle,
rs_offset,
write_len);
chunk_off = 0;
while (write_len) {
int this_write;
this_write = min(write_len, max_write);
ret = send_write(xprt, rqstp,
arg_ch->rs_handle,
rs_offset + chunk_off,
xdr_off,
this_write,
sge,
sge_count);
if (ret) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
chunk_off += this_write;
xdr_off += this_write;
xfer_len -= this_write;
write_len -= this_write;
}
}
/* Update the req with the number of chunks actually used */
svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
}
static int send_reply_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
struct ib_sge *sge,
int sge_count)
{
u32 xfer_len = rqstp->rq_res.len;
int write_len;
int max_write;
u32 xdr_off;
int chunk_no;
int chunk_off;
struct rpcrdma_segment *ch;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
arg_ary = svc_rdma_get_reply_array(rdma_argp);
if (!arg_ary)
return 0;
/* XXX: need to fix when reply lists occur with read-list and or
* write-list */
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
max_write = xprt->sc_max_sge * PAGE_SIZE;
/* xdr offset starts at RPC message */
for (xdr_off = 0, chunk_no = 0;
xfer_len && chunk_no < arg_ary->wc_nchunks;
chunk_no++) {
u64 rs_offset;
ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, ch->rs_length);
/* Prepare the reply chunk given the length actually
* written */
rs_offset = get_unaligned(&(ch->rs_offset));
svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
ch->rs_handle, rs_offset,
write_len);
chunk_off = 0;
while (write_len) {
int this_write;
this_write = min(write_len, max_write);
ret = send_write(xprt, rqstp,
ch->rs_handle,
rs_offset + chunk_off,
xdr_off,
this_write,
sge,
sge_count);
if (ret) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
chunk_off += this_write;
xdr_off += this_write;
xfer_len -= this_write;
write_len -= this_write;
}
}
/* Update the req with the number of chunks actually used */
svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
return rqstp->rq_res.len;
}
/* This function prepares the portion of the RPCRDMA message to be
* sent in the RDMA_SEND. This function is called after data sent via
* RDMA has already been transmitted. There are three cases:
* - The RPCRDMA header, RPC header, and payload are all sent in a
* single RDMA_SEND. This is the "inline" case.
* - The RPCRDMA header and some portion of the RPC header and data
* are sent via this RDMA_SEND and another portion of the data is
* sent via RDMA.
* - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
* header and data are all transmitted via RDMA.
* In all three cases, this function prepares the RPCRDMA header in
* sge[0], the 'type' parameter indicates the type to place in the
* RPCRDMA header, and the 'byte_count' field indicates how much of
* the XDR to include in this RDMA_SEND.
*/
static int send_reply(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct page *page,
struct rpcrdma_msg *rdma_resp,
struct svc_rdma_op_ctxt *ctxt,
int sge_count,
int byte_count)
{
struct ib_send_wr send_wr;
int sge_no;
int sge_bytes;
int page_no;
int ret;
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device,
page, 0, PAGE_SIZE, DMA_TO_DEVICE);
ctxt->direction = DMA_TO_DEVICE;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
/* Determine how many of our SGE are to be transmitted */
for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) {
sge_bytes = min((size_t)ctxt->sge[sge_no].length,
(size_t)byte_count);
byte_count -= sge_bytes;
}
BUG_ON(byte_count != 0);
/* Save all respages in the ctxt and remove them from the
* respages array. They are our pages until the I/O
* completes.
*/
for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
}
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
send_wr.sg_list = ctxt->sge;
send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
ret = svc_rdma_send(rdma, &send_wr);
if (ret)
svc_rdma_put_context(ctxt, 1);
return ret;
}
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
/*
* Return the start of an xdr buffer.
*/
static void *xdr_start(struct xdr_buf *xdr)
{
return xdr->head[0].iov_base -
(xdr->len -
xdr->page_len -
xdr->tail[0].iov_len -
xdr->head[0].iov_len);
}
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct rpcrdma_msg *rdma_argp;
struct rpcrdma_msg *rdma_resp;
struct rpcrdma_write_array *reply_ary;
enum rpcrdma_proc reply_type;
int ret;
int inline_bytes;
struct ib_sge *sge;
int sge_count = 0;
struct page *res_page;
struct svc_rdma_op_ctxt *ctxt;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
/* Get the RDMA request header. */
rdma_argp = xdr_start(&rqstp->rq_arg);
/* Build an SGE for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count);
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
res_page = svc_rdma_get_page();
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
reply_type = RDMA_NOMSG;
else
reply_type = RDMA_MSG;
svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
rdma_resp, reply_type);
/* Send any write-chunk data and build resp write-list */
ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
rqstp, sge, sge_count);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
ret);
goto error;
}
inline_bytes -= ret;
/* Send any reply-list data and update resp reply-list */
ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
rqstp, sge, sge_count);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
ret);
goto error;
}
inline_bytes -= ret;
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count,
inline_bytes);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
error:
svc_rdma_put_context(ctxt, 0);
put_page(res_page);
return ret;
}

File diff suppressed because it is too large Load diff