linux-stable/net/9p/client.c

2313 lines
54 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* 9P Client
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/idr.h>
#include <linux/mutex.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
#include <linux/netfs.h>
#include <net/9p/9p.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include "protocol.h"
#define CREATE_TRACE_POINTS
#include <trace/events/9p.h>
/* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ +
* room for write (16 extra) or read (11 extra) operands.
*/
#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ)
/* Client Option Parsing (code inspired by NFS code)
* - a little lazy - parse all client options
*/
enum {
Opt_msize,
Opt_trans,
Opt_legacy,
Opt_version,
Opt_err,
};
static const match_table_t tokens = {
{Opt_msize, "msize=%u"},
{Opt_legacy, "noextend"},
{Opt_trans, "trans=%s"},
{Opt_version, "version=%s"},
{Opt_err, NULL},
};
inline int p9_is_proto_dotl(struct p9_client *clnt)
{
return clnt->proto_version == p9_proto_2000L;
}
EXPORT_SYMBOL(p9_is_proto_dotl);
inline int p9_is_proto_dotu(struct p9_client *clnt)
{
return clnt->proto_version == p9_proto_2000u;
}
EXPORT_SYMBOL(p9_is_proto_dotu);
int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
{
if (clnt->msize != DEFAULT_MSIZE)
seq_printf(m, ",msize=%u", clnt->msize);
seq_printf(m, ",trans=%s", clnt->trans_mod->name);
switch (clnt->proto_version) {
case p9_proto_legacy:
seq_puts(m, ",noextend");
break;
case p9_proto_2000u:
seq_puts(m, ",version=9p2000.u");
break;
case p9_proto_2000L:
/* Default */
break;
}
if (clnt->trans_mod->show_options)
return clnt->trans_mod->show_options(m, clnt);
return 0;
}
EXPORT_SYMBOL(p9_show_client_options);
/* Some error codes are taken directly from the server replies,
* make sure they are valid.
*/
static int safe_errno(int err)
{
if (err > 0 || err < -MAX_ERRNO) {
p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err);
return -EPROTO;
}
return err;
}
/* Interpret mount option for protocol version */
static int get_protocol_version(char *s)
{
int version = -EINVAL;
if (!strcmp(s, "9p2000")) {
version = p9_proto_legacy;
p9_debug(P9_DEBUG_9P, "Protocol version: Legacy\n");
} else if (!strcmp(s, "9p2000.u")) {
version = p9_proto_2000u;
p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.u\n");
} else if (!strcmp(s, "9p2000.L")) {
version = p9_proto_2000L;
p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n");
} else {
pr_info("Unknown protocol version %s\n", s);
}
return version;
}
/**
* parse_opts - parse mount options into client structure
* @opts: options string passed from mount
* @clnt: existing v9fs client information
*
* Return 0 upon success, -ERRNO upon failure
*/
static int parse_opts(char *opts, struct p9_client *clnt)
{
char *options, *tmp_options;
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
char *s;
int ret = 0;
clnt->proto_version = p9_proto_2000L;
clnt->msize = DEFAULT_MSIZE;
if (!opts)
return 0;
tmp_options = kstrdup(opts, GFP_KERNEL);
if (!tmp_options)
return -ENOMEM;
options = tmp_options;
while ((p = strsep(&options, ",")) != NULL) {
int token, r;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case Opt_msize:
r = match_int(&args[0], &option);
if (r < 0) {
p9_debug(P9_DEBUG_ERROR,
"integer field, but no integer?\n");
ret = r;
continue;
}
if (option < 4096) {
p9_debug(P9_DEBUG_ERROR,
"msize should be at least 4k\n");
ret = -EINVAL;
continue;
}
clnt->msize = option;
break;
case Opt_trans:
s = match_strdup(&args[0]);
if (!s) {
ret = -ENOMEM;
p9_debug(P9_DEBUG_ERROR,
"problem allocating copy of trans arg\n");
goto free_and_return;
}
v9fs_put_trans(clnt->trans_mod);
clnt->trans_mod = v9fs_get_trans_by_name(s);
if (!clnt->trans_mod) {
pr_info("Could not find request transport: %s\n",
s);
ret = -EINVAL;
}
kfree(s);
break;
case Opt_legacy:
clnt->proto_version = p9_proto_legacy;
break;
case Opt_version:
s = match_strdup(&args[0]);
if (!s) {
ret = -ENOMEM;
p9_debug(P9_DEBUG_ERROR,
"problem allocating copy of version arg\n");
goto free_and_return;
}
r = get_protocol_version(s);
if (r < 0)
ret = r;
else
clnt->proto_version = r;
kfree(s);
break;
default:
continue;
}
}
free_and_return:
if (ret)
v9fs_put_trans(clnt->trans_mod);
kfree(tmp_options);
return ret;
}
static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
int alloc_msize)
{
if (likely(c->fcall_cache) && alloc_msize == c->msize) {
fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
fc->cache = c->fcall_cache;
} else {
fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
fc->cache = NULL;
}
if (!fc->sdata)
return -ENOMEM;
fc->capacity = alloc_msize;
return 0;
}
void p9_fcall_fini(struct p9_fcall *fc)
{
/* sdata can be NULL for interrupted requests in trans_rdma,
* and kmem_cache_free does not do NULL-check for us
*/
if (unlikely(!fc->sdata))
return;
if (fc->cache)
kmem_cache_free(fc->cache, fc->sdata);
else
kfree(fc->sdata);
}
EXPORT_SYMBOL(p9_fcall_fini);
static struct kmem_cache *p9_req_cache;
/**
* p9_tag_alloc - Allocate a new request.
* @c: Client session.
* @type: Transaction type.
* @t_size: Buffer size for holding this request
* (automatic calculation by format template if 0).
* @r_size: Buffer size for holding server's reply on this request
* (automatic calculation by format template if 0).
* @fmt: Format template for assembling 9p request message
* (see p9pdu_vwritef).
* @ap: Variable arguments to be fed to passed format template
* (see p9pdu_vwritef).
*
* Context: Process context.
* Return: Pointer to new request.
*/
static struct p9_req_t *
p9_tag_alloc(struct p9_client *c, int8_t type, uint t_size, uint r_size,
const char *fmt, va_list ap)
{
struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS);
int alloc_tsize;
int alloc_rsize;
int tag;
va_list apc;
va_copy(apc, ap);
alloc_tsize = min_t(size_t, c->msize,
t_size ?: p9_msg_buf_size(c, type, fmt, apc));
va_end(apc);
alloc_rsize = min_t(size_t, c->msize,
r_size ?: p9_msg_buf_size(c, type + 1, fmt, ap));
if (!req)
return ERR_PTR(-ENOMEM);
if (p9_fcall_init(c, &req->tc, alloc_tsize))
goto free_req;
if (p9_fcall_init(c, &req->rc, alloc_rsize))
goto free;
p9pdu_reset(&req->tc);
p9pdu_reset(&req->rc);
9p: Transport error uninitialized The p9_tag_alloc() does not initialize the transport error t_err field. The struct p9_req_t *req is allocated and stored in a struct p9_client variable. The field t_err is never initialized before p9_conn_cancel() checks its value. KUMSAN(KernelUninitializedMemorySantizer, a new error detection tool) reports this bug. ================================================================== BUG: KUMSAN: use of uninitialized memory in p9_conn_cancel+0x2d9/0x3b0 Read of size 4 at addr ffff88805f9b600c by task kworker/1:2/1216 CPU: 1 PID: 1216 Comm: kworker/1:2 Not tainted 5.2.0-rc4+ #28 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Workqueue: events p9_write_work Call Trace: dump_stack+0x75/0xae __kumsan_report+0x17c/0x3e6 kumsan_report+0xe/0x20 p9_conn_cancel+0x2d9/0x3b0 p9_write_work+0x183/0x4a0 process_one_work+0x4d1/0x8c0 worker_thread+0x6e/0x780 kthread+0x1ca/0x1f0 ret_from_fork+0x35/0x40 Allocated by task 1979: save_stack+0x19/0x80 __kumsan_kmalloc.constprop.3+0xbc/0x120 kmem_cache_alloc+0xa7/0x170 p9_client_prepare_req.part.9+0x3b/0x380 p9_client_rpc+0x15e/0x880 p9_client_create+0x3d0/0xac0 v9fs_session_init+0x192/0xc80 v9fs_mount+0x67/0x470 legacy_get_tree+0x70/0xd0 vfs_get_tree+0x4a/0x1c0 do_mount+0xba9/0xf90 ksys_mount+0xa8/0x120 __x64_sys_mount+0x62/0x70 do_syscall_64+0x6d/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 0: (stack is not available) The buggy address belongs to the object at ffff88805f9b6008 which belongs to the cache p9_req_t of size 144 The buggy address is located 4 bytes inside of 144-byte region [ffff88805f9b6008, ffff88805f9b6098) The buggy address belongs to the page: page:ffffea00017e6d80 refcount:1 mapcount:0 mapping:ffff888068b63740 index:0xffff88805f9b7d90 compound_mapcount: 0 flags: 0x100000000010200(slab|head) raw: 0100000000010200 ffff888068b66450 ffff888068b66450 ffff888068b63740 raw: ffff88805f9b7d90 0000000000100001 00000001ffffffff 0000000000000000 page dumped because: kumsan: bad access detected ================================================================== Link: http://lkml.kernel.org/r/20190613070854.10434-1-shuaibinglu@126.com Signed-off-by: Lu Shuaibing <shuaibinglu@126.com> [dominique.martinet@cea.fr: grouped the added init with the others] Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
2019-06-13 07:08:54 +00:00
req->t_err = 0;
req->status = REQ_STATUS_ALLOC;
9p: set req refcount to zero to avoid uninitialized usage When a new request is allocated, the refcount will be zero if it is reused, but if the request is newly allocated from slab, it is not fully initialized before being added to idr. If the p9_read_work got a response before the refcount initiated. It will use a uninitialized req, which will result in a bad request data struct. Here is the logs from syzbot. Corrupted memory at 0xffff88807eade00b [ 0xff 0x07 0x00 0x00 0x00 0x00 0x00 0x00 . . . . . . . . ] (in kfence-#110): p9_fcall_fini net/9p/client.c:248 [inline] p9_req_put net/9p/client.c:396 [inline] p9_req_put+0x208/0x250 net/9p/client.c:390 p9_client_walk+0x247/0x540 net/9p/client.c:1165 clone_fid fs/9p/fid.h:21 [inline] v9fs_fid_xattr_set+0xe4/0x2b0 fs/9p/xattr.c:118 v9fs_xattr_set fs/9p/xattr.c:100 [inline] v9fs_xattr_handler_set+0x6f/0x120 fs/9p/xattr.c:159 __vfs_setxattr+0x119/0x180 fs/xattr.c:182 __vfs_setxattr_noperm+0x129/0x5f0 fs/xattr.c:216 __vfs_setxattr_locked+0x1d3/0x260 fs/xattr.c:277 vfs_setxattr+0x143/0x340 fs/xattr.c:309 setxattr+0x146/0x160 fs/xattr.c:617 path_setxattr+0x197/0x1c0 fs/xattr.c:636 __do_sys_setxattr fs/xattr.c:652 [inline] __se_sys_setxattr fs/xattr.c:648 [inline] __ia32_sys_setxattr+0xc0/0x160 fs/xattr.c:648 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] __do_fast_syscall_32+0x65/0xf0 arch/x86/entry/common.c:178 do_fast_syscall_32+0x33/0x70 arch/x86/entry/common.c:203 entry_SYSENTER_compat_after_hwframe+0x70/0x82 Below is a similar scenario, the scenario in the syzbot log looks more complicated than this one, but this patch can fix it. T21124 p9_read_work ======================== second trans ================================= p9_client_walk p9_client_rpc p9_client_prepare_req p9_tag_alloc req = kmem_cache_alloc(p9_req_cache, GFP_NOFS); tag = idr_alloc << preempted >> req->tc.tag = tag; /* req->[refcount/tag] == uninitialized */ m->rreq = p9_tag_lookup(m->client, m->rc.tag); /* increments uninitalized refcount */ refcount_set(&req->refcount, 2); /* cb drops one ref */ p9_client_cb(req) /* reader thread drops its ref: request is incorrectly freed */ p9_req_put(req) /* use after free and ref underflow */ p9_req_put(req) To fix it, we can initialize the refcount to zero before add to idr. Link: https://lkml.kernel.org/r/20221201033310.18589-1-schspa@gmail.com Cc: stable@vger.kernel.org # 6.0+ due to 6cda12864cb0 ("9p: Drop kref usage") Fixes: 728356dedeff ("9p: Add refcount to p9_req_t") Reported-by: syzbot+8f1060e2aaf8ca55220b@syzkaller.appspotmail.com Signed-off-by: Schspa Shi <schspa@gmail.com> Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com> Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
2022-12-01 03:33:10 +00:00
/* refcount needs to be set to 0 before inserting into the idr
* so p9_tag_lookup does not accept a request that is not fully
* initialized. refcount_set to 2 below will mark request ready.
*/
refcount_set(&req->refcount, 0);
init_waitqueue_head(&req->wq);
INIT_LIST_HEAD(&req->req_list);
idr_preload(GFP_NOFS);
spin_lock_irq(&c->lock);
if (type == P9_TVERSION)
tag = idr_alloc(&c->reqs, req, P9_NOTAG, P9_NOTAG + 1,
GFP_NOWAIT);
else
tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
req->tc.tag = tag;
spin_unlock_irq(&c->lock);
idr_preload_end();
if (tag < 0)
goto free;
/* Init ref to two because in the general case there is one ref
* that is put asynchronously by a writer thread, one ref
* temporarily given by p9_tag_lookup and put by p9_client_cb
* in the recv thread, and one ref put by p9_req_put in the
* main thread. The only exception is virtio that does not use
* p9_tag_lookup but does not have a writer thread either
* (the write happens synchronously in the request/zc_request
* callback), so p9_client_cb eats the second ref there
* as the pointer is duplicated directly by virtqueue_add_sgs()
*/
refcount_set(&req->refcount, 2);
return req;
free:
p9_fcall_fini(&req->tc);
p9_fcall_fini(&req->rc);
free_req:
kmem_cache_free(p9_req_cache, req);
return ERR_PTR(-ENOMEM);
}
/**
* p9_tag_lookup - Look up a request by tag.
* @c: Client session.
* @tag: Transaction ID.
*
* Context: Any context.
* Return: A request, or %NULL if there is no request with that tag.
*/
struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
{
struct p9_req_t *req;
rcu_read_lock();
again:
req = idr_find(&c->reqs, tag);
if (req) {
/* We have to be careful with the req found under rcu_read_lock
* Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
* ref again without corrupting other data, then check again
* that the tag matches once we have the ref
*/
if (!p9_req_try_get(req))
goto again;
if (req->tc.tag != tag) {
p9_req_put(c, req);
goto again;
}
}
rcu_read_unlock();
return req;
}
EXPORT_SYMBOL(p9_tag_lookup);
/**
* p9_tag_remove - Remove a tag.
* @c: Client session.
* @r: Request of reference.
*
* Context: Any context.
*/
static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
{
unsigned long flags;
u16 tag = r->tc.tag;
p9_debug(P9_DEBUG_MUX, "freeing clnt %p req %p tag: %d\n", c, r, tag);
spin_lock_irqsave(&c->lock, flags);
idr_remove(&c->reqs, tag);
spin_unlock_irqrestore(&c->lock, flags);
}
int p9_req_put(struct p9_client *c, struct p9_req_t *r)
{
if (refcount_dec_and_test(&r->refcount)) {
p9_tag_remove(c, r);
p9_fcall_fini(&r->tc);
p9_fcall_fini(&r->rc);
kmem_cache_free(p9_req_cache, r);
return 1;
}
return 0;
}
EXPORT_SYMBOL(p9_req_put);
/**
* p9_tag_cleanup - cleans up tags structure and reclaims resources
* @c: v9fs client struct
*
* This frees resources associated with the tags structure
*
*/
static void p9_tag_cleanup(struct p9_client *c)
{
struct p9_req_t *req;
int id;
rcu_read_lock();
idr_for_each_entry(&c->reqs, req, id) {
pr_info("Tag %d still in use\n", id);
if (p9_req_put(c, req) == 0)
pr_warn("Packet with tag %d has still references",
req->tc.tag);
}
rcu_read_unlock();
}
/**
* p9_client_cb - call back from transport to client
net: 9p: Fix kerneldoc warnings of missing parameters etc net/9p/client.c:420: warning: Function parameter or member 'c' not described in 'p9_client_cb' net/9p/client.c:420: warning: Function parameter or member 'req' not described in 'p9_client_cb' net/9p/client.c:420: warning: Function parameter or member 'status' not described in 'p9_client_cb' net/9p/client.c:568: warning: Function parameter or member 'uidata' not described in 'p9_check_zc_errors' net/9p/trans_common.c:23: warning: Function parameter or member 'nr_pages' not described in 'p9_release_pages' net/9p/trans_common.c:23: warning: Function parameter or member 'pages' not described in 'p9_release_pages' net/9p/trans_fd.c:132: warning: Function parameter or member 'rreq' not described in 'p9_conn' net/9p/trans_fd.c:132: warning: Function parameter or member 'wreq' not described in 'p9_conn' net/9p/trans_fd.c:56: warning: Function parameter or member 'privport' not described in 'p9_fd_opts' net/9p/trans_rdma.c:113: warning: Function parameter or member 'cqe' not described in 'p9_rdma_context' net/9p/trans_rdma.c:129: warning: Function parameter or member 'privport' not described in 'p9_rdma_opts' net/9p/trans_virtio.c:215: warning: Function parameter or member 'limit' not described in 'pack_sg_list_p' net/9p/trans_virtio.c:83: warning: Function parameter or member 'chan_list' not described in 'virtio_chan' net/9p/trans_virtio.c:83: warning: Function parameter or member 'p9_max_pages' not described in 'virtio_chan' net/9p/trans_virtio.c:83: warning: Function parameter or member 'ring_bufs_avail' not described in 'virtio_chan' net/9p/trans_virtio.c:83: warning: Function parameter or member 'tag' not described in 'virtio_chan' net/9p/trans_virtio.c:83: warning: Function parameter or member 'vc_wq' not described in 'virtio_chan' Signed-off-by: Andrew Lunn <andrew@lunn.ch> Acked-by: Dominique Martinet <asmadeus@codewreck.org> Link: https://lore.kernel.org/r/20201031182655.1082065-1-andrew@lunn.ch Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:26:55 +00:00
* @c: client state
* @req: request received
* @status: request status, one of REQ_STATUS_*
*
*/
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
{
p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
/* This barrier is needed to make sure any change made to req before
* the status change is visible to another thread
*/
smp_wmb();
WRITE_ONCE(req->status, status);
wake_up(&req->wq);
p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
p9_req_put(c, req);
}
EXPORT_SYMBOL(p9_client_cb);
/**
* p9_parse_header - parse header arguments out of a packet
* @pdu: packet to parse
* @size: size of packet
* @type: type of request
* @tag: tag of packet
* @rewind: set if we need to rewind offset afterwards
*/
int
p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type,
int16_t *tag, int rewind)
{
s8 r_type;
s16 r_tag;
s32 r_size;
int offset = pdu->offset;
int err;
pdu->offset = 0;
err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag);
if (err)
goto rewind_and_exit;
if (type)
*type = r_type;
if (tag)
*tag = r_tag;
if (size)
*size = r_size;
if (pdu->size != r_size || r_size < 7) {
err = -EINVAL;
goto rewind_and_exit;
}
pdu->id = r_type;
pdu->tag = r_tag;
p9_debug(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n",
pdu->size, pdu->id, pdu->tag);
rewind_and_exit:
if (rewind)
pdu->offset = offset;
return err;
}
EXPORT_SYMBOL(p9_parse_header);
/**
* p9_check_errors - check 9p packet for error return and process it
* @c: current client instance
* @req: request to parse and check for error conditions
*
* returns error code if one is discovered, otherwise returns 0
*
* this will have to be more complicated if we have multiple
* error packet types
*/
static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
{
s8 type;
int err;
int ecode;
err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
if (req->rc.size > req->rc.capacity && !req->rc.zc) {
pr_err("requested packet size too big: %d does not fit %zu (type=%d)\n",
req->rc.size, req->rc.capacity, req->rc.id);
return -EIO;
}
/* dump the response from server
* This should be after check errors which poplulate pdu_fcall.
*/
trace_9p_protocol_dump(c, &req->rc);
if (err) {
p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
return err;
}
if (type != P9_RERROR && type != P9_RLERROR)
return 0;
if (!p9_is_proto_dotl(c)) {
char *ename = NULL;
err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
if (err) {
kfree(ename);
goto out_err;
}
remove lots of IS_ERR_VALUE abuses Most users of IS_ERR_VALUE() in the kernel are wrong, as they pass an 'int' into a function that takes an 'unsigned long' argument. This happens to work because the type is sign-extended on 64-bit architectures before it gets converted into an unsigned type. However, anything that passes an 'unsigned short' or 'unsigned int' argument into IS_ERR_VALUE() is guaranteed to be broken, as are 8-bit integers and types that are wider than 'unsigned long'. Andrzej Hajda has already fixed a lot of the worst abusers that were causing actual bugs, but it would be nice to prevent any users that are not passing 'unsigned long' arguments. This patch changes all users of IS_ERR_VALUE() that I could find on 32-bit ARM randconfig builds and x86 allmodconfig. For the moment, this doesn't change the definition of IS_ERR_VALUE() because there are probably still architecture specific users elsewhere. Almost all the warnings I got are for files that are better off using 'if (err)' or 'if (err < 0)'. The only legitimate user I could find that we get a warning for is the (32-bit only) freescale fman driver, so I did not remove the IS_ERR_VALUE() there but changed the type to 'unsigned long'. For 9pfs, I just worked around one user whose calling conventions are so obscure that I did not dare change the behavior. I was using this definition for testing: #define IS_ERR_VALUE(x) ((unsigned long*)NULL == (typeof (x)*)NULL && \ unlikely((unsigned long long)(x) >= (unsigned long long)(typeof(x))-MAX_ERRNO)) which ends up making all 16-bit or wider types work correctly with the most plausible interpretation of what IS_ERR_VALUE() was supposed to return according to its users, but also causes a compile-time warning for any users that do not pass an 'unsigned long' argument. I suggested this approach earlier this year, but back then we ended up deciding to just fix the users that are obviously broken. After the initial warning that caused me to get involved in the discussion (fs/gfs2/dir.c) showed up again in the mainline kernel, Linus asked me to send the whole thing again. [ Updated the 9p parts as per Al Viro - Linus ] Signed-off-by: Arnd Bergmann <arnd@arndb.de> Cc: Andrzej Hajda <a.hajda@samsung.com> Cc: Andrew Morton <akpm@linux-foundation.org> Link: https://lkml.org/lkml/2016/1/7/363 Link: https://lkml.org/lkml/2016/5/27/486 Acked-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> # For nvmem part Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-27 21:23:25 +00:00
if (p9_is_proto_dotu(c) && ecode < 512)
err = -ecode;
remove lots of IS_ERR_VALUE abuses Most users of IS_ERR_VALUE() in the kernel are wrong, as they pass an 'int' into a function that takes an 'unsigned long' argument. This happens to work because the type is sign-extended on 64-bit architectures before it gets converted into an unsigned type. However, anything that passes an 'unsigned short' or 'unsigned int' argument into IS_ERR_VALUE() is guaranteed to be broken, as are 8-bit integers and types that are wider than 'unsigned long'. Andrzej Hajda has already fixed a lot of the worst abusers that were causing actual bugs, but it would be nice to prevent any users that are not passing 'unsigned long' arguments. This patch changes all users of IS_ERR_VALUE() that I could find on 32-bit ARM randconfig builds and x86 allmodconfig. For the moment, this doesn't change the definition of IS_ERR_VALUE() because there are probably still architecture specific users elsewhere. Almost all the warnings I got are for files that are better off using 'if (err)' or 'if (err < 0)'. The only legitimate user I could find that we get a warning for is the (32-bit only) freescale fman driver, so I did not remove the IS_ERR_VALUE() there but changed the type to 'unsigned long'. For 9pfs, I just worked around one user whose calling conventions are so obscure that I did not dare change the behavior. I was using this definition for testing: #define IS_ERR_VALUE(x) ((unsigned long*)NULL == (typeof (x)*)NULL && \ unlikely((unsigned long long)(x) >= (unsigned long long)(typeof(x))-MAX_ERRNO)) which ends up making all 16-bit or wider types work correctly with the most plausible interpretation of what IS_ERR_VALUE() was supposed to return according to its users, but also causes a compile-time warning for any users that do not pass an 'unsigned long' argument. I suggested this approach earlier this year, but back then we ended up deciding to just fix the users that are obviously broken. After the initial warning that caused me to get involved in the discussion (fs/gfs2/dir.c) showed up again in the mainline kernel, Linus asked me to send the whole thing again. [ Updated the 9p parts as per Al Viro - Linus ] Signed-off-by: Arnd Bergmann <arnd@arndb.de> Cc: Andrzej Hajda <a.hajda@samsung.com> Cc: Andrew Morton <akpm@linux-foundation.org> Link: https://lkml.org/lkml/2016/1/7/363 Link: https://lkml.org/lkml/2016/5/27/486 Acked-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> # For nvmem part Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-27 21:23:25 +00:00
if (!err) {
err = p9_errstr2errno(ename, strlen(ename));
p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n",
-ecode, ename);
}
kfree(ename);
} else {
err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
if (err)
goto out_err;
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
}
return err;
out_err:
p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
return err;
}
static struct p9_req_t *
p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
/**
* p9_client_flush - flush (cancel) a request
* @c: client state
* @oldreq: request to cancel
*
* This sents a flush for a particular request and links
* the flush request to the original request. The current
* code only supports a single flush request although the protocol
* allows for multiple flush requests to be sent for a single request.
*
*/
static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
{
struct p9_req_t *req;
s16 oldtag;
int err;
err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
if (err)
return err;
p9_debug(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag);
req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag);
if (IS_ERR(req))
return PTR_ERR(req);
/* if we haven't received a response for oldreq,
* remove it from the list
*/
if (READ_ONCE(oldreq->status) == REQ_STATUS_SENT) {
if (c->trans_mod->cancelled)
c->trans_mod->cancelled(c, oldreq);
}
p9_req_put(c, req);
return 0;
}
static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
int8_t type, uint t_size, uint r_size,
const char *fmt, va_list ap)
{
int err;
struct p9_req_t *req;
va_list apc;
p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);
/* we allow for any status other than disconnected */
if (c->status == Disconnected)
return ERR_PTR(-EIO);
/* if status is begin_disconnected we allow only clunk request */
if (c->status == BeginDisconnect && type != P9_TCLUNK)
return ERR_PTR(-EIO);
va_copy(apc, ap);
req = p9_tag_alloc(c, type, t_size, r_size, fmt, apc);
va_end(apc);
if (IS_ERR(req))
return req;
/* marshall the data */
p9pdu_prepare(&req->tc, req->tc.tag, type);
err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
if (err)
goto reterr;
p9pdu_finalize(c, &req->tc);
trace_9p_client_req(c, type, req->tc.tag);
return req;
reterr:
p9_req_put(c, req);
/* We have to put also the 2nd reference as it won't be used */
p9_req_put(c, req);
return ERR_PTR(err);
}
/**
* p9_client_rpc - issue a request and wait for a response
* @c: client session
* @type: type of request
* @fmt: protocol format string (see protocol.c)
*
* Returns request structure (which client must free using p9_req_put)
*/
static struct p9_req_t *
p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
{
va_list ap;
int sigpending, err;
unsigned long flags;
struct p9_req_t *req;
/* Passing zero for tsize/rsize to p9_client_prepare_req() tells it to
* auto determine an appropriate (small) request/response size
* according to actual message data being sent. Currently RDMA
* transport is excluded from this response message size optimization,
* as it would not cope with it, due to its pooled response buffers
* (using an optimized request size for RDMA as well though).
*/
const uint tsize = 0;
const uint rsize = c->trans_mod->pooled_rbuffers ? c->msize : 0;
va_start(ap, fmt);
req = p9_client_prepare_req(c, type, tsize, rsize, fmt, ap);
va_end(ap);
if (IS_ERR(req))
return req;
req->tc.zc = false;
req->rc.zc = false;
if (signal_pending(current)) {
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
} else {
sigpending = 0;
}
err = c->trans_mod->request(c, req);
if (err < 0) {
/* write won't happen */
p9_req_put(c, req);
if (err != -ERESTARTSYS && err != -EFAULT)
c->status = Disconnected;
net/9p: avoid -ERESTARTSYS leak to userspace If it was interrupted by a signal, the 9p client may need to send some more requests to the server for cleanup before returning to userspace. To avoid such a last minute request to be interrupted right away, the client memorizes if a signal is pending, clears TIF_SIGPENDING, handles the request and calls recalc_sigpending() before returning. Unfortunately, if the transmission of this cleanup request fails for any reason, the transport returns an error and the client propagates it right away, without calling recalc_sigpending(). This ends up with -ERESTARTSYS from the initially interrupted request crawling up to syscall exit, with TIF_SIGPENDING cleared by the cleanup request. The specific signal handling code, which is responsible for converting -ERESTARTSYS to -EINTR is not called, and userspace receives the confusing errno value: open: Unknown error 512 (512) This is really hard to hit in real life. I discovered the issue while working on hot-unplug of a virtio-9p-pci device with an instrumented QEMU allowing to control request completion. Both p9_client_zc_rpc() and p9_client_rpc() functions have this buggy error path actually. Their code flow is a bit obscure and the best thing to do would probably be a full rewrite: to really ensure this situation of clearing TIF_SIGPENDING and returning -ERESTARTSYS can never happen. But given the general lack of interest for the 9p code, I won't risk breaking more things. So this patch simply fixes the buggy paths in both functions with a trivial label+goto. Thanks to Laurent Dufour for his help and suggestions on how to find the root cause and how to fix it. Link: http://lkml.kernel.org/r/152062809886.10599.7361006774123053312.stgit@bahia.lan Signed-off-by: Greg Kurz <groug@kaod.org> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com> Cc: Eric Van Hensbergen <ericvh@gmail.com> Cc: Ron Minnich <rminnich@sandia.gov> Cc: Latchesar Ionkov <lucho@ionkov.net> Cc: David Miller <davem@davemloft.net> Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-04-05 23:19:44 +00:00
goto recalc_sigpending;
}
again:
/* Wait for the response */
err = wait_event_killable(req->wq,
READ_ONCE(req->status) >= REQ_STATUS_RCVD);
/* Make sure our req is coherent with regard to updates in other
* threads - echoes to wmb() in the callback
*/
smp_rmb();
if (err == -ERESTARTSYS && c->status == Connected &&
type == P9_TFLUSH) {
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
goto again;
}
if (READ_ONCE(req->status) == REQ_STATUS_ERROR) {
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
err = req->t_err;
}
if (err == -ERESTARTSYS && c->status == Connected) {
p9_debug(P9_DEBUG_MUX, "flushing\n");
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
if (c->trans_mod->cancel(c, req))
p9_client_flush(c, req);
/* if we received the response anyway, don't signal error */
if (READ_ONCE(req->status) == REQ_STATUS_RCVD)
err = 0;
}
net/9p: avoid -ERESTARTSYS leak to userspace If it was interrupted by a signal, the 9p client may need to send some more requests to the server for cleanup before returning to userspace. To avoid such a last minute request to be interrupted right away, the client memorizes if a signal is pending, clears TIF_SIGPENDING, handles the request and calls recalc_sigpending() before returning. Unfortunately, if the transmission of this cleanup request fails for any reason, the transport returns an error and the client propagates it right away, without calling recalc_sigpending(). This ends up with -ERESTARTSYS from the initially interrupted request crawling up to syscall exit, with TIF_SIGPENDING cleared by the cleanup request. The specific signal handling code, which is responsible for converting -ERESTARTSYS to -EINTR is not called, and userspace receives the confusing errno value: open: Unknown error 512 (512) This is really hard to hit in real life. I discovered the issue while working on hot-unplug of a virtio-9p-pci device with an instrumented QEMU allowing to control request completion. Both p9_client_zc_rpc() and p9_client_rpc() functions have this buggy error path actually. Their code flow is a bit obscure and the best thing to do would probably be a full rewrite: to really ensure this situation of clearing TIF_SIGPENDING and returning -ERESTARTSYS can never happen. But given the general lack of interest for the 9p code, I won't risk breaking more things. So this patch simply fixes the buggy paths in both functions with a trivial label+goto. Thanks to Laurent Dufour for his help and suggestions on how to find the root cause and how to fix it. Link: http://lkml.kernel.org/r/152062809886.10599.7361006774123053312.stgit@bahia.lan Signed-off-by: Greg Kurz <groug@kaod.org> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com> Cc: Eric Van Hensbergen <ericvh@gmail.com> Cc: Ron Minnich <rminnich@sandia.gov> Cc: Latchesar Ionkov <lucho@ionkov.net> Cc: David Miller <davem@davemloft.net> Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-04-05 23:19:44 +00:00
recalc_sigpending:
if (sigpending) {
spin_lock_irqsave(&current->sighand->siglock, flags);
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
if (err < 0)
goto reterr;
err = p9_check_errors(c, req);
trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
p9_req_put(c, req);
return ERR_PTR(safe_errno(err));
}
/**
* p9_client_zc_rpc - issue a request and wait for a response
* @c: client session
* @type: type of request
* @uidata: destination for zero copy read
* @uodata: source for zero copy write
* @inlen: read buffer size
* @olen: write buffer size
* @in_hdrlen: reader header size, This is the size of response protocol data
* @fmt: protocol format string (see protocol.c)
*
* Returns request structure (which client must free using p9_req_put)
*/
static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
struct iov_iter *uidata,
struct iov_iter *uodata,
int inlen, int olen, int in_hdrlen,
const char *fmt, ...)
{
va_list ap;
int sigpending, err;
unsigned long flags;
struct p9_req_t *req;
va_start(ap, fmt);
/* We allocate a inline protocol data of only 4k bytes.
* The actual content is passed in zero-copy fashion.
*/
req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, P9_ZC_HDR_SZ, fmt, ap);
va_end(ap);
if (IS_ERR(req))
return req;
req->tc.zc = true;
req->rc.zc = true;
if (signal_pending(current)) {
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
} else {
sigpending = 0;
}
err = c->trans_mod->zc_request(c, req, uidata, uodata,
inlen, olen, in_hdrlen);
if (err < 0) {
if (err == -EIO)
c->status = Disconnected;
if (err != -ERESTARTSYS)
net/9p: avoid -ERESTARTSYS leak to userspace If it was interrupted by a signal, the 9p client may need to send some more requests to the server for cleanup before returning to userspace. To avoid such a last minute request to be interrupted right away, the client memorizes if a signal is pending, clears TIF_SIGPENDING, handles the request and calls recalc_sigpending() before returning. Unfortunately, if the transmission of this cleanup request fails for any reason, the transport returns an error and the client propagates it right away, without calling recalc_sigpending(). This ends up with -ERESTARTSYS from the initially interrupted request crawling up to syscall exit, with TIF_SIGPENDING cleared by the cleanup request. The specific signal handling code, which is responsible for converting -ERESTARTSYS to -EINTR is not called, and userspace receives the confusing errno value: open: Unknown error 512 (512) This is really hard to hit in real life. I discovered the issue while working on hot-unplug of a virtio-9p-pci device with an instrumented QEMU allowing to control request completion. Both p9_client_zc_rpc() and p9_client_rpc() functions have this buggy error path actually. Their code flow is a bit obscure and the best thing to do would probably be a full rewrite: to really ensure this situation of clearing TIF_SIGPENDING and returning -ERESTARTSYS can never happen. But given the general lack of interest for the 9p code, I won't risk breaking more things. So this patch simply fixes the buggy paths in both functions with a trivial label+goto. Thanks to Laurent Dufour for his help and suggestions on how to find the root cause and how to fix it. Link: http://lkml.kernel.org/r/152062809886.10599.7361006774123053312.stgit@bahia.lan Signed-off-by: Greg Kurz <groug@kaod.org> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com> Cc: Eric Van Hensbergen <ericvh@gmail.com> Cc: Ron Minnich <rminnich@sandia.gov> Cc: Latchesar Ionkov <lucho@ionkov.net> Cc: David Miller <davem@davemloft.net> Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-04-05 23:19:44 +00:00
goto recalc_sigpending;
}
if (READ_ONCE(req->status) == REQ_STATUS_ERROR) {
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
err = req->t_err;
}
if (err == -ERESTARTSYS && c->status == Connected) {
p9_debug(P9_DEBUG_MUX, "flushing\n");
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
if (c->trans_mod->cancel(c, req))
p9_client_flush(c, req);
/* if we received the response anyway, don't signal error */
if (READ_ONCE(req->status) == REQ_STATUS_RCVD)
err = 0;
}
net/9p: avoid -ERESTARTSYS leak to userspace If it was interrupted by a signal, the 9p client may need to send some more requests to the server for cleanup before returning to userspace. To avoid such a last minute request to be interrupted right away, the client memorizes if a signal is pending, clears TIF_SIGPENDING, handles the request and calls recalc_sigpending() before returning. Unfortunately, if the transmission of this cleanup request fails for any reason, the transport returns an error and the client propagates it right away, without calling recalc_sigpending(). This ends up with -ERESTARTSYS from the initially interrupted request crawling up to syscall exit, with TIF_SIGPENDING cleared by the cleanup request. The specific signal handling code, which is responsible for converting -ERESTARTSYS to -EINTR is not called, and userspace receives the confusing errno value: open: Unknown error 512 (512) This is really hard to hit in real life. I discovered the issue while working on hot-unplug of a virtio-9p-pci device with an instrumented QEMU allowing to control request completion. Both p9_client_zc_rpc() and p9_client_rpc() functions have this buggy error path actually. Their code flow is a bit obscure and the best thing to do would probably be a full rewrite: to really ensure this situation of clearing TIF_SIGPENDING and returning -ERESTARTSYS can never happen. But given the general lack of interest for the 9p code, I won't risk breaking more things. So this patch simply fixes the buggy paths in both functions with a trivial label+goto. Thanks to Laurent Dufour for his help and suggestions on how to find the root cause and how to fix it. Link: http://lkml.kernel.org/r/152062809886.10599.7361006774123053312.stgit@bahia.lan Signed-off-by: Greg Kurz <groug@kaod.org> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com> Cc: Eric Van Hensbergen <ericvh@gmail.com> Cc: Ron Minnich <rminnich@sandia.gov> Cc: Latchesar Ionkov <lucho@ionkov.net> Cc: David Miller <davem@davemloft.net> Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-04-05 23:19:44 +00:00
recalc_sigpending:
if (sigpending) {
spin_lock_irqsave(&current->sighand->siglock, flags);
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
if (err < 0)
goto reterr;
9p: handling Rerror without copy_from_iter_full() p9_client_zc_rpc()/p9_check_zc_errors() are playing fast and loose with copy_from_iter_full(). Reading from file is done by sending Tread request. Response consists of fixed-sized header (including the amount of data actually read) followed by the data itself. For zero-copy case we arrange the things so that the first 11 bytes of reply go into the fixed-sized buffer, with the rest going straight into the pages we want to read into. What makes the things inconvenient is that sglist describing what should go where has to be set *before* the reply arrives. As the result, if reply is an error, the things get interesting. On success we get size[4] Rread tag[2] count[4] data[count] For error layout varies depending upon the protocol variant - in original 9P and 9P2000 it's size[4] Rerror tag[2] len[2] error[len] in 9P2000.U size[4] Rerror tag[2] len[2] error[len] errno[4] in 9P2000.L size[4] Rlerror tag[2] errno[4] The last case is nice and simple - we have an 11-byte response that fits into the fixed-sized buffer we hoped to get an Rread into. In other two, though, we get a variable-length string spill into the pages we'd prepared for the data to be read. Had that been in fixed-sized buffer (which is actually 4K), we would've dealt with that the same way we handle non-zerocopy case. However, for zerocopy it doesn't end up there, so we need to copy it from those pages. The trouble is, by the time we get around to that, the references to pages in question are already dropped. As the result, p9_zc_check_errors() tries to get the data using copy_from_iter_full(). Unfortunately, the iov_iter it's trying to read from might *NOT* be capable of that. It is, after all, a data destination, not data source. In particular, if it's an ITER_PIPE one, copy_from_iter_full() will simply fail. In ->zc_request() itself we do have those pages and dealing with the problem in there would be a simple matter of memcpy_from_page() into the fixed-sized buffer. Moreover, it isn't hard to recognize the (rare) case when such copying is needed. That way we get rid of p9_zc_check_errors() entirely - p9_check_errors() can be used instead both for zero-copy and non-zero-copy cases. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2022-06-09 14:01:34 +00:00
err = p9_check_errors(c, req);
trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
p9_req_put(c, req);
return ERR_PTR(safe_errno(err));
}
static struct p9_fid *p9_fid_create(struct p9_client *clnt)
{
int ret;
struct p9_fid *fid;
p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
net/9p: Initialize the iounit field during fid creation Ensure that the fid's iounit field is set to zero when a new fid is created. Certain 9P operations, such as OPEN and CREATE, allow the server to reply with an iounit size which the client code assigns to the p9_fid struct shortly after the fid is created by p9_fid_create(). On the other hand, an XATTRWALK operation doesn't allow for the server to specify an iounit value. The iounit field of the newly allocated p9_fid struct remained uninitialized in that case. Depending on allocation patterns, the iounit value could have been something reasonable that was carried over from previously freed fids or, in the worst case, could have been arbitrary values from non-fid related usages of the memory location. The bug was detected in the Windows Subsystem for Linux 2 (WSL2) kernel after the uninitialized iounit field resulted in the typical sequence of two getxattr(2) syscalls, one to get the size of an xattr and another after allocating a sufficiently sized buffer to fit the xattr value, to hit an unexpected ERANGE error in the second call to getxattr(2). An uninitialized iounit field would sometimes force rsize to be smaller than the xattr value size in p9_client_read_once() and the 9P server in WSL refused to chunk up the READ on the attr_fid and, instead, returned ERANGE to the client. The virtfs server in QEMU seems happy to chunk up the READ and this problem goes undetected there. Link: https://lkml.kernel.org/r/20220710141402.803295-1-tyhicks@linux.microsoft.com Fixes: ebf46264a004 ("fs/9p: Add support user. xattr") Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com> Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com> Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
2022-07-10 14:14:02 +00:00
fid = kzalloc(sizeof(*fid), GFP_KERNEL);
if (!fid)
return NULL;
fid->mode = -1;
fid->uid = current_fsuid();
fid->clnt = clnt;
refcount_set(&fid->count, 1);
idr_preload(GFP_KERNEL);
spin_lock_irq(&clnt->lock);
ret = idr_alloc_u32(&clnt->fids, fid, &fid->fid, P9_NOFID - 1,
GFP_NOWAIT);
spin_unlock_irq(&clnt->lock);
idr_preload_end();
if (!ret) {
trace_9p_fid_ref(fid, P9_FID_REF_CREATE);
return fid;
}
kfree(fid);
return NULL;
}
static void p9_fid_destroy(struct p9_fid *fid)
{
struct p9_client *clnt;
unsigned long flags;
p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid);
trace_9p_fid_ref(fid, P9_FID_REF_DESTROY);
clnt = fid->clnt;
spin_lock_irqsave(&clnt->lock, flags);
idr_remove(&clnt->fids, fid->fid);
spin_unlock_irqrestore(&clnt->lock, flags);
kfree(fid->rdir);
kfree(fid);
}
/* We also need to export tracepoint symbols for tracepoint_enabled() */
EXPORT_TRACEPOINT_SYMBOL(9p_fid_ref);
void do_trace_9p_fid_get(struct p9_fid *fid)
{
trace_9p_fid_ref(fid, P9_FID_REF_GET);
}
EXPORT_SYMBOL(do_trace_9p_fid_get);
void do_trace_9p_fid_put(struct p9_fid *fid)
{
trace_9p_fid_ref(fid, P9_FID_REF_PUT);
}
EXPORT_SYMBOL(do_trace_9p_fid_put);
static int p9_client_version(struct p9_client *c)
{
int err;
struct p9_req_t *req;
char *version = NULL;
int msize;
p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n",
c->msize, c->proto_version);
switch (c->proto_version) {
case p9_proto_2000L:
req = p9_client_rpc(c, P9_TVERSION, "ds",
c->msize, "9P2000.L");
break;
case p9_proto_2000u:
req = p9_client_rpc(c, P9_TVERSION, "ds",
c->msize, "9P2000.u");
break;
case p9_proto_legacy:
req = p9_client_rpc(c, P9_TVERSION, "ds",
c->msize, "9P2000");
break;
default:
return -EINVAL;
}
if (IS_ERR(req))
return PTR_ERR(req);
err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
if (err) {
p9_debug(P9_DEBUG_9P, "version error %d\n", err);
trace_9p_protocol_dump(c, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
if (!strncmp(version, "9P2000.L", 8)) {
c->proto_version = p9_proto_2000L;
} else if (!strncmp(version, "9P2000.u", 8)) {
c->proto_version = p9_proto_2000u;
} else if (!strncmp(version, "9P2000", 6)) {
c->proto_version = p9_proto_legacy;
} else {
p9_debug(P9_DEBUG_ERROR,
"server returned an unknown version: %s\n", version);
err = -EREMOTEIO;
goto error;
}
if (msize < 4096) {
p9_debug(P9_DEBUG_ERROR,
"server returned a msize < 4096: %d\n", msize);
err = -EREMOTEIO;
goto error;
}
if (msize < c->msize)
c->msize = msize;
error:
kfree(version);
p9_req_put(c, req);
return err;
}
struct p9_client *p9_client_create(const char *dev_name, char *options)
{
int err;
struct p9_client *clnt;
char *client_id;
clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
return ERR_PTR(-ENOMEM);
clnt->trans_mod = NULL;
clnt->trans = NULL;
clnt->fcall_cache = NULL;
client_id = utsname()->nodename;
memcpy(clnt->name, client_id, strlen(client_id) + 1);
spin_lock_init(&clnt->lock);
idr_init(&clnt->fids);
idr_init(&clnt->reqs);
err = parse_opts(options, clnt);
if (err < 0)
goto free_client;
if (!clnt->trans_mod)
clnt->trans_mod = v9fs_get_default_trans();
if (!clnt->trans_mod) {
err = -EPROTONOSUPPORT;
p9_debug(P9_DEBUG_ERROR,
"No transport defined or default transport\n");
goto free_client;
}
p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
clnt, clnt->trans_mod, clnt->msize, clnt->proto_version);
err = clnt->trans_mod->create(clnt, dev_name, options);
if (err)
goto put_trans;
if (clnt->msize > clnt->trans_mod->maxsize) {
clnt->msize = clnt->trans_mod->maxsize;
pr_info("Limiting 'msize' to %d as this is the maximum "
"supported by transport %s\n",
clnt->msize, clnt->trans_mod->name
);
}
if (clnt->msize < 4096) {
p9_debug(P9_DEBUG_ERROR,
"Please specify a msize of at least 4k\n");
err = -EINVAL;
goto close_trans;
}
err = p9_client_version(clnt);
if (err)
goto close_trans;
/* P9_HDRSZ + 4 is the smallest packet header we can have that is
* followed by data accessed from userspace by read
*/
clnt->fcall_cache =
kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize,
0, 0, P9_HDRSZ + 4,
clnt->msize - (P9_HDRSZ + 4),
NULL);
return clnt;
close_trans:
clnt->trans_mod->close(clnt);
put_trans:
v9fs_put_trans(clnt->trans_mod);
free_client:
kfree(clnt);
return ERR_PTR(err);
}
EXPORT_SYMBOL(p9_client_create);
void p9_client_destroy(struct p9_client *clnt)
{
struct p9_fid *fid;
int id;
p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt);
if (clnt->trans_mod)
clnt->trans_mod->close(clnt);
v9fs_put_trans(clnt->trans_mod);
idr_for_each_entry(&clnt->fids, fid, id) {
pr_info("Found fid %d not clunked\n", fid->fid);
p9_fid_destroy(fid);
}
p9_tag_cleanup(clnt);
kmem_cache_destroy(clnt->fcall_cache);
kfree(clnt);
}
EXPORT_SYMBOL(p9_client_destroy);
void p9_client_disconnect(struct p9_client *clnt)
{
p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt);
clnt->status = Disconnected;
}
EXPORT_SYMBOL(p9_client_disconnect);
void p9_client_begin_disconnect(struct p9_client *clnt)
{
p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt);
clnt->status = BeginDisconnect;
}
EXPORT_SYMBOL(p9_client_begin_disconnect);
struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
const char *uname, kuid_t n_uname,
const char *aname)
{
int err;
struct p9_req_t *req;
struct p9_fid *fid;
struct p9_qid qid;
p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
afid ? afid->fid : -1, uname, aname);
fid = p9_fid_create(clnt);
if (!fid) {
err = -ENOMEM;
goto error;
}
fid->uid = n_uname;
req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid,
afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
qid.type, qid.path, qid.version);
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
p9_req_put(clnt, req);
return fid;
error:
if (fid)
p9_fid_destroy(fid);
return ERR_PTR(err);
}
EXPORT_SYMBOL(p9_client_attach);
struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
const unsigned char * const *wnames, int clone)
{
int err;
struct p9_client *clnt;
struct p9_fid *fid;
struct p9_qid *wqids;
struct p9_req_t *req;
u16 nwqids, count;
wqids = NULL;
clnt = oldfid->clnt;
if (clone) {
fid = p9_fid_create(clnt);
if (!fid) {
err = -ENOMEM;
goto error;
}
fid->uid = oldfid->uid;
} else {
fid = oldfid;
}
p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
nwname, wnames);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
goto clunk_fid;
}
p9_req_put(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
if (nwqids != nwname) {
err = -ENOENT;
goto clunk_fid;
}
for (count = 0; count < nwqids; count++)
p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n",
count, wqids[count].type,
wqids[count].path,
wqids[count].version);
if (nwname)
memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
else
memmove(&fid->qid, &oldfid->qid, sizeof(struct p9_qid));
kfree(wqids);
return fid;
clunk_fid:
kfree(wqids);
p9_fid_put(fid);
fid = NULL;
error:
if (fid && fid != oldfid)
p9_fid_destroy(fid);
return ERR_PTR(err);
}
EXPORT_SYMBOL(p9_client_walk);
int p9_client_open(struct p9_fid *fid, int mode)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
struct p9_qid qid;
int iounit;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
if (fid->mode != -1)
return -EINVAL;
if (p9_is_proto_dotl(clnt))
req = p9_client_rpc(clnt, P9_TLOPEN, "dd", fid->fid, mode & P9L_MODE_MASK);
else
req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode & P9L_MODE_MASK);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n",
p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type,
qid.path, qid.version, iounit);
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
fid->mode = mode;
fid->iounit = iounit;
free_and_error:
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_open);
int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
u32 mode, kgid_t gid, struct p9_qid *qid)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
int iounit;
p9_debug(P9_DEBUG_9P,
">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
ofid->fid, name, flags, mode,
from_kgid(&init_user_ns, gid));
clnt = ofid->clnt;
if (ofid->mode != -1)
return -EINVAL;
req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags,
mode & P9L_MODE_MASK, gid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n",
qid->type, qid->path, qid->version, iounit);
memmove(&ofid->qid, qid, sizeof(struct p9_qid));
ofid->mode = flags;
ofid->iounit = iounit;
free_and_error:
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_create_dotl);
int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
char *extension)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
struct p9_qid qid;
int iounit;
p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
fid->fid, name, perm, mode);
clnt = fid->clnt;
if (fid->mode != -1)
return -EINVAL;
req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm,
mode & P9L_MODE_MASK, extension);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
qid.type, qid.path, qid.version, iounit);
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
fid->mode = mode;
fid->iounit = iounit;
free_and_error:
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_fcreate);
int p9_client_symlink(struct p9_fid *dfid, const char *name,
const char *symtgt, kgid_t gid, struct p9_qid *qid)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n",
dfid->fid, name, symtgt);
clnt = dfid->clnt;
req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt,
gid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n",
qid->type, qid->path, qid->version);
free_and_error:
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_symlink);
int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newname)
{
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n",
dfid->fid, oldfid->fid, newname);
clnt = dfid->clnt;
req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid,
newname);
if (IS_ERR(req))
return PTR_ERR(req);
p9_debug(P9_DEBUG_9P, "<<< RLINK\n");
p9_req_put(clnt, req);
return 0;
}
EXPORT_SYMBOL(p9_client_link);
int p9_client_fsync(struct p9_fid *fid, int datasync)
{
int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
fid->fid, datasync);
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_fsync);
int p9_client_clunk(struct p9_fid *fid)
{
int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
int retries = 0;
again:
p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n",
fid->fid, retries);
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
p9_req_put(clnt, req);
error:
/* Fid is not valid even after a failed clunk
* If interrupted, retry once then give up and
* leak fid until umount.
*/
if (err == -ERESTARTSYS) {
if (retries++ == 0)
goto again;
} else {
p9_fid_destroy(fid);
}
return err;
}
EXPORT_SYMBOL(p9_client_clunk);
int p9_client_remove(struct p9_fid *fid)
{
int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
p9_req_put(clnt, req);
error:
if (err == -ERESTARTSYS)
p9_fid_put(fid);
else
p9_fid_destroy(fid);
return err;
}
EXPORT_SYMBOL(p9_client_remove);
int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags)
{
int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n",
dfid->fid, name, flags);
clnt = dfid->clnt;
req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name);
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_unlinkat);
int
p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
{
int total = 0;
*err = 0;
while (iov_iter_count(to)) {
int count;
count = p9_client_read_once(fid, offset, to, err);
if (!count || *err)
break;
offset += count;
total += count;
}
return total;
}
EXPORT_SYMBOL(p9_client_read);
int
p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
int *err)
{
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
int count = iov_iter_count(to);
int rsize, received, non_zc = 0;
char *dataptr;
*err = 0;
p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %zu\n",
fid->fid, offset, iov_iter_count(to));
rsize = fid->iounit;
if (!rsize || rsize > clnt->msize - P9_IOHDRSZ)
rsize = clnt->msize - P9_IOHDRSZ;
if (count < rsize)
rsize = count;
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && rsize > 1024) {
/* response header len is 11
* PDU Header(7) + IO Size (4)
*/
req = p9_client_zc_rpc(clnt, P9_TREAD, to, NULL, rsize,
0, 11, "dqd", fid->fid,
offset, rsize);
} else {
non_zc = 1;
req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
rsize);
}
if (IS_ERR(req)) {
*err = PTR_ERR(req);
if (!non_zc)
iov_iter_revert(to, count - iov_iter_count(to));
return 0;
}
*err = p9pdu_readf(&req->rc, clnt->proto_version,
"D", &received, &dataptr);
if (*err) {
if (!non_zc)
iov_iter_revert(to, count - iov_iter_count(to));
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
return 0;
}
if (rsize < received) {
pr_err("bogus RREAD count (%d > %d)\n", received, rsize);
received = rsize;
}
p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received);
if (non_zc) {
int n = copy_to_iter(dataptr, received, to);
if (n != received) {
*err = -EFAULT;
p9_req_put(clnt, req);
return n;
}
} else {
iov_iter_revert(to, count - received - iov_iter_count(to));
}
p9_req_put(clnt, req);
return received;
}
EXPORT_SYMBOL(p9_client_read_once);
int
p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
{
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
int total = 0;
*err = 0;
while (iov_iter_count(from)) {
int count = iov_iter_count(from);
int rsize = fid->iounit;
int written;
if (!rsize || rsize > clnt->msize - P9_IOHDRSZ)
rsize = clnt->msize - P9_IOHDRSZ;
if (count < rsize)
rsize = count;
p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n",
fid->fid, offset, rsize, count);
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && rsize > 1024) {
req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0,
rsize, P9_ZC_HDR_SZ, "dqd",
fid->fid, offset, rsize);
} else {
req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
offset, rsize, from);
}
if (IS_ERR(req)) {
iov_iter_revert(from, count - iov_iter_count(from));
*err = PTR_ERR(req);
break;
}
*err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written);
if (*err) {
iov_iter_revert(from, count - iov_iter_count(from));
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
break;
}
if (rsize < written) {
pr_err("bogus RWRITE count (%d > %d)\n", written, rsize);
written = rsize;
}
p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
p9_req_put(clnt, req);
iov_iter_revert(from, count - written - iov_iter_count(from));
total += written;
offset += written;
}
return total;
}
EXPORT_SYMBOL(p9_client_write);
void
p9_client_write_subreq(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *wreq = subreq->rreq;
struct p9_fid *fid = wreq->netfs_priv;
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
unsigned long long start = subreq->start + subreq->transferred;
int written, len = subreq->len - subreq->transferred;
int err;
p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n",
fid->fid, start, len);
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && len > 1024) {
req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter,
0, wreq->len, P9_ZC_HDR_SZ, "dqd",
fid->fid, start, len);
} else {
req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
start, len, &subreq->io_iter);
}
if (IS_ERR(req)) {
netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false);
return;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
netfs_write_subrequest_terminated(subreq, err, false);
return;
}
if (written > len) {
pr_err("bogus RWRITE count (%d > %u)\n", written, len);
written = len;
}
p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len);
p9_req_put(clnt, req);
netfs_write_subrequest_terminated(subreq, written, false);
}
EXPORT_SYMBOL(p9_client_write_subreq);
struct p9_wstat *p9_client_stat(struct p9_fid *fid)
{
int err;
struct p9_client *clnt;
struct p9_wstat *ret;
struct p9_req_t *req;
u16 ignored;
p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);
ret = kmalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
goto error;
}
p9_debug(P9_DEBUG_9P,
"<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
"<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
"<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
"<<< uid=%d gid=%d n_muid=%d\n",
ret->size, ret->type, ret->dev, ret->qid.type, ret->qid.path,
ret->qid.version, ret->mode,
ret->atime, ret->mtime, ret->length,
ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
from_kuid(&init_user_ns, ret->n_uid),
from_kgid(&init_user_ns, ret->n_gid),
from_kuid(&init_user_ns, ret->n_muid));
p9_req_put(clnt, req);
return ret;
error:
kfree(ret);
return ERR_PTR(err);
}
EXPORT_SYMBOL(p9_client_stat);
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
u64 request_mask)
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
{
int err;
struct p9_client *clnt;
struct p9_stat_dotl *ret;
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n",
fid->fid, request_mask);
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
ret = kmalloc(sizeof(*ret), GFP_KERNEL);
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
if (!ret)
return ERR_PTR(-ENOMEM);
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RGETATTR st_result_mask=%lld\n"
"<<< qid=%x.%llx.%x\n"
"<<< st_mode=%8.8x st_nlink=%llu\n"
"<<< st_uid=%d st_gid=%d\n"
"<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n"
"<<< st_atime_sec=%lld st_atime_nsec=%lld\n"
"<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n"
"<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n"
"<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
"<<< st_gen=%lld st_data_version=%lld\n",
ret->st_result_mask,
ret->qid.type, ret->qid.path, ret->qid.version,
ret->st_mode, ret->st_nlink,
from_kuid(&init_user_ns, ret->st_uid),
from_kgid(&init_user_ns, ret->st_gid),
ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks,
ret->st_atime_sec, ret->st_atime_nsec,
ret->st_mtime_sec, ret->st_mtime_nsec,
ret->st_ctime_sec, ret->st_ctime_nsec,
ret->st_btime_sec, ret->st_btime_nsec,
ret->st_gen, ret->st_data_version);
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
p9_req_put(clnt, req);
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 14:37:23 +00:00
return ret;
error:
kfree(ret);
return ERR_PTR(err);
}
EXPORT_SYMBOL(p9_client_getattr_dotl);
static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
{
int ret;
/* NOTE: size shouldn't include its own length */
/* size[2] type[2] dev[4] qid[13] */
/* mode[4] atime[4] mtime[4] length[8]*/
/* name[s] uid[s] gid[s] muid[s] */
ret = 2 + 4 + 13 + 4 + 4 + 4 + 8 + 2 + 2 + 2 + 2;
if (wst->name)
ret += strlen(wst->name);
if (wst->uid)
ret += strlen(wst->uid);
if (wst->gid)
ret += strlen(wst->gid);
if (wst->muid)
ret += strlen(wst->muid);
if (proto_version == p9_proto_2000u ||
proto_version == p9_proto_2000L) {
/* extension[s] n_uid[4] n_gid[4] n_muid[4] */
ret += 2 + 4 + 4 + 4;
if (wst->extension)
ret += strlen(wst->extension);
}
return ret;
}
int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
{
int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
clnt = fid->clnt;
wst->size = p9_client_statsize(wst, clnt->proto_version);
p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n",
fid->fid);
p9_debug(P9_DEBUG_9P,
" sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
" mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
" name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
" uid=%d gid=%d n_muid=%d\n",
wst->size, wst->type, wst->dev, wst->qid.type,
wst->qid.path, wst->qid.version,
wst->mode, wst->atime, wst->mtime, wst->length,
wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
from_kuid(&init_user_ns, wst->n_uid),
from_kgid(&init_user_ns, wst->n_gid),
from_kuid(&init_user_ns, wst->n_muid));
req = p9_client_rpc(clnt, P9_TWSTAT, "dwS",
fid->fid, wst->size + 2, wst);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_wstat);
9p: Implement client side of setattr for 9P2000.L protocol. SYNOPSIS size[4] Tsetattr tag[2] attr[n] size[4] Rsetattr tag[2] DESCRIPTION The setattr command changes some of the file status information. attr resembles the iattr structure used in Linux kernel. It specifies which status parameter is to be changed and to what value. It is laid out as follows: valid[4] specifies which status information is to be changed. Possible values are: ATTR_MODE (1 << 0) ATTR_UID (1 << 1) ATTR_GID (1 << 2) ATTR_SIZE (1 << 3) ATTR_ATIME (1 << 4) ATTR_MTIME (1 << 5) ATTR_ATIME_SET (1 << 7) ATTR_MTIME_SET (1 << 8) The last two bits represent whether the time information is being sent by the client's user space. In the absense of these bits the server always uses server's time. mode[4] File permission bits uid[4] Owner id of file gid[4] Group id of the file size[8] File size atime_sec[8] Time of last file access, seconds atime_nsec[8] Time of last file access, nanoseconds mtime_sec[8] Time of last file modification, seconds mtime_nsec[8] Time of last file modification, nanoseconds Explanation of the patches: -------------------------- *) The kernel just copies relevent contents of iattr structure to p9_iattr_dotl structure and passes it down to the client. The only check it has is calling inode_change_ok() *) The p9_iattr_dotl structure does not have ctime and ia_file parameters because I don't think these are needed in our case. The client user space can request updating just ctime by calling chown(fd, -1, -1). This is handled on server side without a need for putting ctime on the wire. *) The server currently supports changing mode, time, ownership and size of the file. *) 9P RFC says "Either all the changes in wstat request happen, or none of them does: if the request succeeds, all changes were made; if it fails, none were." I have not done anything to implement this specifically because I don't see a reason. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-18 06:20:10 +00:00
int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
{
int err = 0;
9p: Implement client side of setattr for 9P2000.L protocol. SYNOPSIS size[4] Tsetattr tag[2] attr[n] size[4] Rsetattr tag[2] DESCRIPTION The setattr command changes some of the file status information. attr resembles the iattr structure used in Linux kernel. It specifies which status parameter is to be changed and to what value. It is laid out as follows: valid[4] specifies which status information is to be changed. Possible values are: ATTR_MODE (1 << 0) ATTR_UID (1 << 1) ATTR_GID (1 << 2) ATTR_SIZE (1 << 3) ATTR_ATIME (1 << 4) ATTR_MTIME (1 << 5) ATTR_ATIME_SET (1 << 7) ATTR_MTIME_SET (1 << 8) The last two bits represent whether the time information is being sent by the client's user space. In the absense of these bits the server always uses server's time. mode[4] File permission bits uid[4] Owner id of file gid[4] Group id of the file size[8] File size atime_sec[8] Time of last file access, seconds atime_nsec[8] Time of last file access, nanoseconds mtime_sec[8] Time of last file modification, seconds mtime_nsec[8] Time of last file modification, nanoseconds Explanation of the patches: -------------------------- *) The kernel just copies relevent contents of iattr structure to p9_iattr_dotl structure and passes it down to the client. The only check it has is calling inode_change_ok() *) The p9_iattr_dotl structure does not have ctime and ia_file parameters because I don't think these are needed in our case. The client user space can request updating just ctime by calling chown(fd, -1, -1). This is handled on server side without a need for putting ctime on the wire. *) The server currently supports changing mode, time, ownership and size of the file. *) 9P RFC says "Either all the changes in wstat request happen, or none of them does: if the request succeeds, all changes were made; if it fails, none were." I have not done anything to implement this specifically because I don't see a reason. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-18 06:20:10 +00:00
struct p9_req_t *req;
struct p9_client *clnt;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n",
p9attr->valid, p9attr->mode,
from_kuid(&init_user_ns, p9attr->uid),
from_kgid(&init_user_ns, p9attr->gid),
p9attr->size);
p9_debug(P9_DEBUG_9P, " atime_sec=%lld atime_nsec=%lld\n",
p9attr->atime_sec, p9attr->atime_nsec);
p9_debug(P9_DEBUG_9P, " mtime_sec=%lld mtime_nsec=%lld\n",
p9attr->mtime_sec, p9attr->mtime_nsec);
9p: Implement client side of setattr for 9P2000.L protocol. SYNOPSIS size[4] Tsetattr tag[2] attr[n] size[4] Rsetattr tag[2] DESCRIPTION The setattr command changes some of the file status information. attr resembles the iattr structure used in Linux kernel. It specifies which status parameter is to be changed and to what value. It is laid out as follows: valid[4] specifies which status information is to be changed. Possible values are: ATTR_MODE (1 << 0) ATTR_UID (1 << 1) ATTR_GID (1 << 2) ATTR_SIZE (1 << 3) ATTR_ATIME (1 << 4) ATTR_MTIME (1 << 5) ATTR_ATIME_SET (1 << 7) ATTR_MTIME_SET (1 << 8) The last two bits represent whether the time information is being sent by the client's user space. In the absense of these bits the server always uses server's time. mode[4] File permission bits uid[4] Owner id of file gid[4] Group id of the file size[8] File size atime_sec[8] Time of last file access, seconds atime_nsec[8] Time of last file access, nanoseconds mtime_sec[8] Time of last file modification, seconds mtime_nsec[8] Time of last file modification, nanoseconds Explanation of the patches: -------------------------- *) The kernel just copies relevent contents of iattr structure to p9_iattr_dotl structure and passes it down to the client. The only check it has is calling inode_change_ok() *) The p9_iattr_dotl structure does not have ctime and ia_file parameters because I don't think these are needed in our case. The client user space can request updating just ctime by calling chown(fd, -1, -1). This is handled on server side without a need for putting ctime on the wire. *) The server currently supports changing mode, time, ownership and size of the file. *) 9P RFC says "Either all the changes in wstat request happen, or none of them does: if the request succeeds, all changes were made; if it fails, none were." I have not done anything to implement this specifically because I don't see a reason. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-18 06:20:10 +00:00
req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
p9_req_put(clnt, req);
9p: Implement client side of setattr for 9P2000.L protocol. SYNOPSIS size[4] Tsetattr tag[2] attr[n] size[4] Rsetattr tag[2] DESCRIPTION The setattr command changes some of the file status information. attr resembles the iattr structure used in Linux kernel. It specifies which status parameter is to be changed and to what value. It is laid out as follows: valid[4] specifies which status information is to be changed. Possible values are: ATTR_MODE (1 << 0) ATTR_UID (1 << 1) ATTR_GID (1 << 2) ATTR_SIZE (1 << 3) ATTR_ATIME (1 << 4) ATTR_MTIME (1 << 5) ATTR_ATIME_SET (1 << 7) ATTR_MTIME_SET (1 << 8) The last two bits represent whether the time information is being sent by the client's user space. In the absense of these bits the server always uses server's time. mode[4] File permission bits uid[4] Owner id of file gid[4] Group id of the file size[8] File size atime_sec[8] Time of last file access, seconds atime_nsec[8] Time of last file access, nanoseconds mtime_sec[8] Time of last file modification, seconds mtime_nsec[8] Time of last file modification, nanoseconds Explanation of the patches: -------------------------- *) The kernel just copies relevent contents of iattr structure to p9_iattr_dotl structure and passes it down to the client. The only check it has is calling inode_change_ok() *) The p9_iattr_dotl structure does not have ctime and ia_file parameters because I don't think these are needed in our case. The client user space can request updating just ctime by calling chown(fd, -1, -1). This is handled on server side without a need for putting ctime on the wire. *) The server currently supports changing mode, time, ownership and size of the file. *) 9P RFC says "Either all the changes in wstat request happen, or none of them does: if the request succeeds, all changes were made; if it fails, none were." I have not done anything to implement this specifically because I don't see a reason. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-18 06:20:10 +00:00
error:
return err;
}
EXPORT_SYMBOL(p9_client_setattr);
int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
{
int err;
struct p9_req_t *req;
struct p9_client *clnt;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
&sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
&sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
goto error;
}
p9_debug(P9_DEBUG_9P,
"<<< RSTATFS fid %d type 0x%x bsize %u blocks %llu bfree %llu bavail %llu files %llu ffree %llu fsid %llu namelen %u\n",
fid->fid, sb->type, sb->bsize, sb->blocks, sb->bfree,
sb->bavail, sb->files, sb->ffree, sb->fsid, sb->namelen);
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_statfs);
int p9_client_rename(struct p9_fid *fid,
struct p9_fid *newdirfid, const char *name)
{
int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
fid->fid, newdirfid->fid, name);
req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid,
newdirfid->fid, name);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_rename);
int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
struct p9_fid *newdirfid, const char *new_name)
{
int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
clnt = olddirfid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TRENAMEAT olddirfid %d old name %s newdirfid %d new name %s\n",
olddirfid->fid, old_name, newdirfid->fid, new_name);
req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid,
old_name, newdirfid->fid, new_name);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
newdirfid->fid, new_name);
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL(p9_client_renameat);
/* An xattrwalk without @attr_name gives the fid for the lisxattr namespace
*/
struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
const char *attr_name, u64 *attr_size)
{
int err;
struct p9_req_t *req;
struct p9_client *clnt;
struct p9_fid *attr_fid;
clnt = file_fid->clnt;
attr_fid = p9_fid_create(clnt);
if (!attr_fid) {
err = -ENOMEM;
goto error;
}
p9_debug(P9_DEBUG_9P,
">>> TXATTRWALK file_fid %d, attr_fid %d name '%s'\n",
file_fid->fid, attr_fid->fid, attr_name);
req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds",
file_fid->fid, attr_fid->fid, attr_name);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
p9_req_put(clnt, req);
goto clunk_fid;
}
p9_req_put(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n",
attr_fid->fid, *attr_size);
return attr_fid;
clunk_fid:
p9_fid_put(attr_fid);
attr_fid = NULL;
error:
if (attr_fid && attr_fid != file_fid)
p9_fid_destroy(attr_fid);
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
u64 attr_size, int flags)
{
int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
p9_debug(P9_DEBUG_9P,
">>> TXATTRCREATE fid %d name %s size %llu flag %d\n",
fid->fid, name, attr_size, flags);
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
fid->fid, name, attr_size, flags);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
p9_req_put(clnt, req);
error:
return err;
}
EXPORT_SYMBOL_GPL(p9_client_xattrcreate);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
{
int err, rsize, non_zc = 0;
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
struct p9_client *clnt;
struct p9_req_t *req;
char *dataptr;
struct kvec kv = {.iov_base = data, .iov_len = count};
struct iov_iter to;
iov_iter_kvec(&to, ITER_DEST, &kv, 1, count);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
fid->fid, offset, count);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
clnt = fid->clnt;
rsize = fid->iounit;
if (!rsize || rsize > clnt->msize - P9_READDIRHDRSZ)
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
rsize = clnt->msize - P9_READDIRHDRSZ;
if (count < rsize)
rsize = count;
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && rsize > 1024) {
/* response header len is 11
* PDU Header(7) + IO Size (4)
*/
req = p9_client_zc_rpc(clnt, P9_TREADDIR, &to, NULL, rsize, 0,
11, "dqd", fid->fid, offset, rsize);
} else {
non_zc = 1;
req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
offset, rsize);
}
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
}
err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
goto free_and_error;
}
if (rsize < count) {
pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize);
count = rsize;
}
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
if (non_zc)
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
memmove(data, dataptr, count);
p9_req_put(clnt, req);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
return count;
free_and_error:
p9_req_put(clnt, req);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 13:41:26 +00:00
error:
return err;
}
EXPORT_SYMBOL(p9_client_readdir);
int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
dev_t rdev, kgid_t gid, struct p9_qid *qid)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TMKNOD fid %d name %s mode %d major %d minor %d\n",
fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode,
MAJOR(rdev), MINOR(rdev), gid);
if (IS_ERR(req))
return PTR_ERR(req);
err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n",
qid->type, qid->path, qid->version);
error:
p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_mknod_dotl);
int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
kgid_t gid, struct p9_qid *qid)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
fid->fid, name, mode, from_kgid(&init_user_ns, gid));
req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg",
fid->fid, name, mode, gid);
if (IS_ERR(req))
return PTR_ERR(req);
err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
qid->path, qid->version);
error:
p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_mkdir_dotl);
9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-09-27 06:04:24 +00:00
int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n",
fid->fid, flock->type, flock->flags, flock->start,
flock->length, flock->proc_id, flock->client_id);
9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-09-27 06:04:24 +00:00
req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type,
flock->flags, flock->start, flock->length,
flock->proc_id, flock->client_id);
9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-09-27 06:04:24 +00:00
if (IS_ERR(req))
return PTR_ERR(req);
err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-09-27 06:04:24 +00:00
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-09-27 06:04:24 +00:00
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-09-27 06:04:24 +00:00
error:
p9_req_put(clnt, req);
9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-09-27 06:04:24 +00:00
return err;
}
EXPORT_SYMBOL(p9_client_lock_dotl);
int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n",
fid->fid, glock->type, glock->start, glock->length,
glock->proc_id, glock->client_id);
req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid,
glock->type, glock->start, glock->length,
glock->proc_id, glock->client_id);
if (IS_ERR(req))
return PTR_ERR(req);
err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
&glock->start, &glock->length, &glock->proc_id,
&glock->client_id);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P,
"<<< RGETLOCK type %i start %lld length %lld proc_id %d client_id %s\n",
glock->type, glock->start, glock->length,
glock->proc_id, glock->client_id);
error:
p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_getlock_dotl);
int p9_client_readlink(struct p9_fid *fid, char **target)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid);
if (IS_ERR(req))
return PTR_ERR(req);
err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
error:
p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_readlink);
int __init p9_client_init(void)
{
p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
return p9_req_cache ? 0 : -ENOMEM;
}
void __exit p9_client_exit(void)
{
kmem_cache_destroy(p9_req_cache);
}