Merge branch 'layoutstats'

* layoutstats:
  pnfs/flexfiles: protect ktime manipulation with mirror lock
  nfs: provide pnfs_report_layoutstat when NFS42 is disabled
  pnfs/flexfiles: report layoutstat regularly
  nfs42: serialize LAYOUTSTATS calls of the same file
  pnfs/flexfiles: encode LAYOUTSTATS flexfiles specific data
  pnfs/flexfiles: add ff_layout_prepare_layoutstats
  pNFS/flexfiles: track when layout is first used
  pNFS/flexfiles: add layoutstats tracking
  pNFS/flexfiles: Remove unused struct members user_name, group_name
  pnfs: add pnfs_report_layoutstat helper function
  pNFS: fill in nfs42_layoutstat_ops
  NFSv.2/pnfs Add a LAYOUTSTATS rpc function
This commit is contained in:
Trond Myklebust 2015-06-26 14:01:59 -04:00
commit 122d328d30
13 changed files with 800 additions and 17 deletions

View file

@ -20,6 +20,7 @@
#include "../nfs4trace.h"
#include "../iostat.h"
#include "../nfs.h"
#include "../nfs42.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@ -271,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
spin_lock_init(&fls->mirror_array[i]->lock);
fls->mirror_array[i]->ds_count = ds_count;
fls->mirror_array[i]->lseg = &fls->generic_hdr;
/* deviceid */
rc = decode_deviceid(&stream, &devid);
@ -412,6 +414,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
return 1;
}
static void
nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer)
{
/* first IO request? */
if (atomic_inc_return(&timer->n_ops) == 1) {
timer->start_time = ktime_get();
}
}
static ktime_t
nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer)
{
ktime_t start, now;
if (atomic_dec_return(&timer->n_ops) < 0)
WARN_ON_ONCE(1);
now = ktime_get();
start = timer->start_time;
timer->start_time = now;
return ktime_sub(now, start);
}
static ktime_t
nfs4_ff_layout_calc_completion_time(struct rpc_task *task)
{
return ktime_sub(ktime_get(), task->tk_start);
}
static bool
nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
struct nfs4_ff_layoutstat *layoutstat)
{
static const ktime_t notime = {0};
ktime_t now = ktime_get();
nfs4_ff_start_busy_timer(&layoutstat->busy_timer);
if (ktime_equal(mirror->start_time, notime))
mirror->start_time = now;
if (ktime_equal(mirror->last_report_time, notime))
mirror->last_report_time = now;
if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
FF_LAYOUTSTATS_REPORT_INTERVAL) {
mirror->last_report_time = now;
return true;
}
return false;
}
static void
nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
__u64 requested)
{
struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
iostat->ops_requested++;
iostat->bytes_requested += requested;
}
static void
nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
__u64 requested,
__u64 completed,
ktime_t time_completed)
{
struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
ktime_t timer;
iostat->ops_completed++;
iostat->bytes_completed += completed;
iostat->bytes_not_delivered += requested - completed;
timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer);
iostat->total_busy_time =
ktime_add(iostat->total_busy_time, timer);
iostat->aggregate_completion_time =
ktime_add(iostat->aggregate_completion_time, time_completed);
}
static void
nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror,
__u64 requested)
{
bool report;
spin_lock(&mirror->lock);
report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat);
nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
spin_unlock(&mirror->lock);
if (report)
pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
}
static void
nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
struct nfs4_ff_layout_mirror *mirror,
__u64 requested,
__u64 completed)
{
spin_lock(&mirror->lock);
nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
requested, completed,
nfs4_ff_layout_calc_completion_time(task));
spin_unlock(&mirror->lock);
}
static void
nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror,
__u64 requested)
{
bool report;
spin_lock(&mirror->lock);
report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat);
nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
spin_unlock(&mirror->lock);
if (report)
pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
}
static void
nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
struct nfs4_ff_layout_mirror *mirror,
__u64 requested,
__u64 completed,
enum nfs3_stable_how committed)
{
if (committed == NFS_UNSTABLE)
requested = completed = 0;
spin_lock(&mirror->lock);
nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
requested, completed,
nfs4_ff_layout_calc_completion_time(task));
spin_unlock(&mirror->lock);
}
static int
ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo,
@ -906,6 +1048,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
static int ff_layout_read_prepare_common(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
nfs4_ff_layout_stat_io_start_read(
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count);
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@ -959,15 +1105,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
if (ff_layout_read_prepare_common(task, hdr))
return;
if (ff_layout_setup_sequence(hdr->ds_clp,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
return;
if (ff_layout_read_prepare_common(task, hdr))
return;
if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
hdr->args.lock_context, FMODE_READ) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@ -979,6 +1125,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
nfs4_ff_layout_stat_io_end_read(task,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count, hdr->res.count);
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@ -1080,6 +1230,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
static int ff_layout_write_prepare_common(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
nfs4_ff_layout_stat_io_start_write(
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count);
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@ -1113,15 +1267,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
if (ff_layout_write_prepare_common(task, hdr))
return;
if (ff_layout_setup_sequence(hdr->ds_clp,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
return;
if (ff_layout_write_prepare_common(task, hdr))
return;
if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
hdr->args.lock_context, FMODE_WRITE) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@ -1131,6 +1285,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
nfs4_ff_layout_stat_io_end_write(task,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count, hdr->res.count,
hdr->res.verf->committed);
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@ -1149,8 +1308,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}
static void ff_layout_commit_prepare_common(struct rpc_task *task,
struct nfs_commit_data *cdata)
{
nfs4_ff_layout_stat_io_start_write(
FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
0);
}
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
ff_layout_commit_prepare_common(task, data);
rpc_call_start(task);
}
@ -1158,10 +1326,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_commit_data *wdata = data;
ff_layout_setup_sequence(wdata->ds_clp,
if (ff_layout_setup_sequence(wdata->ds_clp,
&wdata->args.seq_args,
&wdata->res.seq_res,
task);
task))
return;
ff_layout_commit_prepare_common(task, data);
}
static void ff_layout_commit_done(struct rpc_task *task, void *data)
{
struct nfs_commit_data *cdata = data;
struct nfs_page *req;
__u64 count = 0;
if (task->tk_status == 0) {
list_for_each_entry(req, &cdata->pages, wb_list)
count += req->wb_bytes;
}
nfs4_ff_layout_stat_io_end_write(task,
FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
count, count, NFS_FILE_SYNC);
pnfs_generic_write_commit_done(task, data);
}
static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
@ -1202,14 +1390,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
.rpc_call_prepare = ff_layout_commit_prepare_v3,
.rpc_call_done = pnfs_generic_write_commit_done,
.rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
.rpc_release = pnfs_generic_commit_release,
};
static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
.rpc_call_prepare = ff_layout_commit_prepare_v4,
.rpc_call_done = pnfs_generic_write_commit_done,
.rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
.rpc_release = pnfs_generic_commit_release,
};
@ -1253,7 +1441,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
if (fh)
hdr->args.fh = fh;
/*
* Note that if we ever decide to split across DSes,
* then we may need to handle dense-like offsets.
@ -1382,6 +1569,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
data->args.fh = fh;
return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
@ -1485,6 +1673,247 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
dprintk("%s: Return\n", __func__);
}
static int
ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
{
const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
}
static size_t
ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf,
const int buflen)
{
const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
const struct in6_addr *addr = &sin6->sin6_addr;
/*
* RFC 4291, Section 2.2.2
*
* Shorthanded ANY address
*/
if (ipv6_addr_any(addr))
return snprintf(buf, buflen, "::");
/*
* RFC 4291, Section 2.2.2
*
* Shorthanded loopback address
*/
if (ipv6_addr_loopback(addr))
return snprintf(buf, buflen, "::1");
/*
* RFC 4291, Section 2.2.3
*
* Special presentation address format for mapped v4
* addresses.
*/
if (ipv6_addr_v4mapped(addr))
return snprintf(buf, buflen, "::ffff:%pI4",
&addr->s6_addr32[3]);
/*
* RFC 4291, Section 2.2.1
*/
return snprintf(buf, buflen, "%pI6c", addr);
}
/* Derived from rpc_sockaddr2uaddr */
static void
ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
{
struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
char portbuf[RPCBIND_MAXUADDRPLEN];
char addrbuf[RPCBIND_MAXUADDRLEN];
char *netid;
unsigned short port;
int len, netid_len;
__be32 *p;
switch (sap->sa_family) {
case AF_INET:
if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
return;
port = ntohs(((struct sockaddr_in *)sap)->sin_port);
netid = "tcp";
netid_len = 3;
break;
case AF_INET6:
if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
return;
port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
netid = "tcp6";
netid_len = 4;
break;
default:
/* we only support tcp and tcp6 */
WARN_ON_ONCE(1);
return;
}
snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
p = xdr_reserve_space(xdr, 4 + netid_len);
xdr_encode_opaque(p, netid, netid_len);
p = xdr_reserve_space(xdr, 4 + len);
xdr_encode_opaque(p, addrbuf, len);
}
static void
ff_layout_encode_nfstime(struct xdr_stream *xdr,
ktime_t t)
{
struct timespec64 ts;
__be32 *p;
p = xdr_reserve_space(xdr, 12);
ts = ktime_to_timespec64(t);
p = xdr_encode_hyper(p, ts.tv_sec);
*p++ = cpu_to_be32(ts.tv_nsec);
}
static void
ff_layout_encode_io_latency(struct xdr_stream *xdr,
struct nfs4_ff_io_stat *stat)
{
__be32 *p;
p = xdr_reserve_space(xdr, 5 * 8);
p = xdr_encode_hyper(p, stat->ops_requested);
p = xdr_encode_hyper(p, stat->bytes_requested);
p = xdr_encode_hyper(p, stat->ops_completed);
p = xdr_encode_hyper(p, stat->bytes_completed);
p = xdr_encode_hyper(p, stat->bytes_not_delivered);
ff_layout_encode_nfstime(xdr, stat->total_busy_time);
ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time);
}
static void
ff_layout_encode_layoutstats(struct xdr_stream *xdr,
struct nfs42_layoutstat_args *args,
struct nfs42_layoutstat_devinfo *devinfo)
{
struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;
struct nfs4_pnfs_ds_addr *da;
struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
struct nfs_fh *fh = &mirror->fh_versions[0];
__be32 *p, *start;
da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
dprintk("%s: DS %s: encoding address %s\n",
__func__, ds->ds_remotestr, da->da_remotestr);
/* layoutupdate length */
start = xdr_reserve_space(xdr, 4);
/* netaddr4 */
ff_layout_encode_netaddr(xdr, da);
/* nfs_fh4 */
p = xdr_reserve_space(xdr, 4 + fh->size);
xdr_encode_opaque(p, fh->data, fh->size);
/* ff_io_latency4 read */
spin_lock(&mirror->lock);
ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
/* ff_io_latency4 write */
ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
spin_unlock(&mirror->lock);
/* nfstime4 */
ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
/* bool */
p = xdr_reserve_space(xdr, 4);
*p = cpu_to_be32(false);
*start = cpu_to_be32((xdr->p - start - 1) * 4);
}
static bool
ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
struct pnfs_layout_segment *pls,
int *dev_count, int dev_limit)
{
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_deviceid_node *dev;
struct nfs42_layoutstat_devinfo *devinfo;
int i;
for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) {
if (*dev_count >= dev_limit)
break;
mirror = FF_LAYOUT_COMP(pls, i);
if (!mirror || !mirror->mirror_ds)
continue;
dev = FF_LAYOUT_DEVID_NODE(pls, i);
devinfo = &args->devinfo[*dev_count];
memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
devinfo->offset = pls->pls_range.offset;
devinfo->length = pls->pls_range.length;
/* well, we don't really know if IO is continuous or not! */
devinfo->read_count = mirror->read_stat.io_stat.bytes_completed;
devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
devinfo->write_count = mirror->write_stat.io_stat.bytes_completed;
devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
devinfo->layout_type = LAYOUT_FLEX_FILES;
devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
devinfo->layout_private = mirror;
/* lseg refcount put in cleanup_layoutstats */
pnfs_get_lseg(pls);
++(*dev_count);
}
return *dev_count < dev_limit;
}
static int
ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
{
struct pnfs_layout_segment *pls;
int dev_count = 0;
spin_lock(&args->inode->i_lock);
list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
dev_count += FF_LAYOUT_MIRROR_COUNT(pls);
}
spin_unlock(&args->inode->i_lock);
/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) {
dprintk("%s: truncating devinfo to limit (%d:%d)\n",
__func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV);
dev_count = PNFS_LAYOUTSTATS_MAXDEV;
}
args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL);
if (!args->devinfo)
return -ENOMEM;
dev_count = 0;
spin_lock(&args->inode->i_lock);
list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count,
PNFS_LAYOUTSTATS_MAXDEV)) {
break;
}
}
spin_unlock(&args->inode->i_lock);
args->num_dev = dev_count;
return 0;
}
static void
ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data)
{
struct nfs4_ff_layout_mirror *mirror;
int i;
for (i = 0; i < data->args.num_dev; i++) {
mirror = data->args.devinfo[i].layout_private;
data->args.devinfo[i].layout_private = NULL;
pnfs_put_lseg(mirror->lseg);
}
}
static struct pnfs_layoutdriver_type flexfilelayout_type = {
.id = LAYOUT_FLEX_FILES,
.name = "LAYOUT_FLEX_FILES",
@ -1507,6 +1936,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
.alloc_deviceid_node = ff_layout_alloc_deviceid_node,
.encode_layoutreturn = ff_layout_encode_layoutreturn,
.sync = pnfs_nfs_generic_sync,
.prepare_layoutstats = ff_layout_prepare_layoutstats,
.cleanup_layoutstats = ff_layout_cleanup_layoutstats,
};
static int __init nfs4flexfilelayout_init(void)

View file

@ -15,6 +15,9 @@
* due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
/* LAYOUTSTATS report interval in ms */
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
struct nfs4_ff_ds_version {
u32 version;
u32 minor_version;
@ -41,19 +44,42 @@ struct nfs4_ff_layout_ds_err {
struct nfs4_deviceid deviceid;
};
struct nfs4_ff_io_stat {
__u64 ops_requested;
__u64 bytes_requested;
__u64 ops_completed;
__u64 bytes_completed;
__u64 bytes_not_delivered;
ktime_t total_busy_time;
ktime_t aggregate_completion_time;
};
struct nfs4_ff_busy_timer {
ktime_t start_time;
atomic_t n_ops;
};
struct nfs4_ff_layoutstat {
struct nfs4_ff_io_stat io_stat;
struct nfs4_ff_busy_timer busy_timer;
};
struct nfs4_ff_layout_mirror {
struct pnfs_layout_segment *lseg; /* back pointer */
u32 ds_count;
u32 efficiency;
struct nfs4_ff_layout_ds *mirror_ds;
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
struct nfs4_string user_name;
struct nfs4_string group_name;
u32 uid;
u32 gid;
struct rpc_cred *cred;
spinlock_t lock;
struct nfs4_ff_layoutstat read_stat;
struct nfs4_ff_layoutstat write_stat;
ktime_t start_time;
ktime_t last_report_time;
};
struct nfs4_ff_layout_segment {

View file

@ -5,11 +5,18 @@
#ifndef __LINUX_FS_NFS_NFS4_2_H
#define __LINUX_FS_NFS_NFS4_2_H
/*
* FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support
* more? Need to consider not to pre-alloc too much for a compound.
*/
#define PNFS_LAYOUTSTATS_MAXDEV (4)
/* nfs4.2proc.c */
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
struct nfs42_layoutstat_data *);
/* nfs4.2xdr.h */
extern struct rpc_procinfo nfs4_2_procedures[];

View file

@ -10,6 +10,11 @@
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "nfs42.h"
#include "iostat.h"
#include "pnfs.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
fmode_t fmode)
@ -165,3 +170,79 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
static void
nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
struct nfs_server *server = NFS_SERVER(data->args.inode);
nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
&data->res.seq_res, task);
}
static void
nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
/* well, we don't care about errors at all! */
if (task->tk_status)
dprintk("%s server returns %d\n", __func__, task->tk_status);
}
static void
nfs42_layoutstat_release(void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
struct nfs_server *nfss = NFS_SERVER(data->args.inode);
if (nfss->pnfs_curr_ld->cleanup_layoutstats)
nfss->pnfs_curr_ld->cleanup_layoutstats(data);
pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout);
smp_mb__before_atomic();
clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags);
smp_mb__after_atomic();
nfs_iput_and_deactive(data->inode);
kfree(data->args.devinfo);
kfree(data);
}
static const struct rpc_call_ops nfs42_layoutstat_ops = {
.rpc_call_prepare = nfs42_layoutstat_prepare,
.rpc_call_done = nfs42_layoutstat_done,
.rpc_release = nfs42_layoutstat_release,
};
int nfs42_proc_layoutstats_generic(struct nfs_server *server,
struct nfs42_layoutstat_data *data)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
};
struct rpc_task_setup task_setup = {
.rpc_client = server->client,
.rpc_message = &msg,
.callback_ops = &nfs42_layoutstat_ops,
.callback_data = data,
.flags = RPC_TASK_ASYNC,
};
struct rpc_task *task;
data->inode = nfs_igrab_and_active(data->args.inode);
if (!data->inode) {
nfs42_layoutstat_release(data);
return -EAGAIN;
}
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
task = rpc_run_task(&task_setup);
if (IS_ERR(task))
return PTR_ERR(task);
return 0;
}

View file

@ -4,6 +4,8 @@
#ifndef __LINUX_FS_NFS_NFS4_2XDR_H
#define __LINUX_FS_NFS_NFS4_2XDR_H
#include "nfs42.h"
#define encode_fallocate_maxsz (encode_stateid_maxsz + \
2 /* offset */ + \
2 /* length */)
@ -22,6 +24,16 @@
1 /* whence */ + \
2 /* offset */ + \
2 /* length */)
#define encode_io_info_maxsz 4
#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \
2 /* offset */ + \
2 /* length */ + \
encode_stateid_maxsz + \
encode_io_info_maxsz + \
encode_io_info_maxsz + \
1 /* opaque devaddr4 length */ + \
XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
#define decode_layoutstats_maxsz (op_decode_hdr_maxsz)
#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
@ -45,6 +57,14 @@
#define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
decode_seek_maxsz)
#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz)
#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
static void encode_fallocate(struct xdr_stream *xdr,
@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr,
encode_uint32(xdr, args->sa_what);
}
static void encode_layoutstats(struct xdr_stream *xdr,
struct nfs42_layoutstat_args *args,
struct nfs42_layoutstat_devinfo *devinfo,
struct compound_hdr *hdr)
{
__be32 *p;
encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr);
p = reserve_space(xdr, 8 + 8);
p = xdr_encode_hyper(p, devinfo->offset);
p = xdr_encode_hyper(p, devinfo->length);
encode_nfs4_stateid(xdr, &args->stateid);
p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4);
p = xdr_encode_hyper(p, devinfo->read_count);
p = xdr_encode_hyper(p, devinfo->read_bytes);
p = xdr_encode_hyper(p, devinfo->write_count);
p = xdr_encode_hyper(p, devinfo->write_bytes);
p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data,
NFS4_DEVICEID4_SIZE);
/* Encode layoutupdate4 */
*p++ = cpu_to_be32(devinfo->layout_type);
if (devinfo->layoutstats_encode != NULL)
devinfo->layoutstats_encode(xdr, args, devinfo);
else
encode_uint32(xdr, 0);
}
/*
* Encode ALLOCATE request
*/
@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
encode_nops(&hdr);
}
/*
* Encode LAYOUTSTATS request
*/
static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct nfs42_layoutstat_args *args)
{
int i;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
for (i = 0; i < args->num_dev; i++)
encode_layoutstats(xdr, args, &args->devinfo[i], &hdr);
encode_nops(&hdr);
}
static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_ALLOCATE);
@ -169,6 +238,28 @@ static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
return -EIO;
}
static int decode_layoutstats(struct xdr_stream *xdr,
struct nfs42_layoutstat_res *res)
{
int status;
__be32 *p;
status = decode_op_hdr(xdr, OP_LAYOUTSTATS);
if (status)
return status;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
res->rpc_status = be32_to_cpup(p++);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
* Decode ALLOCATE request
*/
@ -246,4 +337,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
out:
return status;
}
/*
* Decode LAYOUTSTATS request
*/
static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
struct nfs42_layoutstat_res *res)
{
struct compound_hdr hdr;
int status, i;
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
status = decode_putfh(xdr);
if (status)
goto out;
WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
for (i = 0; i < res->num_dev; i++) {
status = decode_layoutstats(xdr, res);
if (status)
goto out;
}
out:
res->rpc_status = status;
return status;
}
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */

View file

@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception
extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int);
extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int);
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);

View file

@ -476,8 +476,8 @@ struct nfs4_call_sync_data {
struct nfs4_sequence_res *seq_res;
};
static void nfs4_init_sequence(struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res, int cache_reply)
void nfs4_init_sequence(struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res, int cache_reply)
{
args->sa_slot = NULL;
args->sa_cache_this = cache_reply;

View file

@ -7431,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(SEEK, enc_seek, dec_seek),
PROC(ALLOCATE, enc_allocate, dec_allocate),
PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
#endif /* CONFIG_NFS_V4_2 */
};

View file

@ -35,6 +35,7 @@
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
#include "nfs42.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@ -2250,3 +2251,60 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
}
return thp;
}
#if IS_ENABLED(CONFIG_NFS_V4_2)
int
pnfs_report_layoutstat(struct inode *inode)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs42_layoutstat_data *data;
struct pnfs_layout_hdr *hdr;
int status = 0;
if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
goto out;
if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
goto out;
spin_lock(&inode->i_lock);
if (!NFS_I(inode)->layout) {
spin_unlock(&inode->i_lock);
goto out;
}
hdr = NFS_I(inode)->layout;
pnfs_get_layout_hdr(hdr);
spin_unlock(&inode->i_lock);
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data) {
status = -ENOMEM;
goto out_put;
}
data->args.fh = NFS_FH(inode);
data->args.inode = inode;
nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
status = ld->prepare_layoutstats(&data->args);
if (status)
goto out_free;
status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
out:
dprintk("%s returns %d\n", __func__, status);
return status;
out_free:
kfree(data);
out_put:
pnfs_put_layout_hdr(hdr);
smp_mb__before_atomic();
clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
smp_mb__after_atomic();
goto out;
}
EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
#endif

View file

@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type {
void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
struct xdr_stream *xdr,
const struct nfs4_layoutcommit_args *args);
int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data);
};
struct pnfs_layout_hdr {
@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
#endif /* CONFIG_NFS_V4_1 */
#if IS_ENABLED(CONFIG_NFS_V4_2)
int pnfs_report_layoutstat(struct inode *inode);
#else
static inline int
pnfs_report_layoutstat(struct inode *inode)
{
return 0;
}
#endif
#endif /* FS_NFS_PNFS_H */

View file

@ -500,6 +500,7 @@ enum {
NFSPROC4_CLNT_SEEK,
NFSPROC4_CLNT_ALLOCATE,
NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS,
};
/* nfs41 types */

View file

@ -219,6 +219,7 @@ struct nfs_inode {
#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
static inline struct nfs_inode *NFS_I(const struct inode *inode)
{

View file

@ -316,6 +316,49 @@ struct nfs4_layoutreturn {
int rpc_status;
};
#define PNFS_LAYOUTSTATS_MAXSIZE 256
struct nfs42_layoutstat_args;
struct nfs42_layoutstat_devinfo;
typedef void (*layoutstats_encode_t)(struct xdr_stream *,
struct nfs42_layoutstat_args *,
struct nfs42_layoutstat_devinfo *);
/* Per file per deviceid layoutstats */
struct nfs42_layoutstat_devinfo {
struct nfs4_deviceid dev_id;
__u64 offset;
__u64 length;
__u64 read_count;
__u64 read_bytes;
__u64 write_count;
__u64 write_bytes;
__u32 layout_type;
layoutstats_encode_t layoutstats_encode;
void *layout_private;
};
struct nfs42_layoutstat_args {
struct nfs4_sequence_args seq_args;
struct nfs_fh *fh;
struct inode *inode;
nfs4_stateid stateid;
int num_dev;
struct nfs42_layoutstat_devinfo *devinfo;
};
struct nfs42_layoutstat_res {
struct nfs4_sequence_res seq_res;
int num_dev;
int rpc_status;
};
struct nfs42_layoutstat_data {
struct inode *inode;
struct nfs42_layoutstat_args args;
struct nfs42_layoutstat_res res;
};
struct stateowner_id {
__u64 create_time;
__u32 uniquifier;