NFS Client Updates for Linux 6.11

New Features:
   * Add support for large folios
   * Implement rpcrdma generic device removal notification
   * Add client support for attribute delegations
   * Use a LAYOUTRETURN during reboot recovery to report layoutstats and errors
   * Improve throughput for random buffered writes
   * Add NVMe support to pnfs/blocklayout
 
 Bugfixes:
   * Fix rpcrdma_reqs_reset()
   * Avoid soft lockups when using UDP
   * Fix an nfs/blocklayout premature PR key unregistration
   * Another fix for EXCHGID4_FLAG_USE_PNFS_DS for DS server
   * Do not extend writes to the entire folio
   * Pass explicit offset and count values to tracepoints
   * Fix a race to wake up sleeping SUNRPC sync tasks
   * Fix gss_status tracepoint output
 
 Cleanups:
   * Add missing MODULE_DESCRIPTION() macros
   * Add blocklayout / SCSI layout tracepoints
   * Remove asm-generic headers from xprtrdma verbs.c
   * Remove unused 'struct mnt_fhstatus'
   * Other delegation related cleanups
   * Other folio related cleanups
   * Other pNFS related cleanups
   * Other xprtrdma cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEnZ5MQTpR7cLU7KEp18tUv7ClQOsFAmaZgr0ACgkQ18tUv7Cl
 QOv8FxAAnUyYG7Kdbv+5Ko/SFv0imxCb5DQh2XC/hSHNrlKBlDnqe2PANXR9XocL
 mS0Wry5tZf/T+o+QoKv0HQUdWFlnqKzwclggrekf/lkioU1feWsLe2RzDl1iUh0V
 6fwcCyWXW1mYX2CtCaDe+/ZFcoZOMD+bItNHt/RdDScSnS9Jd8GSyocsVKsqaBx6
 3wub0FJ4UBgYNoX2T3YyK2JwvO9GLaKIQRJV74rjgPJKjcjhptbcb5MKBmOZrF95
 UCcpl4CwvD9RTsSEp0B98UbAFFpk8Nw1tmHF3GmyG/nsrJomDuLKFvbsiq23eHUf
 XeULZIbjMEzU56vjoTglZA4s7JYx17D0vzdPGUqU4mLN3LPm5LtGLBg2uQoPw/xW
 50euLU+ol36mfnQlBsuM/tAXgtoAcT63aNeNRNp8aOL47xA+PC6kWTBK9OaR5+x6
 w+d22Dpy+riMk1TRaAVt0ANcENKELsWRFvxkuWCpQhVoQ1h8LigQJzeggEEK7Sa6
 5u9H6wCTee2wz746uwA43koj1utuyrLq/5S+qEtCY1pbP3U0A+Gh0Xh00OXiYuzL
 TgRdksmiAL8cA51WjSrq6HhGLOUJAYLfbdKaVhW+fULxUVwzWhFFaFbbdiq/e4OR
 0pfqls8UZWICE51GeTfalEidpKZgV/LxU3QOuVoalWBULyj/TeI=
 =avTW
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-6.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
 "New Features:
   - Add support for large folios
   - Implement rpcrdma generic device removal notification
   - Add client support for attribute delegations
   - Use a LAYOUTRETURN during reboot recovery to report layoutstats
     and errors
   - Improve throughput for random buffered writes
   - Add NVMe support to pnfs/blocklayout

  Bugfixes:
   - Fix rpcrdma_reqs_reset()
   - Avoid soft lockups when using UDP
   - Fix an nfs/blocklayout premature PR key unregistration
   - Another fix for EXCHGID4_FLAG_USE_PNFS_DS for DS server
   - Do not extend writes to the entire folio
   - Pass explicit offset and count values to tracepoints
   - Fix a race to wake up sleeping SUNRPC sync tasks
   - Fix gss_status tracepoint output

  Cleanups:
   - Add missing MODULE_DESCRIPTION() macros
   - Add blocklayout / SCSI layout tracepoints
   - Remove asm-generic headers from xprtrdma verbs.c
   - Remove unused 'struct mnt_fhstatus'
   - Other delegation related cleanups
   - Other folio related cleanups
   - Other pNFS related cleanups
   - Other xprtrdma cleanups"

* tag 'nfs-for-6.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (63 commits)
  SUNRPC: Fixup gss_status tracepoint error output
  SUNRPC: Fix a race to wake a sync task
  nfs: split nfs_read_folio
  nfs: pass explicit offset/count to trace events
  nfs: do not extend writes to the entire folio
  nfs/blocklayout: add support for NVMe
  nfs: remove nfs_page_length
  nfs: remove the unused max_deviceinfo_size field from struct pnfs_layoutdriver_type
  nfs: don't reuse partially completed requests in nfs_lock_and_join_requests
  nfs: move nfs_wait_on_request to write.c
  nfs: fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests
  nfs: fold nfs_folio_find_and_lock_request into nfs_lock_and_join_requests
  nfs: simplify nfs_folio_find_and_lock_request
  nfs: remove nfs_folio_private_request
  nfs: remove dead code for the old swap over NFS implementation
  NFSv4.1 another fix for EXCHGID4_FLAG_USE_PNFS_DS for DS server
  nfs: Block on write congestion
  nfs: Properly initialize server->writeback
  nfs: Drop pointless check from nfs_commit_release_pages()
  nfs/blocklayout: SCSI layout trace points for reservation key reg/unreg
  ...
This commit is contained in:
Linus Torvalds 2024-07-18 17:17:30 -07:00
commit 4f40c636b2
58 changed files with 1684 additions and 813 deletions

View file

@ -564,25 +564,32 @@ bl_find_get_deviceid(struct nfs_server *server,
gfp_t gfp_mask)
{
struct nfs4_deviceid_node *node;
unsigned long start, end;
int err = -ENODEV;
retry:
node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
if (!node)
return ERR_PTR(-ENODEV);
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
return node;
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
unsigned long end = jiffies;
unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
end = jiffies;
start = end - PNFS_DEVICE_RETRY_TIMEOUT;
if (!time_in_range(node->timestamp_unavailable, start, end)) {
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
goto retry;
if (!time_in_range(node->timestamp_unavailable, start, end)) {
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
goto retry;
}
goto out_put;
}
if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node)))
goto out_put;
return node;
out_put:
nfs4_put_deviceid_node(node);
return ERR_PTR(-ENODEV);
return ERR_PTR(err);
}
static int

View file

@ -104,20 +104,26 @@ struct pnfs_block_dev {
u64 start;
u64 len;
enum pnfs_block_volume_type type;
u32 nr_children;
struct pnfs_block_dev *children;
u64 chunk_size;
struct file *bdev_file;
u64 disk_offset;
unsigned long flags;
u64 pr_key;
bool pr_registered;
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
/*
 * pnfs_block_dev flag bits.
 *
 * These are bit numbers (not masks) for the atomic bitops applied to
 * pnfs_block_dev.flags.
 */
enum {
/* Set by bl_register_scsi() via test_and_set_bit(), cleared by bl_unregister_scsi() */
PNFS_BDEV_REGISTERED = 0,
};
/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
union {
@ -172,6 +178,7 @@ struct bl_msg_hdr {
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
/* dev.c */
bool bl_register_dev(struct pnfs_block_dev *d);
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
struct pnfs_device *pdev, gfp_t gfp_mask);
void bl_free_deviceid_node(struct nfs4_deviceid_node *d);

View file

@ -10,12 +10,83 @@
#include <linux/pr.h>
#include "blocklayout.h"
#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
 * bl_unregister_scsi - drop the persistent reservation key of a SCSI volume
 * @dev: leaf block device whose key should be unregistered
 *
 * Does nothing unless PNFS_BDEV_REGISTERED is set in @dev->flags.  The bit is
 * tested and cleared atomically, so at most one caller issues the unregister.
 *
 * The block device is looked up only after the flag test: bl_free_device()
 * calls bl_unregister_dev() before it checks dev->bdev_file for NULL, so a
 * volume whose device was never opened can reach this function.  The flag is
 * only set by bl_register_scsi(), which has already dereferenced
 * dev->bdev_file, so a set flag implies the file is usable here.
 *
 * The result of pr_register() is reported through the bl_pr_key_unreg and
 * bl_pr_key_unreg_err trace events; nothing is returned to the caller.
 */
static void bl_unregister_scsi(struct pnfs_block_dev *dev)
{
	struct block_device *bdev;
	const struct pr_ops *ops;
	int status;

	if (!test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
		return;

	bdev = file_bdev(dev->bdev_file);
	ops = bdev->bd_disk->fops->pr_ops;

	/* old key = dev->pr_key, new key = 0: removes the registration */
	status = ops->pr_register(bdev, dev->pr_key, 0, false);
	if (status)
		trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status);
	else
		trace_bl_pr_key_unreg(bdev, dev->pr_key);
}
/*
 * bl_register_scsi - register the persistent reservation key of a SCSI volume
 * @dev: leaf block device; dev->bdev_file must refer to an open block device
 *
 * Returns true when the key is registered by this call, or when
 * PNFS_BDEV_REGISTERED was already set.  Returns false when pr_register()
 * reports an error.
 *
 * NOTE(review): the flag is set before pr_register() runs and is not cleared
 * when it fails, so later calls return true without retrying - confirm this
 * is the intended behaviour.
 */
static bool bl_register_scsi(struct pnfs_block_dev *dev)
{
	struct block_device *bdev = file_bdev(dev->bdev_file);
	const struct pr_ops *pr = bdev->bd_disk->fops->pr_ops;
	int err;

	/* Somebody already claimed the registration for this device. */
	if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags))
		return true;

	err = pr->pr_register(bdev, 0, dev->pr_key, true);
	if (!err) {
		trace_bl_pr_key_reg(bdev, dev->pr_key);
		return true;
	}

	trace_bl_pr_key_reg_err(bdev, dev->pr_key, err);
	return false;
}
/*
 * bl_unregister_dev - undo bl_register_dev() for a device tree
 * @dev: root of the (possibly nested) volume
 *
 * A device with children only recurses into them; a leaf is unregistered
 * only when it is a PNFS_BLOCK_VOLUME_SCSI volume.
 */
static void bl_unregister_dev(struct pnfs_block_dev *dev)
{
	u32 child;

	if (!dev->nr_children) {
		if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
			bl_unregister_scsi(dev);
		return;
	}

	for (child = 0; child < dev->nr_children; child++)
		bl_unregister_dev(&dev->children[child]);
}
/*
 * bl_register_dev - register every SCSI leaf below a block device
 * @dev: root of the (possibly nested) volume
 *
 * For a device with children, each child is registered in index order.  If
 * one fails, the children registered before it are unregistered again in
 * reverse order and false is returned.  A leaf that is not a
 * PNFS_BLOCK_VOLUME_SCSI volume needs no registration and yields true.
 *
 * Returns true on success, false if any registration failed.
 */
bool bl_register_dev(struct pnfs_block_dev *dev)
{
	u32 idx;

	if (!dev->nr_children) {
		if (dev->type != PNFS_BLOCK_VOLUME_SCSI)
			return true;
		return bl_register_scsi(dev);
	}

	for (idx = 0; idx < dev->nr_children; idx++) {
		if (bl_register_dev(&dev->children[idx]))
			continue;
		goto unwind;
	}
	return true;

unwind:
	/* children[idx] failed: roll back children idx-1 .. 0 */
	while (idx > 0)
		bl_unregister_dev(&dev->children[--idx]);
	return false;
}
static void
bl_free_device(struct pnfs_block_dev *dev)
{
bl_unregister_dev(dev);
if (dev->nr_children) {
int i;
@ -23,17 +94,6 @@ bl_free_device(struct pnfs_block_dev *dev)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
if (dev->pr_registered) {
const struct pr_ops *ops =
file_bdev(dev->bdev_file)->bd_disk->fops->pr_ops;
int error;
error = ops->pr_register(file_bdev(dev->bdev_file),
dev->pr_key, 0, false);
if (error)
pr_err("failed to unregister PR key.\n");
}
if (dev->bdev_file)
fput(dev->bdev_file);
}
@ -314,7 +374,7 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix)
bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
NULL, NULL);
if (IS_ERR(bdev_file)) {
pr_warn("pNFS: failed to open device %s (%ld)\n",
dprintk("failed to open device %s (%ld)\n",
devname, PTR_ERR(bdev_file));
}
@ -327,8 +387,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
struct file *bdev_file;
struct block_device *bdev;
const struct pr_ops *ops;
struct file *bdev_file;
int error;
if (!bl_validate_designator(v))
@ -344,35 +405,30 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
if (IS_ERR(bdev_file))
bdev_file = bl_open_path(v, "wwn-0x");
if (IS_ERR(bdev_file))
bdev_file = bl_open_path(v, "nvme-eui.");
if (IS_ERR(bdev_file)) {
pr_warn("pNFS: no device found for volume %*phN\n",
v->scsi.designator_len, v->scsi.designator);
return PTR_ERR(bdev_file);
}
d->bdev_file = bdev_file;
bdev = file_bdev(bdev_file);
d->len = bdev_nr_bytes(file_bdev(d->bdev_file));
d->len = bdev_nr_bytes(bdev);
d->map = bl_map_simple;
d->pr_key = v->scsi.pr_key;
if (d->len == 0)
return -ENODEV;
pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
file_bdev(d->bdev_file)->bd_disk->disk_name, d->pr_key);
ops = file_bdev(d->bdev_file)->bd_disk->fops->pr_ops;
ops = bdev->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: block device %s does not support reservations.",
file_bdev(d->bdev_file)->bd_disk->disk_name);
bdev->bd_disk->disk_name);
error = -EINVAL;
goto out_blkdev_put;
}
error = ops->pr_register(file_bdev(d->bdev_file), 0, d->pr_key, true);
if (error) {
pr_err("pNFS: failed to register key for block device %s.",
file_bdev(d->bdev_file)->bd_disk->disk_name);
goto out_blkdev_put;
}
d->pr_registered = true;
return 0;
out_blkdev_put:
@ -458,7 +514,9 @@ static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
switch (volumes[idx].type) {
d->type = volumes[idx].type;
switch (d->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
@ -470,7 +528,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
dprintk("unsupported volume type: %d\n", volumes[idx].type);
dprintk("unsupported volume type: %d\n", d->type);
return -EIO;
}
}

View file

@ -46,14 +46,15 @@ struct cb_compound_hdr_res {
struct cb_getattrargs {
struct nfs_fh fh;
uint32_t bitmap[2];
uint32_t bitmap[3];
};
struct cb_getattrres {
__be32 status;
uint32_t bitmap[2];
uint32_t bitmap[3];
uint64_t size;
uint64_t change_attr;
struct timespec64 atime;
struct timespec64 ctime;
struct timespec64 mtime;
};

View file

@ -37,7 +37,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
res->bitmap[0] = res->bitmap[1] = 0;
memset(res->bitmap, 0, sizeof(res->bitmap));
res->status = htonl(NFS4ERR_BADHANDLE);
dprintk_rcu("NFS: GETATTR callback request from %s\n",
@ -59,12 +59,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
res->change_attr = delegation->change_attr;
if (nfs_have_writebacks(inode))
res->change_attr++;
res->atime = inode_get_atime(inode);
res->ctime = inode_get_ctime(inode);
res->mtime = inode_get_mtime(inode);
res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
args->bitmap[0];
res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
args->bitmap[1];
res->bitmap[0] = (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE) &
args->bitmap[0];
res->bitmap[1] = (FATTR4_WORD1_TIME_ACCESS |
FATTR4_WORD1_TIME_METADATA |
FATTR4_WORD1_TIME_MODIFY) & args->bitmap[1];
res->bitmap[2] = (FATTR4_WORD2_TIME_DELEG_ACCESS |
FATTR4_WORD2_TIME_DELEG_MODIFY) & args->bitmap[2];
res->status = 0;
out_iput:
rcu_read_unlock();
@ -319,9 +323,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
int stat;
if (args->cbl_recall_type == RETURN_FSID)
stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid,
PNFS_LAYOUT_BULK_RETURN);
else
stat = pnfs_destroy_layouts_byclid(clp, true);
stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN);
if (stat != 0)
return NFS4ERR_DELAY;
return NFS4ERR_NOMATCHING_LAYOUT;

View file

@ -25,8 +25,9 @@
#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps
#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
CB_OP_GETATTR_BITMAP_MAXSZ + \
/* change, size, ctime, mtime */\
(2 + 2 + 3 + 3) * 4)
/* change, size, atime, ctime,
* mtime, deleg_atime, deleg_mtime */\
(2 + 2 + 3 + 3 + 3 + 3 + 3) * 4)
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#if defined(CONFIG_NFS_V4_1)
@ -635,6 +636,13 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *
return 0;
}
/*
 * Encode the access time only when FATTR4_WORD1_TIME_ACCESS is set in word 1
 * of @bitmap; otherwise emit nothing and return 0.  When the bit is set the
 * return value is whatever encode_attr_time() returns.
 */
static __be32 encode_attr_atime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
	if (bitmap[1] & FATTR4_WORD1_TIME_ACCESS)
		return encode_attr_time(xdr, time);
	return 0;
}
static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
@ -649,6 +657,24 @@ static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap,
return encode_attr_time(xdr,time);
}
/*
 * Encode @time as the delegated access time only when
 * FATTR4_WORD2_TIME_DELEG_ACCESS is set in word 2 of @bitmap; otherwise emit
 * nothing and return 0.  When the bit is set the return value is whatever
 * encode_attr_time() returns.
 */
static __be32 encode_attr_delegatime(struct xdr_stream *xdr,
				     const uint32_t *bitmap,
				     const struct timespec64 *time)
{
	if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
		return encode_attr_time(xdr, time);
	return 0;
}
/*
 * Encode @time as the delegated modify time only when
 * FATTR4_WORD2_TIME_DELEG_MODIFY is set in word 2 of @bitmap; otherwise emit
 * nothing and return 0.  When the bit is set the return value is whatever
 * encode_attr_time() returns.
 */
static __be32 encode_attr_delegmtime(struct xdr_stream *xdr,
				     const uint32_t *bitmap,
				     const struct timespec64 *time)
{
	if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)
		return encode_attr_time(xdr, time);
	return 0;
}
static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
{
__be32 status;
@ -697,12 +723,21 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
if (unlikely(status != 0))
goto out;
status = encode_attr_size(xdr, res->bitmap, res->size);
if (unlikely(status != 0))
goto out;
status = encode_attr_atime(xdr, res->bitmap, &res->atime);
if (unlikely(status != 0))
goto out;
status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
if (unlikely(status != 0))
goto out;
status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
if (unlikely(status != 0))
goto out;
status = encode_attr_delegatime(xdr, res->bitmap, &res->atime);
if (unlikely(status != 0))
goto out;
status = encode_attr_delegmtime(xdr, res->bitmap, &res->mtime);
*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
out:
return status;

View file

@ -994,6 +994,9 @@ struct nfs_server *nfs_alloc_server(void)
server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
init_waitqueue_head(&server->write_congestion_wait);
atomic_long_set(&server->writeback, 0);
ida_init(&server->openowner_id);
ida_init(&server->lockowner_id);
pnfs_init_server(server);

View file

@ -82,11 +82,10 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
static bool
nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
fmode_t flags)
static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
fmode_t type)
{
if (delegation != NULL && (delegation->type & flags) == flags &&
if (delegation != NULL && (delegation->type & type) == type &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
return true;
@ -103,19 +102,22 @@ struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
return NULL;
}
static int
nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
static int nfs4_do_check_delegation(struct inode *inode, fmode_t type,
int flags, bool mark)
{
struct nfs_delegation *delegation;
int ret = 0;
flags &= FMODE_READ|FMODE_WRITE;
type &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (nfs4_is_valid_delegation(delegation, flags)) {
if (nfs4_is_valid_delegation(delegation, type)) {
if (mark)
nfs_mark_delegation_referenced(delegation);
ret = 1;
if ((flags & NFS_DELEGATION_FLAG_TIME) &&
!test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
ret = 0;
}
rcu_read_unlock();
return ret;
@ -124,22 +126,23 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
* nfs4_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
* @flags: delegation types to check for
* @type: delegation types to check for
* @flags: various modifiers
*
* Returns one if inode has the indicated delegation, otherwise zero.
*/
int nfs4_have_delegation(struct inode *inode, fmode_t flags)
int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags)
{
return nfs4_do_check_delegation(inode, flags, true);
return nfs4_do_check_delegation(inode, type, flags, true);
}
/*
* nfs4_check_delegation - check if inode has a delegation, do not mark
* NFS_DELEGATION_REFERENCED if it has one.
*/
int nfs4_check_delegation(struct inode *inode, fmode_t flags)
int nfs4_check_delegation(struct inode *inode, fmode_t type)
{
return nfs4_do_check_delegation(inode, flags, false);
return nfs4_do_check_delegation(inode, type, 0, false);
}
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
@ -221,11 +224,12 @@ static int nfs_delegation_claim_opens(struct inode *inode,
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
* @deleg_type: raw delegation type
*
*/
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid,
unsigned long pagemod_limit)
unsigned long pagemod_limit, u32 deleg_type)
{
struct nfs_delegation *delegation;
const struct cred *oldcred = NULL;
@ -239,6 +243,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
delegation->pagemod_limit = pagemod_limit;
oldcred = delegation->cred;
delegation->cred = get_cred(cred);
switch (deleg_type) {
case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
break;
default:
clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
}
clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
&delegation->flags))
@ -250,11 +262,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
} else {
rcu_read_unlock();
nfs_inode_set_delegation(inode, cred, type, stateid,
pagemod_limit);
pagemod_limit, deleg_type);
}
}
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
static int nfs_do_return_delegation(struct inode *inode,
struct nfs_delegation *delegation,
int issync)
{
const struct cred *cred;
int res = 0;
@ -263,9 +277,8 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
spin_lock(&delegation->lock);
cred = get_cred(delegation->cred);
spin_unlock(&delegation->lock);
res = nfs4_proc_delegreturn(inode, cred,
&delegation->stateid,
issync);
res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid,
delegation, issync);
put_cred(cred);
}
return res;
@ -418,13 +431,13 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
* @deleg_type: raw delegation type
*
* Returns zero on success, or a negative errno value.
*/
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
fmode_t type,
const nfs4_stateid *stateid,
unsigned long pagemod_limit)
fmode_t type, const nfs4_stateid *stateid,
unsigned long pagemod_limit, u32 deleg_type)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
@ -444,6 +457,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
delegation->cred = get_cred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
switch (deleg_type) {
case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
delegation->flags |= BIT(NFS_DELEGATION_DELEGTIME);
}
delegation->test_gen = 0;
spin_lock_init(&delegation->lock);
@ -508,6 +526,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
atomic_long_inc(&nfs_active_delegations);
trace_nfs4_set_delegation(inode, type);
/* If we hold writebacks and have delegated mtime then update */
if (deleg_type == NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG &&
nfs_have_writebacks(inode))
nfs_update_delegated_mtime(inode);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)

View file

@ -38,12 +38,15 @@ enum {
NFS_DELEGATION_TEST_EXPIRED,
NFS_DELEGATION_INODE_FREEING,
NFS_DELEGATION_RETURN_DELAYED,
NFS_DELEGATION_DELEGTIME,
};
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
fmode_t type, const nfs4_stateid *stateid,
unsigned long pagemod_limit, u32 deleg_type);
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
fmode_t type, const nfs4_stateid *stateid,
unsigned long pagemod_limit, u32 deleg_type);
int nfs4_inode_return_delegation(struct inode *inode);
void nfs4_inode_return_delegation_on_close(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
@ -67,7 +70,9 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp);
void nfs_reap_expired_delegations(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync);
int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
const nfs4_stateid *stateid,
struct nfs_delegation *delegation, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
@ -75,8 +80,8 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
int nfs4_check_delegation(struct inode *inode, fmode_t flags);
int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags);
int nfs4_check_delegation(struct inode *inode, fmode_t type);
bool nfs4_delegation_flush_on_close(const struct inode *inode);
void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid);
@ -84,9 +89,37 @@ int nfs4_inode_make_writeable(struct inode *inode);
#endif
#define NFS_DELEGATION_FLAG_TIME BIT(1)
void nfs_update_delegated_atime(struct inode *inode);
void nfs_update_delegated_mtime(struct inode *inode);
void nfs_update_delegated_mtime_locked(struct inode *inode);
/*
 * Ask the protocol's have_delegation() hook whether @inode holds a
 * delegation covering FMODE_READ, with no modifier flags.
 * NOTE(review): the name implies a write delegation also satisfies the
 * FMODE_READ check - confirm against the have_delegation() implementations.
 */
static inline int nfs_have_read_or_write_delegation(struct inode *inode)
{
return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0);
}
/*
 * Ask the protocol's have_delegation() hook whether @inode holds a
 * delegation covering FMODE_WRITE, with no modifier flags.
 */
static inline int nfs_have_write_delegation(struct inode *inode)
{
return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, 0);
}
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0);
}
/*
 * Ask the protocol's have_delegation() hook for a delegation covering
 * FMODE_READ, passing the NFS_DELEGATION_FLAG_TIME modifier.
 * NOTE(review): in nfs4_do_check_delegation() that modifier makes the check
 * fail unless NFS_DELEGATION_DELEGTIME is set on the delegation; other
 * protocol versions' hooks are not visible here.
 */
static inline int nfs_have_delegated_atime(struct inode *inode)
{
return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ,
NFS_DELEGATION_FLAG_TIME);
}
/*
 * Ask the protocol's have_delegation() hook for a delegation covering
 * FMODE_WRITE, passing the NFS_DELEGATION_FLAG_TIME modifier.
 * NOTE(review): in nfs4_do_check_delegation() that modifier makes the check
 * fail unless NFS_DELEGATION_DELEGTIME is set on the delegation; other
 * protocol versions' hooks are not visible here.
 */
static inline int nfs_have_delegated_mtime(struct inode *inode)
{
return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE,
NFS_DELEGATION_FLAG_TIME);
}
#endif

View file

@ -1437,7 +1437,7 @@ static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
if (!dir || !nfs_verify_change_attribute(dir, verf))
return;
if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0))
nfs_set_verifier_delegated(&verf);
dentry->d_time = verf;
}

View file

@ -339,6 +339,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, struct page **pagep,
void **fsdata)
{
fgf_t fgp = FGP_WRITEBEGIN;
struct folio *folio;
int once_thru = 0;
int ret;
@ -346,8 +347,9 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
fgp |= fgf_set_order(len);
start:
folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
@ -425,7 +427,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
struct inode *inode = folio_file_mapping(folio)->host;
struct inode *inode = folio->mapping->host;
dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
folio->index, offset, length);
@ -434,7 +436,7 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset,
/* Cancel any unstarted writes on this page */
nfs_wb_folio_cancel(inode, folio);
folio_wait_private_2(folio); /* [DEPRECATED] */
trace_nfs_invalidate_folio(inode, folio);
trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
}
/*
@ -452,7 +454,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
current_is_kswapd())
return false;
if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0)
if (nfs_wb_folio(folio->mapping->host, folio) < 0)
return false;
}
return nfs_fscache_release_folio(folio, gfp);
@ -502,7 +504,8 @@ static int nfs_launder_folio(struct folio *folio)
folio_wait_private_2(folio); /* [DEPRECATED] */
ret = nfs_wb_folio(inode, folio);
trace_nfs_launder_folio_done(inode, folio, ret);
trace_nfs_launder_folio_done(inode, folio_pos(folio),
folio_size(folio), ret);
return ret;
}
@ -604,7 +607,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
folio_lock(folio);
mapping = folio_file_mapping(folio);
mapping = folio->mapping;
if (mapping != inode->i_mapping)
goto out_unlock;
@ -730,7 +733,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
}
fl->c.flc_type = saved_type;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
if (nfs_have_read_or_write_delegation(inode))
goto out_noconflict;
if (is_local)
@ -813,7 +816,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* This makes locking act as a cache coherency point.
*/
nfs_sync_mapping(filp->f_mapping);
if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
if (!nfs_have_read_or_write_delegation(inode)) {
nfs_zap_caches(inode);
if (mapping_mapped(filp->f_mapping))
nfs_revalidate_mapping(inode, filp->f_mapping);

View file

@ -1110,7 +1110,6 @@ static const struct pnfs_commit_ops filelayout_commit_ops = {
.clear_request_commit = pnfs_generic_clear_request_commit,
.scan_commit_lists = pnfs_generic_scan_commit_lists,
.recover_commit_reqs = pnfs_generic_recover_commit_reqs,
.search_commit_reqs = pnfs_generic_search_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
};

View file

@ -2548,7 +2548,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server,
const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
server->caps |= NFS_CAP_LAYOUTSTATS;
server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN;
#endif
return 0;
}

View file

@ -341,7 +341,7 @@ void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr)
int nfs_netfs_folio_unlock(struct folio *folio)
{
struct inode *inode = folio_file_mapping(folio)->host;
struct inode *inode = folio->mapping->host;
/*
* If fscache is enabled, netfs will unlock pages.

View file

@ -190,9 +190,8 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{
struct nfs_inode *nfsi = NFS_I(inode);
bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
if (have_delegation) {
if (nfs_have_delegated_attributes(inode)) {
if (!(flags & NFS_INO_REVAL_FORCED))
flags &= ~(NFS_INO_INVALID_MODE |
NFS_INO_INVALID_OTHER |
@ -276,6 +275,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
void nfs_invalidate_atime(struct inode *inode)
{
if (nfs_have_delegated_atime(inode))
return;
spin_lock(&inode->i_lock);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
spin_unlock(&inode->i_lock);
@ -491,6 +492,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
inode->i_data.a_ops = &nfs_file_aops;
nfs_inode_init_regular(nfsi);
mapping_set_large_folios(inode->i_mapping);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
@ -604,6 +606,55 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
}
EXPORT_SYMBOL_GPL(nfs_fhget);
/*
 * nfs_fattr_fixup_delegated - drop timestamp attributes covered by a
 * delegation
 * @inode: inode the attributes belong to
 * @fattr: attributes whose ->valid mask is trimmed
 *
 * The cache_validity word is sampled once on entry.  With a delegated mtime,
 * the ctime, mtime and atime bits (and the pre-op ctime/mtime bits) are
 * cleared from @fattr->valid, each only if the matching NFS_INO_INVALID_*
 * bit is not set in the cached validity.  With only a delegated atime, just
 * the atime bit is handled that way.
 */
static void
nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr)
{
	unsigned long invalid = NFS_I(inode)->cache_validity;

	if (!nfs_have_delegated_mtime(inode)) {
		if (nfs_have_delegated_atime(inode) &&
		    !(invalid & NFS_INO_INVALID_ATIME))
			fattr->valid &= ~NFS_ATTR_FATTR_ATIME;
		return;
	}

	if (!(invalid & NFS_INO_INVALID_CTIME))
		fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME |
				  NFS_ATTR_FATTR_CTIME);

	if (!(invalid & NFS_INO_INVALID_MTIME))
		fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME |
				  NFS_ATTR_FATTR_MTIME);

	if (!(invalid & NFS_INO_INVALID_ATIME))
		fattr->valid &= ~NFS_ATTR_FATTR_ATIME;
}
/*
 * nfs_update_delegated_atime - refresh the atime locally under a delegation
 * @inode: inode to update
 *
 * Under inode->i_lock: if nfs_have_delegated_atime() reports a delegation,
 * call inode_update_timestamps() with S_ATIME and clear
 * NFS_INO_INVALID_ATIME from the cache validity.  Otherwise nothing changes.
 */
void nfs_update_delegated_atime(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	if (!nfs_have_delegated_atime(inode))
		goto out_unlock;

	inode_update_timestamps(inode, S_ATIME);
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ATIME;
out_unlock:
	spin_unlock(&inode->i_lock);
}
/*
 * nfs_update_delegated_mtime_locked - refresh ctime/mtime locally under a
 * delegation
 * @inode: inode to update
 *
 * If nfs_have_delegated_mtime() reports a delegation, call
 * inode_update_timestamps() with S_CTIME | S_MTIME and clear
 * NFS_INO_INVALID_CTIME and NFS_INO_INVALID_MTIME from the cache validity.
 * This function takes no lock itself; nfs_update_delegated_mtime() calls it
 * with inode->i_lock held.
 */
void nfs_update_delegated_mtime_locked(struct inode *inode)
{
	if (!nfs_have_delegated_mtime(inode))
		return;

	inode_update_timestamps(inode, S_CTIME | S_MTIME);
	NFS_I(inode)->cache_validity &=
		~(NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME);
}
/*
 * nfs_update_delegated_mtime - locking wrapper around
 * nfs_update_delegated_mtime_locked()
 * @inode: inode to update
 *
 * Takes inode->i_lock, runs the _locked variant, then releases the lock.
 */
void nfs_update_delegated_mtime(struct inode *inode)
{
spin_lock(&inode->i_lock);
nfs_update_delegated_mtime_locked(inode);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime);
#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
int
@ -631,6 +682,17 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
attr->ia_valid &= ~ATTR_SIZE;
}
if (nfs_have_delegated_mtime(inode)) {
if (attr->ia_valid & ATTR_MTIME) {
nfs_update_delegated_mtime(inode);
attr->ia_valid &= ~ATTR_MTIME;
}
if (attr->ia_valid & ATTR_ATIME) {
nfs_update_delegated_atime(inode);
attr->ia_valid &= ~ATTR_ATIME;
}
}
/* Optimization: if the end result is no change, don't RPC */
if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
@ -686,6 +748,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
spin_unlock(&inode->i_lock);
truncate_pagecache(inode, offset);
nfs_update_delegated_mtime_locked(inode);
spin_lock(&inode->i_lock);
out:
return err;
@ -709,8 +772,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & ATTR_SIZE) != 0) {
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
NFS_INO_INVALID_BLOCKS);
if (!nfs_have_delegated_mtime(inode))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
@ -856,8 +920,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
/* Flush out writes to the server in order to update c/mtime/version. */
if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) &&
S_ISREG(inode->i_mode))
filemap_write_and_wait(inode->i_mapping);
S_ISREG(inode->i_mode)) {
if (nfs_have_delegated_mtime(inode))
filemap_fdatawrite(inode->i_mapping);
else
filemap_write_and_wait(inode->i_mapping);
}
/*
* We may force a getattr if the user cares about atime.
@ -1012,7 +1080,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
if (!is_sync)
return;
inode = d_inode(ctx->dentry);
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
if (nfs_have_read_or_write_delegation(inode))
return;
nfsi = NFS_I(inode);
if (inode->i_mapping->nrpages == 0)
@ -1482,7 +1550,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
unsigned long invalid = 0;
struct timespec64 ts;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
if (nfs_have_delegated_attributes(inode))
return 0;
if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) {
@ -2118,6 +2186,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
nfsi->read_cache_jiffies = fattr->time_start;
/* Fix up any delegated attributes in the struct nfs_fattr */
nfs_fattr_fixup_delegated(inode, fattr);
save_cache_validity = nfsi->cache_validity;
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
@ -2538,6 +2609,7 @@ static void __exit exit_nfs_fs(void)
/* Not quite true; I just maintain it */
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
MODULE_DESCRIPTION("NFS client support");
MODULE_LICENSE("GPL");
module_param(enable_ino64, bool, 0644);

View file

@ -785,7 +785,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio,
struct nfs_commit_info *cinfo)
{
if (folio && !cinfo->dreq) {
struct inode *inode = folio_file_mapping(folio)->host;
struct inode *inode = folio->mapping->host;
long nr = folio_nr_pages(folio);
/* This page is really still in write-back - just that the
@ -797,34 +797,15 @@ static inline void nfs_folio_mark_unstable(struct folio *folio,
}
}
/*
* Determine the number of bytes of data the page contains
*/
static inline
unsigned int nfs_page_length(struct page *page)
{
loff_t i_size = i_size_read(page_file_mapping(page)->host);
if (i_size > 0) {
pgoff_t index = page_index(page);
pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
if (index < end_index)
return PAGE_SIZE;
if (index == end_index)
return ((i_size - 1) & ~PAGE_MASK) + 1;
}
return 0;
}
/*
* Determine the number of bytes of data the page contains
*/
static inline size_t nfs_folio_length(struct folio *folio)
{
loff_t i_size = i_size_read(folio_file_mapping(folio)->host);
loff_t i_size = i_size_read(folio->mapping->host);
if (i_size > 0) {
pgoff_t index = folio_index(folio) >> folio_order(folio);
pgoff_t index = folio->index >> folio_order(folio);
pgoff_t end_index = (i_size - 1) >> folio_shift(folio);
if (index < end_index)
return folio_size(folio);

View file

@ -128,11 +128,6 @@ struct mountres {
rpc_authflavor_t *auth_flavors;
};
struct mnt_fhstatus {
u32 status;
struct nfs_fh *fh;
};
/**
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments

View file

@ -26,6 +26,7 @@ static void __exit exit_nfs_v2(void)
unregister_nfs_version(&nfs_v2);
}
MODULE_DESCRIPTION("NFSv2 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v2);

View file

@ -979,11 +979,18 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
return status;
}
static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags)
{
return 0;
}
/*
 * ->return_delegation hook for NFSv3 (installed in nfs_v3_clientops).
 *
 * For regular files this calls nfs_wb_all() on the inode; other file types
 * are left alone.  The result of nfs_wb_all() is not propagated: the
 * function always returns 0.
 */
static int nfs3_return_delegation(struct inode *inode)
{
	if (!S_ISREG(inode->i_mode))
		return 0;

	nfs_wb_all(inode);
	return 0;
}
static const struct inode_operations nfs3_dir_inode_operations = {
.create = nfs_create,
.atomic_open = nfs_atomic_open_v23,
@ -1062,6 +1069,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.clear_acl_cache = forget_all_cached_acls,
.close_context = nfs_close_context,
.have_delegation = nfs3_have_delegation,
.return_delegation = nfs3_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,

View file

@ -27,6 +27,7 @@ static void __exit exit_nfs_v3(void)
unregister_nfs_version(&nfs_v3);
}
MODULE_DESCRIPTION("NFSv3 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v3);

View file

@ -67,7 +67,8 @@ struct nfs4_minor_version_ops {
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
int (*test_and_free_expired)(struct nfs_server *,
nfs4_stateid *, const struct cred *);
const nfs4_stateid *,
const struct cred *);
struct nfs_seqid *
(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
void (*session_trunk)(struct rpc_clnt *clnt,

View file

@ -231,9 +231,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
if (test_bit(NFS_CS_DS, &cl_init->init_flags))
__set_bit(NFS_CS_DS, &clp->cl_flags);
if (test_bit(NFS_CS_PNFS, &cl_init->init_flags))
__set_bit(NFS_CS_PNFS, &clp->cl_flags);
/*
* Set up the connection to the server before we add it to the
* global list.
@ -1013,7 +1012,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
__set_bit(NFS_CS_DS, &cl_init.init_flags);
__set_bit(NFS_CS_PNFS, &cl_init.init_flags);
cl_init.max_connect = NFS_MAX_TRANSPORTS;
/*

View file

@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
const struct cred *cred,
struct nfs4_slot *slot,
bool is_privileged);
static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
const struct cred *);
static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *);
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
const struct cred *, bool);
#endif
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
@ -293,7 +293,7 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
unsigned long cache_validity;
memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst));
if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
if (!inode || !nfs_have_read_or_write_delegation(inode))
return;
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;
@ -310,6 +310,18 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
dst[1] &= ~FATTR4_WORD1_MODE;
if (!(cache_validity & NFS_INO_INVALID_OTHER))
dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
if (nfs_have_delegated_mtime(inode)) {
if (!(cache_validity & NFS_INO_INVALID_ATIME))
dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
if (!(cache_validity & NFS_INO_INVALID_MTIME))
dst[1] &= ~FATTR4_WORD1_TIME_MODIFY;
if (!(cache_validity & NFS_INO_INVALID_CTIME))
dst[1] &= ~FATTR4_WORD1_TIME_METADATA;
} else if (nfs_have_delegated_atime(inode)) {
if (!(cache_validity & NFS_INO_INVALID_ATIME))
dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
}
}
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
@ -1245,7 +1257,8 @@ nfs4_update_changeattr_locked(struct inode *inode,
struct nfs_inode *nfsi = NFS_I(inode);
u64 change_attr = inode_peek_iversion_raw(inode);
cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
if (!nfs_have_delegated_mtime(inode))
cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
if (S_ISDIR(inode->i_mode))
cache_validity |= NFS_INO_INVALID_DATA;
@ -1264,7 +1277,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
if (!nfs_have_delegated_attributes(inode))
cache_validity |=
NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
@ -1320,8 +1333,7 @@ static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx)
}
static u32
nfs4_map_atomic_open_share(struct nfs_server *server,
fmode_t fmode, int openflags)
nfs4_fmode_to_share_access(fmode_t fmode)
{
u32 res = 0;
@ -1335,11 +1347,27 @@ nfs4_map_atomic_open_share(struct nfs_server *server,
case FMODE_READ|FMODE_WRITE:
res = NFS4_SHARE_ACCESS_BOTH;
}
return res;
}
static u32
nfs4_map_atomic_open_share(struct nfs_server *server,
fmode_t fmode, int openflags)
{
u32 res = nfs4_fmode_to_share_access(fmode);
if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
goto out;
/* Want no delegation if we're using O_DIRECT */
if (openflags & O_DIRECT)
if (openflags & O_DIRECT) {
res |= NFS4_SHARE_WANT_NO_DELEG;
goto out;
}
/* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */
if (server->caps & NFS_CAP_DELEGTIME)
res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS;
if (server->caps & NFS_CAP_OPEN_XOR)
res |= NFS4_SHARE_WANT_OPEN_XOR_DELEGATION;
out:
return res;
}
@ -1954,44 +1982,41 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
}
static void
nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
nfs4_process_delegation(struct inode *inode, const struct cred *cred,
enum open_claim_type4 claim,
const struct nfs4_open_delegation *delegation)
{
struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client;
struct nfs_delegation *delegation;
int delegation_flags = 0;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(state->inode)->delegation);
if (delegation)
delegation_flags = delegation->flags;
rcu_read_unlock();
switch (data->o_arg.claim) {
default:
switch (delegation->open_delegation_type) {
case NFS4_OPEN_DELEGATE_READ:
case NFS4_OPEN_DELEGATE_WRITE:
case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
break;
default:
return;
}
switch (claim) {
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
"returning a delegation for "
"OPEN(CLAIM_DELEGATE_CUR)\n",
clp->cl_hostname);
return;
NFS_SERVER(inode)->nfs_client->cl_hostname);
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
nfs_inode_reclaim_delegation(inode, cred, delegation->type,
&delegation->stateid,
delegation->pagemod_limit,
delegation->open_delegation_type);
break;
default:
nfs_inode_set_delegation(inode, cred, delegation->type,
&delegation->stateid,
delegation->pagemod_limit,
delegation->open_delegation_type);
}
if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
nfs_inode_set_delegation(state->inode,
data->owner->so_cred,
data->o_res.delegation_type,
&data->o_res.delegation,
data->o_res.pagemod_limit);
else
nfs_inode_reclaim_delegation(state->inode,
data->owner->so_cred,
data->o_res.delegation_type,
&data->o_res.delegation,
data->o_res.pagemod_limit);
if (data->o_res.do_recall)
nfs_async_inode_return_delegation(state->inode,
&data->o_res.delegation);
if (delegation->do_recall)
nfs_async_inode_return_delegation(inode, &delegation->stateid);
}
/*
@ -2015,11 +2040,16 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (ret)
return ERR_PTR(ret);
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
nfs4_process_delegation(state->inode,
data->owner->so_cred,
data->o_arg.claim,
&data->o_res.delegation);
if (!update_open_stateid(state, &data->o_res.stateid,
NULL, data->o_arg.fmode))
if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) {
if (!update_open_stateid(state, &data->o_res.stateid,
NULL, data->o_arg.fmode))
return ERR_PTR(-EAGAIN);
} else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode))
return ERR_PTR(-EAGAIN);
refcount_inc(&state->count);
@ -2083,10 +2113,18 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
if (IS_ERR(state))
goto out;
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
if (!update_open_stateid(state, &data->o_res.stateid,
NULL, data->o_arg.fmode)) {
nfs4_process_delegation(state->inode,
data->owner->so_cred,
data->o_arg.claim,
&data->o_res.delegation);
if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) {
if (!update_open_stateid(state, &data->o_res.stateid,
NULL, data->o_arg.fmode)) {
nfs4_put_open_state(state);
state = ERR_PTR(-EAGAIN);
}
} else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) {
nfs4_put_open_state(state);
state = ERR_PTR(-EAGAIN);
}
@ -2222,7 +2260,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
{
struct nfs_delegation *delegation;
struct nfs4_opendata *opendata;
fmode_t delegation_type = 0;
u32 delegation_type = NFS4_OPEN_DELEGATE_NONE;
int status;
opendata = nfs4_open_recoverdata_alloc(ctx, state,
@ -2231,8 +2269,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
return PTR_ERR(opendata);
rcu_read_lock();
delegation = rcu_dereference(NFS_I(state->inode)->delegation);
if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
delegation_type = delegation->type;
if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) {
switch(delegation->type) {
case FMODE_READ:
delegation_type = NFS4_OPEN_DELEGATE_READ;
if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG;
break;
case FMODE_WRITE:
case FMODE_READ|FMODE_WRITE:
delegation_type = NFS4_OPEN_DELEGATE_WRITE;
if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG;
}
}
rcu_read_unlock();
opendata->o_arg.u.delegation_type = delegation_type;
status = nfs4_open_recover(opendata, state);
@ -2825,16 +2875,16 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
}
static int nfs40_test_and_free_expired_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
const struct cred *cred)
const nfs4_stateid *stateid,
const struct cred *cred)
{
return -NFS4ERR_BAD_STATEID;
}
#if defined(CONFIG_NFS_V4_1)
static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
const struct cred *cred)
const nfs4_stateid *stateid,
const struct cred *cred)
{
int status;
@ -3111,7 +3161,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
if (!opendata->rpc_done)
break;
if (opendata->o_res.delegation_type != 0)
if (opendata->o_res.delegation.type != 0)
dir_verifier = nfs_save_change_attribute(dir);
nfs_set_verifier(dentry, dir_verifier);
}
@ -3394,7 +3444,8 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
.inode = inode,
.stateid = &arg.stateid,
};
unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
unsigned long adjust_flags = NFS_INO_INVALID_CHANGE |
NFS_INO_INVALID_CTIME;
int err;
if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
@ -3700,7 +3751,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
if (!nfs4_have_delegation(inode, FMODE_READ)) {
if (!nfs4_have_delegation(inode, FMODE_READ, 0)) {
nfs4_bitmask_set(calldata->arg.bitmask_store,
server->cache_consistency_bitmask,
inode, 0);
@ -3710,8 +3761,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
}
calldata->arg.share_access =
nfs4_map_atomic_open_share(NFS_SERVER(inode),
calldata->arg.fmode, 0);
nfs4_fmode_to_share_access(calldata->arg.fmode);
if (calldata->res.fattr == NULL)
calldata->arg.bitmask = NULL;
@ -3852,11 +3902,14 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL)
#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL)
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion;
u32 minorversion = server->nfs_client->cl_minorversion;
u32 bitmask[3] = {
[0] = FATTR4_WORD0_SUPPORTED_ATTRS,
};
struct nfs4_server_caps_arg args = {
.fhandle = fhandle,
.bitmask = bitmask,
@ -3882,6 +3935,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (status == 0) {
bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS |
FATTR4_WORD0_FH_EXPIRE_TYPE |
FATTR4_WORD0_LINK_SUPPORT |
FATTR4_WORD0_SYMLINK_SUPPORT |
FATTR4_WORD0_ACLSUPPORT |
FATTR4_WORD0_CASE_INSENSITIVE |
FATTR4_WORD0_CASE_PRESERVING) &
res.attr_bitmask[0];
/* Sanity check the server answers */
switch (minorversion) {
case 0:
@ -3890,9 +3951,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
break;
case 1:
res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK;
bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT &
res.attr_bitmask[2];
break;
case 2:
res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT |
FATTR4_WORD2_OPEN_ARGUMENTS) &
res.attr_bitmask[2];
}
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
@ -3915,6 +3981,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
#endif
if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS)
server->caps |= NFS_CAP_FS_LOCATIONS;
if (res.attr_bitmask[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)
server->caps |= NFS_CAP_DELEGTIME;
if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
@ -3939,6 +4007,10 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
sizeof(server->attr_bitmask));
server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
if (res.open_caps.oa_share_access_want[0] &
NFS4_SHARE_WANT_OPEN_XOR_DELEGATION)
server->caps |= NFS_CAP_OPEN_XOR;
memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
@ -4638,7 +4710,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
};
int status = 0;
if (!nfs4_have_delegation(inode, FMODE_READ)) {
if (!nfs4_have_delegation(inode, FMODE_READ, 0)) {
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
return -ENOMEM;
@ -4956,8 +5028,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
goto out;
nfs4_inode_make_writeable(inode);
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode,
NFS_INO_INVALID_CHANGE);
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label),
inode,
NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
@ -5607,7 +5680,7 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
/* Otherwise, request attributes if and only if we don't hold
* a delegation
*/
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0;
}
void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
@ -6575,6 +6648,7 @@ struct nfs4_delegreturndata {
u32 roc_barrier;
bool roc;
} lr;
struct nfs4_delegattr sattr;
struct nfs_fattr fattr;
int rpc_status;
struct inode *inode;
@ -6599,6 +6673,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
&data->res.lr_ret) == -EAGAIN)
goto out_restart;
if (data->args.sattr_args && task->tk_status != 0) {
switch(data->res.sattr_ret) {
case 0:
data->args.sattr_args = NULL;
data->res.sattr_res = false;
break;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
/* Let the main handler below do stateid recovery */
break;
case -NFS4ERR_OLD_STATEID:
if (nfs4_refresh_delegation_stateid(&data->stateid,
data->inode))
goto out_restart;
fallthrough;
default:
data->args.sattr_args = NULL;
data->res.sattr_res = false;
goto out_restart;
}
}
switch (task->tk_status) {
case 0:
renew_lease(data->res.server, data->timestamp);
@ -6692,7 +6790,10 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_release = nfs4_delegreturn_release,
};
static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
const nfs4_stateid *stateid,
struct nfs_delegation *delegation,
int issync)
{
struct nfs4_delegreturndata *data;
struct nfs_server *server = NFS_SERVER(inode);
@ -6744,12 +6845,27 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
}
}
if (delegation &&
test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) {
if (delegation->type & FMODE_READ) {
data->sattr.atime = inode_get_atime(inode);
data->sattr.atime_set = true;
}
if (delegation->type & FMODE_WRITE) {
data->sattr.mtime = inode_get_mtime(inode);
data->sattr.mtime_set = true;
}
data->args.sattr_args = &data->sattr;
data->res.sattr_res = true;
}
if (!data->inode)
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
1);
else
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
0);
task_setup_data.callback_data = data;
msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res;
@ -6767,13 +6883,16 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
return status;
}
int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
const nfs4_stateid *stateid,
struct nfs_delegation *delegation, int issync)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
err = _nfs4_proc_delegreturn(inode, cred, stateid,
delegation, issync);
trace_nfs4_delegreturn(inode, stateid, err);
switch (err) {
case -NFS4ERR_STALE_STATEID:
@ -7629,10 +7748,10 @@ static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease,
int ret;
/* No delegation, no lease */
if (!nfs4_have_delegation(inode, type))
if (!nfs4_have_delegation(inode, type, 0))
return -EAGAIN;
ret = generic_setlease(file, arg, lease, priv);
if (ret || nfs4_have_delegation(inode, type))
if (ret || nfs4_have_delegation(inode, type, 0))
return ret;
/* We raced with a delegation return */
nfs4_delete_lease(file, priv);
@ -8840,7 +8959,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
if (test_bit(NFS_CS_DS, &clp->cl_flags))
if (test_bit(NFS_CS_PNFS, &clp->cl_flags))
calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
@ -9854,6 +9973,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
if (!nfs41_sequence_process(task, &lrp->res.seq_res))
return;
if (task->tk_rpc_status == -ETIMEDOUT) {
lrp->rpc_status = -EAGAIN;
lrp->res.lrs_present = 0;
return;
}
/*
* Was there an RPC level error? Assume the call succeeded,
* and that we need to release the layout
@ -9876,6 +10000,15 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
fallthrough;
case 0:
break;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
nfs4_schedule_session_recovery(server->nfs_client->cl_session,
task->tk_status);
lrp->res.lrs_present = 0;
lrp->rpc_status = -EAGAIN;
task->tk_status = 0;
break;
case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
break;
@ -9893,8 +10026,13 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
struct pnfs_layout_hdr *lo = lrp->args.layout;
pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range,
if (lrp->rpc_status == 0 || !lrp->inode)
pnfs_layoutreturn_free_lsegs(
lo, &lrp->args.stateid, &lrp->args.range,
lrp->res.lrs_present ? &lrp->res.stateid : NULL);
else
pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid,
&lrp->args.range);
nfs4_sequence_free_slot(&lrp->res.seq_res);
if (lrp->ld_private.ops && lrp->ld_private.ops->free)
lrp->ld_private.ops->free(&lrp->ld_private);
@ -9910,7 +10048,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
.rpc_release = nfs4_layoutreturn_release,
};
int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags)
{
struct rpc_task *task;
struct rpc_message msg = {
@ -9933,7 +10071,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
&task_setup_data.rpc_client, &msg);
lrp->inode = nfs_igrab_and_active(lrp->args.inode);
if (!sync) {
if (flags & PNFS_FL_LAYOUTRETURN_ASYNC) {
if (!lrp->inode) {
nfs4_layoutreturn_release(lrp);
return -EAGAIN;
@ -9941,6 +10079,8 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
task_setup_data.flags |= RPC_TASK_ASYNC;
}
if (!lrp->inode)
flags |= PNFS_FL_LAYOUTRETURN_PRIVILEGED;
if (flags & PNFS_FL_LAYOUTRETURN_PRIVILEGED)
nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1,
1);
else
@ -9949,7 +10089,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
if (sync)
if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC))
status = task->tk_status;
trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
dprintk("<-- %s status=%d\n", __func__, status);
@ -10267,12 +10407,12 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
}
static int _nfs41_test_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
const struct cred *cred)
const nfs4_stateid *stateid,
const struct cred *cred)
{
int status;
struct nfs41_test_stateid_args args = {
.stateid = stateid,
.stateid = *stateid,
};
struct nfs41_test_stateid_res res;
struct rpc_message msg = {
@ -10328,8 +10468,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server,
* failed or the state ID is not currently valid.
*/
static int nfs41_test_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
const struct cred *cred)
const nfs4_stateid *stateid,
const struct cred *cred)
{
struct nfs4_exception exception = {
.interruptible = true,
@ -10759,6 +10899,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
.have_delegation = nfs4_have_delegation,
.return_delegation = nfs4_inode_return_delegation,
.alloc_client = nfs4_alloc_client,
.init_client = nfs4_init_client,
.free_client = nfs4_free_client,

View file

@ -1863,6 +1863,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
if (!nfs4_state_clear_reclaim_reboot(clp))
return;
pnfs_destroy_all_layouts(clp);
ops = clp->cl_mvops->reboot_recovery_ops;
cred = nfs4_get_clid_cred(clp);
err = nfs4_reclaim_complete(clp, ops, cred);
@ -2068,7 +2069,6 @@ static int nfs4_establish_lease(struct nfs_client *clp)
put_cred(cred);
if (status != 0)
return status;
pnfs_destroy_all_layouts(clp);
return 0;
}
@ -2680,6 +2680,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim reboot";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->reboot_recovery_ops);
if (status == 0)
status = pnfs_layout_handle_reboot(clp);
if (status == -EAGAIN)
continue;
if (status < 0)

View file

@ -332,6 +332,7 @@ static void __exit exit_nfs_v4(void)
nfs_dns_resolver_destroy();
}
MODULE_DESCRIPTION("NFSv4 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v4);

View file

@ -2,6 +2,8 @@
/*
* Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#include <uapi/linux/pr.h>
#include <linux/blkdev.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "internal.h"
@ -29,5 +31,10 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg);
EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err);
EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg);
EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg_err);
EXPORT_TRACEPOINT_SYMBOL_GPL(fl_getdevinfo);
#endif

View file

@ -2153,6 +2153,94 @@ TRACE_EVENT(ff_layout_commit_error,
)
);
/*
 * Event class for block layout PR key events that carry no status.
 *
 * Arguments: the block device and the 64-bit key.  Each record stores the
 * key, the device number (bdev->bd_dev) and the disk name
 * (bdev->bd_disk->disk_name), and is printed as
 * "dev=<major>,<minor> (<disk name>) key=0x<16 hex digits>".
 */
DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class,
TP_PROTO(
const struct block_device *bdev,
u64 key
),
TP_ARGS(bdev, key),
TP_STRUCT__entry(
__field(u64, key)
__field(dev_t, dev)
__string(device, bdev->bd_disk->disk_name)
),
TP_fast_assign(
__entry->key = key;
__entry->dev = bdev->bd_dev;
__assign_str(device);
),
TP_printk("dev=%d,%d (%s) key=0x%016llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__get_str(device), __entry->key
)
);
/*
 * Define a named event of class pnfs_bl_pr_key_class, taking the same
 * (bdev, key) arguments as the class.  Instantiated below for the
 * bl_pr_key_reg and bl_pr_key_unreg events.
 */
#define DEFINE_NFS4_BLOCK_PRKEY_EVENT(name) \
DEFINE_EVENT(pnfs_bl_pr_key_class, name, \
TP_PROTO( \
const struct block_device *bdev, \
u64 key \
), \
TP_ARGS(bdev, key))
DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_reg);
DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_unreg);
/*
 * From uapi/linux/pr.h
 */
TRACE_DEFINE_ENUM(PR_STS_SUCCESS);
TRACE_DEFINE_ENUM(PR_STS_IOERR);
TRACE_DEFINE_ENUM(PR_STS_RESERVATION_CONFLICT);
TRACE_DEFINE_ENUM(PR_STS_RETRY_PATH_FAILURE);
TRACE_DEFINE_ENUM(PR_STS_PATH_FAST_FAILED);
TRACE_DEFINE_ENUM(PR_STS_PATH_FAILED);
/*
 * Map a PR_STS_* status value to its symbolic name for trace output.
 * Only the six values listed here have a name.
 */
#define show_pr_status(x) \
__print_symbolic(x, \
{ PR_STS_SUCCESS, "SUCCESS" }, \
{ PR_STS_IOERR, "IOERR" }, \
{ PR_STS_RESERVATION_CONFLICT, "RESERVATION_CONFLICT" }, \
{ PR_STS_RETRY_PATH_FAILURE, "RETRY_PATH_FAILURE" }, \
{ PR_STS_PATH_FAST_FAILED, "PATH_FAST_FAILED" }, \
{ PR_STS_PATH_FAILED, "PATH_FAILED" })
/*
 * Event class for block layout PR key events that also report a status.
 *
 * Arguments: the block device, the 64-bit key and an int status.  Each
 * record stores the key, the device number (bdev->bd_dev), the status
 * (kept in an unsigned long field) and the disk name, and is printed as
 * "dev=<major>,<minor> (<disk name>) key=0x<16 hex digits> status=<name>",
 * where the status name comes from show_pr_status().
 */
DECLARE_EVENT_CLASS(pnfs_bl_pr_key_err_class,
TP_PROTO(
const struct block_device *bdev,
u64 key,
int status
),
TP_ARGS(bdev, key, status),
TP_STRUCT__entry(
__field(u64, key)
__field(dev_t, dev)
__field(unsigned long, status)
__string(device, bdev->bd_disk->disk_name)
),
TP_fast_assign(
__entry->key = key;
__entry->dev = bdev->bd_dev;
__entry->status = status;
__assign_str(device);
),
TP_printk("dev=%d,%d (%s) key=0x%016llx status=%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__get_str(device), __entry->key,
show_pr_status(__entry->status)
)
);
/*
 * Define a tracepoint called @name in pnfs_bl_pr_key_err_class; it takes
 * the same (bdev, key, status) arguments as the class.  Used below for
 * the bl_pr_key_reg_err and bl_pr_key_unreg_err events.
 */
#define DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(name) \
DEFINE_EVENT(pnfs_bl_pr_key_err_class, name, \
TP_PROTO( \
const struct block_device *bdev, \
u64 key, \
int status \
), \
TP_ARGS(bdev, key, status))
DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_reg_err);
DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_unreg_err);
#ifdef CONFIG_NFS_V4_2
TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA);
TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE);

View file

@ -224,6 +224,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_attrs_maxsz)
#define decode_setattr_maxsz (op_decode_hdr_maxsz + \
nfs4_fattr_bitmap_maxsz)
/*
 * Sizes for the SETATTR that encode_delegattr() adds to a DELEGRETURN
 * compound: op header, stateid, attribute bitmap and room for two
 * nfstime4 values (the delegated access and modify times).  The reply is
 * decoded by decode_setattr(), so the decode size is the SETATTR one.
 */
#define encode_delegattr_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
nfs4_fattr_bitmap_maxsz + \
2*nfstime4_maxsz)
#define decode_delegattr_maxsz (decode_setattr_maxsz)
#define encode_read_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz)
@ -758,12 +763,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_layoutreturn_maxsz + \
encode_delegattr_maxsz + \
encode_delegreturn_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_layoutreturn_maxsz + \
decode_delegattr_maxsz + \
decode_delegreturn_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \
@ -1060,9 +1067,10 @@ static void encode_nops(struct compound_hdr *hdr)
*hdr->nops_p = htonl(hdr->nops);
}
static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid)
static void encode_nfs4_stateid(struct xdr_stream *xdr,
const nfs4_stateid *stateid)
{
encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
}
static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
@ -1468,20 +1476,18 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
}
}
static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type)
static inline void encode_delegation_type(struct xdr_stream *xdr, u32 delegation_type)
{
__be32 *p;
p = reserve_space(xdr, 4);
switch (delegation_type) {
case 0:
*p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
break;
case FMODE_READ:
*p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
break;
case FMODE_WRITE|FMODE_READ:
*p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
case NFS4_OPEN_DELEGATE_NONE:
case NFS4_OPEN_DELEGATE_READ:
case NFS4_OPEN_DELEGATE_WRITE:
case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
*p = cpu_to_be32(delegation_type);
break;
default:
BUG();
@ -1497,7 +1503,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
encode_string(xdr, name->len, name->name);
}
static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
static inline void encode_claim_previous(struct xdr_stream *xdr, u32 type)
{
__be32 *p;
@ -1735,6 +1741,33 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
server->attr_bitmask);
}
/*
 * Encode the SETATTR operation that hands delegated timestamps back to
 * the server.
 *
 * @xdr:     stream to encode into
 * @stateid: stateid sent with the SETATTR
 * @attr:    delegated attributes; atime is sent only when atime_set is
 *           non-zero, mtime only when mtime_set is non-zero
 * @hdr:     header of the compound being built
 *
 * Only word 2 of the three-word attribute bitmap is ever populated.
 */
static void encode_delegattr(struct xdr_stream *xdr,
		const nfs4_stateid *stateid,
		const struct nfs4_delegattr *attr,
		struct compound_hdr *hdr)
{
	uint32_t bitmap[3] = { 0 };
	uint32_t len = 0;
	__be32 *p;

	/*
	 * NOTE(review): encode_test_stateid() passes a decode_*_maxsz value
	 * as the third argument of encode_op_hdr(); here the encode size is
	 * passed instead - confirm whether decode_delegattr_maxsz was meant.
	 */
	encode_op_hdr(xdr, OP_SETATTR, encode_delegattr_maxsz, hdr);
	encode_nfs4_stateid(xdr, stateid);

	/* Each selected timestamp adds one nfstime4 to the attribute data. */
	if (attr->atime_set) {
		len += nfstime4_maxsz << 2;
		bitmap[2] |= FATTR4_WORD2_TIME_DELEG_ACCESS;
	}
	if (attr->mtime_set) {
		len += nfstime4_maxsz << 2;
		bitmap[2] |= FATTR4_WORD2_TIME_DELEG_MODIFY;
	}

	xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap));
	xdr_stream_encode_opaque_inline(xdr, (void **)&p, len);

	/* Fill the reserved area in bitmap order: access time, then modify. */
	if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
		p = xdr_encode_nfstime4(p, &attr->atime);
	if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)
		p = xdr_encode_nfstime4(p, &attr->mtime);
}
static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
{
__be32 *p;
@ -2105,7 +2138,7 @@ static void encode_test_stateid(struct xdr_stream *xdr,
{
encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
encode_uint32(xdr, 1);
encode_nfs4_stateid(xdr, args->stateid);
encode_nfs4_stateid(xdr, &args->stateid);
}
static void encode_free_stateid(struct xdr_stream *xdr,
@ -2812,6 +2845,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
encode_putfh(xdr, args->fhandle, &hdr);
if (args->lr_args)
encode_layoutreturn(xdr, args->lr_args, &hdr);
if (args->sattr_args)
encode_delegattr(xdr, args->stateid, args->sattr_args, &hdr);
if (args->bitmask)
encode_getfattr(xdr, args->bitmask, &hdr);
encode_delegreturn(xdr, args->stateid, &hdr);
@ -4303,6 +4338,28 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap,
return 0;
}
/*
 * Decode the FATTR4_WORD2_OPEN_ARGUMENTS attribute into @res.
 *
 * @res is always zeroed first.  Returns -EIO when a word 2 bit below
 * FATTR4_WORD2_OPEN_ARGUMENTS is still set in @bitmap, or when any of the
 * five bitmaps (share access, share deny, share access want, open claim,
 * create mode) fails to decode.  Returns 0 otherwise; if the attribute
 * was present its bit is cleared from @bitmap.
 */
static int decode_attr_open_arguments(struct xdr_stream *xdr, uint32_t *bitmap,
		struct nfs4_open_caps *res)
{
	memset(res, 0, sizeof(*res));

	if (unlikely(bitmap[2] & (FATTR4_WORD2_OPEN_ARGUMENTS - 1U)))
		return -EIO;
	if (unlikely(!(bitmap[2] & FATTR4_WORD2_OPEN_ARGUMENTS)))
		return 0;

	/* The bitmaps are decoded in order; stop at the first failure. */
	if (decode_bitmap4(xdr, res->oa_share_access,
			   ARRAY_SIZE(res->oa_share_access)) < 0 ||
	    decode_bitmap4(xdr, res->oa_share_deny,
			   ARRAY_SIZE(res->oa_share_deny)) < 0 ||
	    decode_bitmap4(xdr, res->oa_share_access_want,
			   ARRAY_SIZE(res->oa_share_access_want)) < 0 ||
	    decode_bitmap4(xdr, res->oa_open_claim,
			   ARRAY_SIZE(res->oa_open_claim)) < 0 ||
	    decode_bitmap4(xdr, res->oa_createmode,
			   ARRAY_SIZE(res->oa_createmode)) < 0)
		return -EIO;

	bitmap[2] &= ~FATTR4_WORD2_OPEN_ARGUMENTS;
	return 0;
}
static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen)
{
unsigned int attrwords = XDR_QUADLEN(attrlen);
@ -4477,6 +4534,8 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
if ((status = decode_attr_exclcreat_supported(xdr, bitmap,
res->exclcreat_bitmask)) != 0)
goto xdr_error;
if ((status = decode_attr_open_arguments(xdr, bitmap, &res->open_caps)) != 0)
goto xdr_error;
status = verify_attr_len(xdr, savep, attrlen);
xdr_error:
dprintk("%s: xdr returned %d!\n", __func__, -status);
@ -5148,13 +5207,12 @@ static int decode_space_limit(struct xdr_stream *xdr,
}
static int decode_rw_delegation(struct xdr_stream *xdr,
uint32_t delegation_type,
struct nfs_openres *res)
struct nfs4_open_delegation *res)
{
__be32 *p;
int status;
status = decode_delegation_stateid(xdr, &res->delegation);
status = decode_delegation_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
p = xdr_inline_decode(xdr, 4);
@ -5162,52 +5220,57 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
return -EIO;
res->do_recall = be32_to_cpup(p);
switch (delegation_type) {
switch (res->open_delegation_type) {
case NFS4_OPEN_DELEGATE_READ:
res->delegation_type = FMODE_READ;
case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
res->type = FMODE_READ;
break;
case NFS4_OPEN_DELEGATE_WRITE:
res->delegation_type = FMODE_WRITE|FMODE_READ;
case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
res->type = FMODE_WRITE|FMODE_READ;
if (decode_space_limit(xdr, &res->pagemod_limit) < 0)
return -EIO;
}
return decode_ace(xdr, NULL);
}
static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
static int decode_no_delegation(struct xdr_stream *xdr,
struct nfs4_open_delegation *res)
{
__be32 *p;
uint32_t why_no_delegation;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
why_no_delegation = be32_to_cpup(p);
switch (why_no_delegation) {
res->why_no_delegation = be32_to_cpup(p);
switch (res->why_no_delegation) {
case WND4_CONTENTION:
case WND4_RESOURCE:
xdr_inline_decode(xdr, 4);
/* Ignore for now */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
res->will_notify = be32_to_cpup(p);
}
return 0;
}
static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
static int decode_delegation(struct xdr_stream *xdr,
struct nfs4_open_delegation *res)
{
__be32 *p;
uint32_t delegation_type;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
delegation_type = be32_to_cpup(p);
res->delegation_type = 0;
switch (delegation_type) {
res->open_delegation_type = be32_to_cpup(p);
switch (res->open_delegation_type) {
case NFS4_OPEN_DELEGATE_NONE:
return 0;
case NFS4_OPEN_DELEGATE_READ:
case NFS4_OPEN_DELEGATE_WRITE:
return decode_rw_delegation(xdr, delegation_type, res);
case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
return decode_rw_delegation(xdr, res);
case NFS4_OPEN_DELEGATE_NONE_EXT:
return decode_no_delegation(xdr, res);
}
@ -5248,7 +5311,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
for (; i < NFS4_BITMAP_SIZE; i++)
res->attrset[i] = 0;
return decode_delegation(xdr, res);
return decode_delegation(xdr, &res->delegation);
xdr_error:
dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
return -EIO;
@ -5480,6 +5543,11 @@ static int decode_setattr(struct xdr_stream *xdr)
return -EIO;
}
/*
 * Decode the reply to the SETATTR sent by encode_delegattr().  The reply
 * is handled by decode_setattr(), whose result is returned unchanged.
 */
static int decode_delegattr(struct xdr_stream *xdr)
{
	return decode_setattr(xdr);
}
static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
{
__be32 *p;
@ -7052,6 +7120,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
if (status)
goto out;
}
if (res->sattr_res) {
status = decode_delegattr(xdr);
res->sattr_ret = status;
if (status)
goto out;
}
if (res->fattr) {
status = decode_getfattr(xdr, res->fattr, res->server);
if (status != 0)

View file

@ -939,10 +939,11 @@ TRACE_EVENT(nfs_sillyrename_unlink,
DECLARE_EVENT_CLASS(nfs_folio_event,
TP_PROTO(
const struct inode *inode,
struct folio *folio
loff_t offset,
size_t count
),
TP_ARGS(inode, folio),
TP_ARGS(inode, offset, count),
TP_STRUCT__entry(
__field(dev_t, dev)
@ -950,7 +951,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
__field(u32, count)
__field(size_t, count)
),
TP_fast_assign(
@ -960,13 +961,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
__entry->offset = folio_file_pos(folio);
__entry->count = nfs_folio_length(folio);
__entry->offset = offset,
__entry->count = count;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
"offset=%lld count=%u",
"offset=%lld count=%zu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
@ -978,18 +979,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
DEFINE_EVENT(nfs_folio_event, name, \
TP_PROTO( \
const struct inode *inode, \
struct folio *folio \
loff_t offset, \
size_t count \
), \
TP_ARGS(inode, folio))
TP_ARGS(inode, offset, count))
DECLARE_EVENT_CLASS(nfs_folio_event_done,
TP_PROTO(
const struct inode *inode,
struct folio *folio,
loff_t offset,
size_t count,
int ret
),
TP_ARGS(inode, folio, ret),
TP_ARGS(inode, offset, count, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
@ -998,7 +1001,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
__field(u32, count)
__field(size_t, count)
),
TP_fast_assign(
@ -1008,14 +1011,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
__entry->offset = folio_file_pos(folio);
__entry->count = nfs_folio_length(folio);
__entry->offset = offset,
__entry->count = count,
__entry->ret = ret;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
"offset=%lld count=%u ret=%d",
"offset=%lld count=%zu ret=%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
@ -1027,10 +1030,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
DEFINE_EVENT(nfs_folio_event_done, name, \
TP_PROTO( \
const struct inode *inode, \
struct folio *folio, \
loff_t offset, \
size_t count, \
int ret \
), \
TP_ARGS(inode, folio, ret))
TP_ARGS(inode, offset, count, ret))
DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);

View file

@ -187,102 +187,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
}
EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
/*
 * nfs_page_group_lock_head - lock the head request of a page group
 * @req: any member of the page group
 *
 * Retries nfs_lock_request() on @req->wb_head, calling
 * nfs_wait_on_request() between attempts.  Returns the locked head
 * request, or an ERR_PTR() carrying the negative result of a failed
 * wait.  When the head is not @req itself, an extra reference is taken
 * on the head's wb_kref before returning.
 */
struct nfs_page *
nfs_page_group_lock_head(struct nfs_page *req)
{
	struct nfs_page *head = req->wb_head;

	for (;;) {
		int ret;

		if (nfs_lock_request(head))
			break;
		ret = nfs_wait_on_request(head);
		if (ret < 0)
			return ERR_PTR(ret);
	}

	if (head != req)
		kref_get(&head->wb_kref);
	return head;
}
/*
 * nfs_unroll_locks - unlock the requests locked so far in a page group
 * @head: head request of page group, must be holding head lock
 * @req: request at which to stop (it is not unlocked here)
 *
 * Helper for nfs_lock_and_join_requests.  Walks the group from the
 * request after @head up to, but not including, @req and calls
 * nfs_unlock_and_release_request() on every entry whose wb_kref count is
 * non-zero.  Returns nothing.
 */
static void
nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
{
	struct nfs_page *cur = head->wb_this_page;

	while (cur != req) {
		/* entries with a zero refcount were never locked by us */
		if (kref_read(&cur->wb_kref))
			nfs_unlock_and_release_request(cur);
		cur = cur->wb_this_page;
	}
}
/*
 * nfs_page_group_lock_subreq - try to lock a subrequest
 * @head: head request of page group
 * @subreq: request to lock
 *
 * This is a helper function for nfs_lock_and_join_requests which
 * must be called with the head request and page group both locked.
 *
 * Returns 0 once @subreq is locked, and also when @subreq's reference
 * count has already dropped to zero (the request is then left alone).
 * Returns a negative value if waiting for @subreq or re-taking the page
 * group lock fails; in that case the locks taken so far are unrolled,
 * the reference on @subreq is dropped and the function returns with the
 * page group unlocked.
 */
static int
nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
{
int ret;
/* Skip a request whose last reference is already gone. */
if (!kref_get_unless_zero(&subreq->wb_kref))
return 0;
while (!nfs_lock_request(subreq)) {
/* Drop the page group lock while waiting for the busy request. */
nfs_page_group_unlock(head);
ret = nfs_wait_on_request(subreq);
/* Only re-take the page group lock if the wait succeeded. */
if (!ret)
ret = nfs_page_group_lock(head);
if (ret < 0) {
nfs_unroll_locks(head, subreq);
nfs_release_request(subreq);
return ret;
}
}
return 0;
}
/*
 * nfs_page_group_lock_subrequests - try to lock the subrequests
 * @head: head request of page group
 *
 * Helper for nfs_lock_and_join_requests; must be called with the head
 * request locked.  Takes the page group lock, locks every other member
 * of the group through nfs_page_group_lock_subreq(), then drops the page
 * group lock again.  Returns 0 on success or the first negative result
 * from nfs_page_group_lock() or nfs_page_group_lock_subreq().
 */
int nfs_page_group_lock_subrequests(struct nfs_page *head)
{
	struct nfs_page *subreq;
	int ret = nfs_page_group_lock(head);

	if (ret < 0)
		return ret;

	/* visit every member of the group except the head itself */
	subreq = head->wb_this_page;
	while (subreq != head) {
		ret = nfs_page_group_lock_subreq(head, subreq);
		if (ret < 0)
			return ret;
		subreq = subreq->wb_this_page;
	}

	nfs_page_group_unlock(head);
	return 0;
}
/*
* nfs_page_set_headlock - set the request PG_HEADLOCK
* @req: request that is to be locked
@ -569,7 +473,7 @@ struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx,
if (IS_ERR(l_ctx))
return ERR_CAST(l_ctx);
ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count);
ret = nfs_page_create(l_ctx, offset, folio->index, offset, count);
if (!IS_ERR(ret)) {
nfs_page_assign_folio(ret, folio);
nfs_page_group_init(ret, NULL);
@ -694,25 +598,6 @@ void nfs_release_request(struct nfs_page *req)
}
EXPORT_SYMBOL_GPL(nfs_release_request);
/**
 * nfs_wait_on_request - Wait for a request to complete.
 * @req: request to wait upon.
 *
 * Returns 0 immediately if PG_BUSY is not set in @req->wb_flags.
 * Otherwise sets PG_CONTENDED2 and waits for PG_BUSY to clear with
 * wait_on_bit_io() in TASK_UNINTERRUPTIBLE mode, returning that call's
 * result.
 *
 * NOTE(review): this comment used to say "Interruptible by fatal signals
 * only", which does not match the TASK_UNINTERRUPTIBLE mode passed
 * below - confirm which is intended.
 *
 * The user is responsible for holding a count on the request.
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
if (!test_bit(PG_BUSY, &req->wb_flags))
return 0;
set_bit(PG_CONTENDED2, &req->wb_flags);
/*
 * Barrier between setting PG_CONTENDED2 and waiting on PG_BUSY;
 * presumably pairs with a check in the unlock path - verify.
 */
smp_mb__after_atomic();
return wait_on_bit_io(&req->wb_flags, PG_BUSY,
TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL_GPL(nfs_wait_on_request);
/*
* nfs_generic_pg_test - determine if requests can be coalesced
* @desc: pointer to descriptor

View file

@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
u32 seq);
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list);
static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo);
/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
@ -476,6 +477,18 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
return !list_empty(&lo->plh_segs);
}
/*
 * Mark the segments of @lo matching @iomode for return.
 *
 * Builds a range with the given iomode and a length of NFS4_MAX_UINT64
 * (all other fields zero-initialised) and passes it, together with
 * @lseg_list and @seq, to pnfs_mark_matching_lsegs_return().  That
 * function's result is returned unchanged.
 */
static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo,
					   struct list_head *lseg_list,
					   enum pnfs_iomode iomode, u32 seq)
{
	struct pnfs_layout_range range_all = {
		.length = NFS4_MAX_UINT64,
		.iomode = iomode,
	};
	int status;

	status = pnfs_mark_matching_lsegs_return(lo, lseg_list, &range_all,
						 seq);
	return status;
}
static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
@ -846,8 +859,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
break;
inode = pnfs_grab_inode_layout_hdr(lo);
if (inode != NULL) {
if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
list_del_rcu(&lo->plh_layouts);
if (pnfs_layout_add_bulk_destroy_list(inode,
layout_list))
continue;
@ -868,7 +879,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
bool is_bulk_recall)
enum pnfs_layout_destroy_mode mode)
{
struct pnfs_layout_hdr *lo;
struct inode *inode;
@ -886,8 +897,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
spin_lock(&inode->i_lock);
list_del_init(&lo->plh_bulk_destroy);
if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
if (is_bulk_recall)
if (mode == PNFS_LAYOUT_FILE_BULK_RETURN) {
pnfs_mark_layout_stateid_return(lo, &lseg_list,
IOMODE_ANY, 0);
} else if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
if (mode == PNFS_LAYOUT_BULK_RETURN)
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
ret = -EAGAIN;
}
@ -901,10 +915,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
return ret;
}
int
pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
struct nfs_fsid *fsid,
bool is_recall)
int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
enum pnfs_layout_destroy_mode mode)
{
struct nfs_server *server;
LIST_HEAD(layout_list);
@ -923,33 +935,40 @@ pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
if (list_empty(&layout_list))
return 0;
return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
}
int
pnfs_destroy_layouts_byclid(struct nfs_client *clp,
bool is_recall)
static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp,
struct list_head *list)
{
struct nfs_server *server;
LIST_HEAD(layout_list);
spin_lock(&clp->cl_lock);
rcu_read_lock();
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
if (pnfs_layout_bulk_destroy_byserver_locked(clp,
server,
&layout_list) != 0)
if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
list) != 0)
goto restart;
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
}
if (list_empty(&layout_list))
return 0;
return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
/*
 * Add the layouts of @clp to @list via
 * pnfs_layout_build_destroy_list_byclient(), then process everything on
 * @list with pnfs_layout_free_bulk_destroy_list() in the given @mode.
 * Entries already on @list when this is called are processed as well.
 * Returns the result of pnfs_layout_free_bulk_destroy_list().
 */
static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp,
					 struct list_head *list,
					 enum pnfs_layout_destroy_mode mode)
{
	int status;

	pnfs_layout_build_destroy_list_byclient(clp, list);
	status = pnfs_layout_free_bulk_destroy_list(list, mode);
	return status;
}
int pnfs_layout_destroy_byclid(struct nfs_client *clp,
enum pnfs_layout_destroy_mode mode)
{
LIST_HEAD(layout_list);
return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode);
}
/*
@ -962,7 +981,68 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
nfs4_deviceid_mark_client_invalid(clp);
nfs4_deviceid_purge_client(clp);
pnfs_destroy_layouts_byclid(clp, false);
pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE);
}
/*
 * Build @list for reboot recovery.
 *
 * Walks @clp->cl_superblocks with cl_lock held inside an RCU read-side
 * section.  Servers without NFS_CAP_REBOOT_LAYOUTRETURN are skipped; for
 * the others pnfs_layout_bulk_destroy_byserver_locked() is called with
 * @list (presumably moving that server's layouts onto it).  A non-zero
 * result from that helper restarts the walk from the beginning.
 */
static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp,
						    struct list_head *list)
{
	struct nfs_server *srv;

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
rescan:
	list_for_each_entry_rcu(srv, &clp->cl_superblocks, client_link) {
		if ((srv->caps & NFS_CAP_REBOOT_LAYOUTRETURN) &&
		    pnfs_layout_bulk_destroy_byserver_locked(clp, srv,
							     list) != 0)
			goto rescan;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);
}
/*
 * Call pnfs_layout_return_on_reboot() for the layouts on @list, stopping
 * at the first one that does not return 0.
 *
 * -NFS4ERR_BAD_STATEID clears NFS_CAP_REBOOT_LAYOUTRETURN on that
 * layout's server and ends the walk with 0; -NFS4ERR_NO_GRACE also ends
 * the walk with 0.  Any other non-zero result is returned to the caller.
 * Returns 0 when every layout returned 0.
 */
static int pnfs_layout_bulk_list_reboot(struct list_head *list)
{
	struct pnfs_layout_hdr *lo;

	list_for_each_entry(lo, list, plh_bulk_destroy) {
		/* look up the server before the layoutreturn is issued */
		struct nfs_server *server = NFS_SERVER(lo->plh_inode);
		int ret = pnfs_layout_return_on_reboot(lo);

		if (ret == 0)
			continue;
		if (ret == -NFS4ERR_BAD_STATEID) {
			server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN;
			return 0;
		}
		if (ret == -NFS4ERR_NO_GRACE)
			return 0;
		return ret;
	}
	return 0;
}
/*
 * Handle the layouts of @clp after a server reboot.
 *
 * First builds the recovery list and, if it is not empty, runs
 * pnfs_layout_bulk_list_reboot() on it.  Then everything on the list,
 * plus the client's other layouts, goes through
 * pnfs_layout_do_destroy_byclid() with PNFS_LAYOUT_INVALIDATE.
 * Returns 0 if both steps returned 0, otherwise -EAGAIN.
 */
int pnfs_layout_handle_reboot(struct nfs_client *clp)
{
	LIST_HEAD(list);
	int recover_err = 0;
	int destroy_err;

	pnfs_layout_build_recover_list_byclient(clp, &list);
	if (!list_empty(&list))
		recover_err = pnfs_layout_bulk_list_reboot(&list);

	destroy_err = pnfs_layout_do_destroy_byclid(clp, &list,
						    PNFS_LAYOUT_INVALIDATE);
	if (recover_err == 0)
		recover_err = destroy_err;

	if (recover_err != 0)
		return -EAGAIN;
	return 0;
}
static void
@ -1163,6 +1243,38 @@ static void pnfs_clear_layoutcommit(struct inode *inode,
}
}
/*
 * Re-arm the return info of @lo so a layoutreturn can be retried later.
 *
 * Does nothing unless @lo is valid and its stateid matches @arg_stateid
 * (as judged by nfs4_stateid_match_other()).  Otherwise looks through
 * lo->plh_return_segs for the first segment whose sequence is not newer
 * than the seqid of @arg_stateid and whose range
 * pnfs_should_free_range() accepts for @range; if one is found,
 * pnfs_set_plh_return_info() is called with @range's iomode and that
 * seqid.  The "_locked" suffix suggests the caller holds the inode's
 * i_lock, as pnfs_layoutreturn_retry_later() does.
 */
static void
pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo,
				     const nfs4_stateid *arg_stateid,
				     const struct pnfs_layout_range *range)
{
	const struct pnfs_layout_segment *lseg;
	u32 seq = be32_to_cpu(arg_stateid->seqid);

	if (!pnfs_layout_is_valid(lo) ||
	    !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
		return;

	list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) {
		if (!pnfs_seqid_is_newer(lseg->pls_seq, seq) &&
		    pnfs_should_free_range(&lseg->pls_range, range)) {
			pnfs_set_plh_return_info(lo, range->iomode, seq);
			return;
		}
	}
}
/*
 * Locked wrapper around pnfs_layoutreturn_retry_later_locked().
 *
 * With the i_lock of @lo's inode held, re-arms the return info for
 * @arg_stateid/@range and then calls pnfs_clear_layoutreturn_waitbit()
 * on @lo.
 */
void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
				   const nfs4_stateid *arg_stateid,
				   const struct pnfs_layout_range *range)
{
	struct inode *ino = lo->plh_inode;

	spin_lock(&ino->i_lock);
	pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range);
	pnfs_clear_layoutreturn_waitbit(lo);
	spin_unlock(&ino->i_lock);
}
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@ -1239,7 +1351,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
const nfs4_stateid *stateid,
const struct cred **pcred,
enum pnfs_iomode iomode,
bool sync)
unsigned int flags)
{
struct inode *ino = lo->plh_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
@ -1266,33 +1378,21 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
if (ld->prepare_layoutreturn)
ld->prepare_layoutreturn(&lrp->args);
status = nfs4_proc_layoutreturn(lrp, sync);
status = nfs4_proc_layoutreturn(lrp, flags);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
}
/*
 * Report whether the segments of @lo matching @iomode can be returned.
 *
 * Runs pnfs_mark_matching_lsegs_return() on lo->plh_return_segs with a
 * range of the given iomode and length NFS4_MAX_UINT64, and returns true
 * for every result other than -EBUSY.
 */
static bool
pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
				enum pnfs_iomode iomode,
				u32 seq)
{
	struct pnfs_layout_range recall_range = {
		.iomode = iomode,
		.length = NFS4_MAX_UINT64,
	};
	int status;

	status = pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
						 &recall_range, seq);
	return status != -EBUSY;
}
/*
 * Return true if layoutreturn is needed.
 *
 * Returns false when NFS_LAYOUT_RETURN_REQUESTED is not set in
 * lo->plh_flags.  Otherwise marks the segments matching the pending
 * return iomode/sequence for return and reports true unless that step
 * says the segments are still busy.
 */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		return false;
	/*
	 * pnfs_mark_layout_stateid_return() hands back the result of
	 * pnfs_mark_matching_lsegs_return(), which reports busy segments as
	 * the negative errno -EBUSY.  Comparing against the positive EBUSY
	 * would never match and a layoutreturn would be considered needed
	 * even while segments are still in use.
	 */
	return pnfs_mark_layout_stateid_return(lo, &lo->plh_return_segs,
					       lo->plh_return_iomode,
					       lo->plh_return_seq) != -EBUSY;
}
static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
@ -1312,7 +1412,8 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
spin_unlock(&inode->i_lock);
if (send) {
/* Send an async layoutreturn so we dont deadlock */
pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
PNFS_FL_LAYOUTRETURN_ASYNC);
}
} else
spin_unlock(&inode->i_lock);
@ -1379,7 +1480,8 @@ _pnfs_return_layout(struct inode *ino)
send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
spin_unlock(&ino->i_lock);
if (send)
status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY,
0);
out_wait_layoutreturn:
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
out_put_layout_hdr:
@ -1417,6 +1519,24 @@ pnfs_commit_and_return_layout(struct inode *inode)
return ret;
}
/*
 * Send the reboot-recovery LAYOUTRETURN for @lo.
 *
 * Under the inode's i_lock: if @lo is no longer valid there is nothing
 * to do and 0 is returned.  Otherwise a reference is taken on the
 * layout's credential and on the layout header, the lock is dropped and
 * pnfs_send_layoutreturn() is called with the zero stateid, IOMODE_ANY
 * and PNFS_FL_LAYOUTRETURN_PRIVILEGED; its result is returned.
 */
static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo)
{
	struct inode *ino = lo->plh_inode;
	const struct cred *cred;

	spin_lock(&ino->i_lock);
	if (pnfs_layout_is_valid(lo)) {
		cred = get_cred(lo->plh_lc_cred);
		pnfs_get_layout_hdr(lo);
		spin_unlock(&ino->i_lock);

		return pnfs_send_layoutreturn(lo, &zero_stateid, &cred,
					      IOMODE_ANY,
					      PNFS_FL_LAYOUTRETURN_PRIVILEGED);
	}
	spin_unlock(&ino->i_lock);
	return 0;
}
bool pnfs_roc(struct inode *ino,
struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
@ -1520,7 +1640,7 @@ bool pnfs_roc(struct inode *ino,
return true;
}
if (layoutreturn)
pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, 0);
pnfs_put_layout_hdr(lo);
return false;
}
@ -1570,8 +1690,7 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
}
void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
int ret)
struct nfs4_layoutreturn_res *res, int ret)
{
struct pnfs_layout_hdr *lo = args->layout;
struct inode *inode = args->inode;
@ -1579,11 +1698,13 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
switch (ret) {
case -NFS4ERR_BADSESSION:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_NOMATCHING_LAYOUT:
spin_lock(&inode->i_lock);
if (pnfs_layout_is_valid(lo) &&
nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
pnfs_set_plh_return_info(lo, args->range.iomode, 0);
pnfs_layoutreturn_retry_later_locked(lo, &args->stateid,
&args->range);
pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&inode->i_lock);
break;
@ -2566,7 +2687,8 @@ pnfs_mark_layout_for_return(struct inode *inode,
return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
spin_unlock(&inode->i_lock);
if (return_now)
pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
PNFS_FL_LAYOUTRETURN_ASYNC);
} else {
spin_unlock(&inode->i_lock);
nfs_commit_inode(inode, 0);
@ -2682,7 +2804,8 @@ static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
}
spin_unlock(&inode->i_lock);
rcu_read_unlock();
pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
PNFS_FL_LAYOUTRETURN_ASYNC);
pnfs_put_layout_hdr(lo);
cond_resched();
goto restart;

View file

@ -118,6 +118,12 @@ enum layoutdriver_policy_flags {
PNFS_LAYOUTGET_ON_OPEN = 1 << 3,
};
/*
 * Selects what the bulk layout destroy helpers
 * (pnfs_layout_destroy_byfsid(), pnfs_layout_destroy_byclid()) do with
 * each layout they process.
 */
enum pnfs_layout_destroy_mode {
	/* invalidate the layout stateid */
	PNFS_LAYOUT_INVALIDATE		= 0,
	/* invalidate, and flag the layout NFS_LAYOUT_BULK_RECALL if segments remain */
	PNFS_LAYOUT_BULK_RETURN		= 1,
	/* mark all segments (IOMODE_ANY) for return instead of invalidating */
	PNFS_LAYOUT_FILE_BULK_RETURN	= 2,
};
struct nfs4_deviceid_node;
/* Per-layout driver specific registration structure */
@ -127,7 +133,6 @@ struct pnfs_layoutdriver_type {
const char *name;
struct module *owner;
unsigned flags;
unsigned max_deviceinfo_size;
unsigned max_layoutget_response;
int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
@ -193,8 +198,6 @@ struct pnfs_commit_ops {
int max);
void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo);
struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
struct folio *folio);
};
struct pnfs_layout_hdr {
@ -242,6 +245,9 @@ extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id);
extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld);
/* nfs4proc.c */
/*
 * Bits for the @flags argument of nfs4_proc_layoutreturn(); they are
 * also what pnfs_send_layoutreturn() accepts and forwards.
 */
/* Send the LAYOUTRETURN asynchronously; synchronous callers pass 0. */
#define PNFS_FL_LAYOUTRETURN_ASYNC (1U << 0)
/* Set by pnfs_layout_return_on_reboot() for the reboot-recovery return. */
#define PNFS_FL_LAYOUTRETURN_PRIVILEGED (1U << 1)
extern size_t max_response_pages(struct nfs_server *server);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev,
@ -249,7 +255,8 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
extern struct pnfs_layout_segment *
nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
struct nfs4_exception *exception);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp,
unsigned int flags);
/* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
@ -273,11 +280,10 @@ void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_layout_final(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
struct nfs_fsid *fsid,
bool is_recall);
int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
bool is_recall);
int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
enum pnfs_layout_destroy_mode mode);
int pnfs_layout_destroy_byclid(struct nfs_client *clp,
enum pnfs_layout_destroy_mode mode);
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
struct inode *inode);
@ -323,6 +329,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
enum pnfs_iomode iomode,
bool strict_iomode,
gfp_t gfp_flags);
void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range);
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@ -344,6 +353,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
enum pnfs_iomode iomode);
int pnfs_layout_handle_reboot(struct nfs_client *clp);
/* nfs4_deviceid_flags */
enum {
@ -396,8 +406,6 @@ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data);
void pnfs_generic_rw_release(void *data);
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo);
struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
struct folio *folio);
int pnfs_generic_commit_pagelist(struct inode *inode,
struct list_head *mds_pages,
int how,
@ -557,17 +565,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
fl_cinfo->ops->recover_commit_reqs(head, cinfo);
}
/*
 * Look up the request for @folio through the commit ops attached to
 * @cinfo->ds.  Returns NULL when no ops table or no search_commit_reqs
 * callback is installed; otherwise returns whatever the callback
 * returns.  @inode is not used.
 */
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
			struct folio *folio)
{
	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;

	if (fl_cinfo->ops && fl_cinfo->ops->search_commit_reqs)
		return fl_cinfo->ops->search_commit_reqs(cinfo, folio);
	return NULL;
}
/* Should the pNFS client commit and return the layout upon a setattr */
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
@ -725,6 +722,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
}
/*
 * No-op variant of pnfs_layout_handle_reboot(): there is nothing to
 * recover, so success (0) is always reported and @clp is ignored.
 * NOTE(review): presumably the variant built when pNFS support is
 * compiled out - confirm against the enclosing #if.
 */
static inline int pnfs_layout_handle_reboot(struct nfs_client *clp)
{
	return 0;
}
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
@ -864,13 +866,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
{
}
/*
 * No-op variant of pnfs_search_commit_reqs(): never finds a request, so
 * it always returns NULL and ignores all of its arguments.
 * NOTE(review): presumably the variant built when pNFS support is
 * compiled out - confirm against the enclosing #if.
 */
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
			struct folio *folio)
{
	return NULL;
}
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;

View file

@ -110,9 +110,6 @@ nfs4_get_device_info(struct nfs_server *server,
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
if (server->pnfs_curr_ld->max_deviceinfo_size &&
server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz)
max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size;
max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s: server %p max_resp_sz %u max_pages %d\n",
__func__, server, max_resp_sz, max_pages);

View file

@ -351,53 +351,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst,
}
EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
static struct nfs_page *
pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
unsigned int nbuckets, struct folio *folio)
{
struct nfs_page *req;
struct pnfs_commit_bucket *b;
unsigned int i;
/* Linearly search the commit lists for each bucket until a matching
* request is found */
for (i = 0, b = buckets; i < nbuckets; i++, b++) {
list_for_each_entry(req, &b->written, wb_list) {
if (nfs_page_to_folio(req) == folio)
return req->wb_head;
}
list_for_each_entry(req, &b->committing, wb_list) {
if (nfs_page_to_folio(req) == folio)
return req->wb_head;
}
}
return NULL;
}
/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
* for @folio
* @cinfo - commit info for current inode
* @folio - page to search for matching head request
*
* Return: the head request if one is found, otherwise %NULL.
*/
struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
struct folio *folio)
{
struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
struct pnfs_commit_array *array;
struct nfs_page *req;
list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
req = pnfs_bucket_search_commit_reqs(array->buckets,
array->nbuckets, folio);
if (req)
return req;
}
return NULL;
}
EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs);
static struct pnfs_layout_segment *
pnfs_bucket_get_committing(struct list_head *head,
struct pnfs_commit_bucket *bucket,

View file

@ -687,11 +687,18 @@ static int nfs_lock_check_bounds(const struct file_lock *fl)
return -EINVAL;
}
static int nfs_have_delegation(struct inode *inode, fmode_t flags)
static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags)
{
return 0;
}
/*
 * ->return_delegation() hook for this protocol version's rpc_ops table.
 * Only regular files have anything to do here: their dirty data is
 * flushed with nfs_wb_all().  The result of that flush is not propagated;
 * the hook always reports success.
 */
static int nfs_return_delegation(struct inode *inode)
{
	/* Nothing to write back for non-regular files */
	if (!S_ISREG(inode->i_mode))
		return 0;

	nfs_wb_all(inode);
	return 0;
}
static const struct inode_operations nfs_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
@ -757,6 +764,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
.have_delegation = nfs_have_delegation,
.return_delegation = nfs_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,

View file

@ -28,6 +28,7 @@
#include "fscache.h"
#include "pnfs.h"
#include "nfstrace.h"
#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@ -286,7 +287,7 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
struct nfs_open_context *ctx,
struct folio *folio)
{
struct inode *inode = folio_file_mapping(folio)->host;
struct inode *inode = folio->mapping->host;
struct nfs_server *server = NFS_SERVER(inode);
size_t fsize = folio_size(folio);
unsigned int rsize = server->rsize;
@ -322,21 +323,57 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
}
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
* - The error flag is set for this page. This happens only when a
* previous async read operation failed.
* Actually read a folio over the wire.
*/
int nfs_read_folio(struct file *file, struct folio *folio)
static int nfs_do_read_folio(struct file *file, struct folio *folio)
{
struct inode *inode = file_inode(file);
struct nfs_pageio_descriptor pgio;
struct nfs_open_context *ctx;
int ret;
trace_nfs_aop_readpage(inode, folio);
ctx = get_nfs_open_context(nfs_file_open_context(file));
xchg(&ctx->error, 0);
nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
ret = nfs_read_add_folio(&pgio, ctx, folio);
if (ret)
goto out_put;
nfs_pageio_complete_read(&pgio);
nfs_update_delegated_atime(inode);
if (pgio.pg_error < 0) {
ret = pgio.pg_error;
goto out_put;
}
ret = folio_wait_locked_killable(folio);
if (!folio_test_uptodate(folio) && !ret)
ret = xchg(&ctx->error, 0);
out_put:
put_nfs_open_context(ctx);
return ret;
}
/*
* Synchronously read a folio.
*
* This is not heavily used as most users try an asynchronous
* large read through ->readahead first.
*/
int nfs_read_folio(struct file *file, struct folio *folio)
{
struct inode *inode = file_inode(file);
loff_t pos = folio_pos(folio);
size_t len = folio_size(folio);
int ret;
trace_nfs_aop_readpage(inode, pos, len);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
task_io_account_read(folio_size(folio));
task_io_account_read(len);
/*
* Try to flush any pending writes to the file..
@ -356,30 +393,10 @@ int nfs_read_folio(struct file *file, struct folio *folio)
goto out_unlock;
ret = nfs_netfs_read_folio(file, folio);
if (!ret)
goto out;
ctx = get_nfs_open_context(nfs_file_open_context(file));
xchg(&ctx->error, 0);
nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
ret = nfs_read_add_folio(&pgio, ctx, folio);
if (ret)
goto out_put;
nfs_pageio_complete_read(&pgio);
ret = pgio.pg_error < 0 ? pgio.pg_error : 0;
if (!ret) {
ret = folio_wait_locked_killable(folio);
if (!folio_test_uptodate(folio) && !ret)
ret = xchg(&ctx->error, 0);
}
out_put:
put_nfs_open_context(ctx);
ret = nfs_do_read_folio(file, folio);
out:
trace_nfs_aop_readpage_done(inode, folio, ret);
trace_nfs_aop_readpage_done(inode, pos, len, ret);
return ret;
out_unlock:
folio_unlock(folio);
@ -426,6 +443,7 @@ void nfs_readahead(struct readahead_control *ractl)
}
nfs_pageio_complete_read(&pgio);
nfs_update_delegated_atime(inode);
put_nfs_open_context(ctx);
out:

View file

@ -232,6 +232,8 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
dentry->d_fsdata = NULL;
spin_unlock(&dentry->d_lock);
NFS_PROTO(inode)->return_delegation(inode);
if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data))
nfs_free_unlinkdata(data);
}

View file

@ -63,9 +63,6 @@ static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
struct folio *folio);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@ -147,53 +144,23 @@ static void nfs_io_completion_put(struct nfs_io_completion *ioc)
kref_put(&ioc->refcount, nfs_io_completion_release);
}
static void
nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
{
if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) {
kref_get(&req->wb_kref);
atomic_long_inc(&NFS_I(inode)->nrequests);
}
}
static int
nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
{
int ret;
if (!test_bit(PG_REMOVE, &req->wb_flags))
return 0;
ret = nfs_page_group_lock(req);
if (ret)
return ret;
if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
nfs_page_set_inode_ref(req, inode);
nfs_page_group_unlock(req);
return 0;
}
static struct nfs_page *nfs_folio_private_request(struct folio *folio)
{
return folio_get_private(folio);
}
/**
* nfs_folio_find_private_request - find head request associated with a folio
* nfs_folio_find_head_request - find head request associated with a folio
* @folio: pointer to folio
*
* must be called while holding the inode lock.
*
* returns matching head request with reference held, or NULL if not found.
*/
static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
{
struct address_space *mapping = folio_file_mapping(folio);
struct address_space *mapping = folio->mapping;
struct nfs_page *req;
if (!folio_test_private(folio))
return NULL;
spin_lock(&mapping->i_private_lock);
req = nfs_folio_private_request(folio);
req = folio->private;
if (req) {
WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref);
@ -202,84 +169,18 @@ static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
return req;
}
static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio)
{
struct inode *inode = folio_file_mapping(folio)->host;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req = NULL;
if (!folio_test_swapcache(folio))
return NULL;
mutex_lock(&nfsi->commit_mutex);
if (folio_test_swapcache(folio)) {
req = nfs_page_search_commits_for_head_request_locked(nfsi,
folio);
if (req) {
WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref);
}
}
mutex_unlock(&nfsi->commit_mutex);
return req;
}
/**
* nfs_folio_find_head_request - find head request associated with a folio
* @folio: pointer to folio
*
* returns matching head request with reference held, or NULL if not found.
*/
static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
{
struct nfs_page *req;
req = nfs_folio_find_private_request(folio);
if (!req)
req = nfs_folio_find_swap_request(folio);
return req;
}
static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio)
{
struct inode *inode = folio_file_mapping(folio)->host;
struct nfs_page *req, *head;
int ret;
for (;;) {
req = nfs_folio_find_head_request(folio);
if (!req)
return req;
head = nfs_page_group_lock_head(req);
if (head != req)
nfs_release_request(req);
if (IS_ERR(head))
return head;
ret = nfs_cancel_remove_inode(head, inode);
if (ret < 0) {
nfs_unlock_and_release_request(head);
return ERR_PTR(ret);
}
/* Ensure that nobody removed the request before we locked it */
if (head == nfs_folio_private_request(folio))
break;
if (folio_test_swapcache(folio))
break;
nfs_unlock_and_release_request(head);
}
return head;
}
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct folio *folio, unsigned int offset,
unsigned int count)
{
struct inode *inode = folio_file_mapping(folio)->host;
struct inode *inode = folio->mapping->host;
loff_t end, i_size;
pgoff_t end_index;
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio);
if (i_size > 0 && folio_index(folio) < end_index)
if (i_size > 0 && folio->index < end_index)
goto out;
end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count;
if (i_size >= end)
@ -289,6 +190,8 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset,
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
/* Atomically update timestamps if they are delegated to us. */
nfs_update_delegated_mtime_locked(inode);
spin_unlock(&inode->i_lock);
nfs_fscache_invalidate(inode, 0);
}
@ -309,7 +212,7 @@ static void nfs_set_pageerror(struct address_space *mapping)
static void nfs_mapping_set_error(struct folio *folio, int error)
{
struct address_space *mapping = folio_file_mapping(folio);
struct address_space *mapping = folio->mapping;
filemap_set_wb_err(mapping, error);
if (mapping->host)
@ -409,7 +312,7 @@ int nfs_congestion_kb;
static void nfs_folio_set_writeback(struct folio *folio)
{
struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);
struct nfs_server *nfss = NFS_SERVER(folio->mapping->host);
folio_start_writeback(folio);
if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH)
@ -418,12 +321,14 @@ static void nfs_folio_set_writeback(struct folio *folio)
static void nfs_folio_end_writeback(struct folio *folio)
{
struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);
struct nfs_server *nfss = NFS_SERVER(folio->mapping->host);
folio_end_writeback(folio);
if (atomic_long_dec_return(&nfss->writeback) <
NFS_CONGESTION_OFF_THRESH)
NFS_CONGESTION_OFF_THRESH) {
nfss->write_congested = 0;
wake_up_all(&nfss->write_congestion_wait);
}
}
static void nfs_page_end_writeback(struct nfs_page *req)
@ -547,6 +452,74 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
}
/**
* nfs_wait_on_request - Wait for a request to complete.
* @req: request to wait upon.
*
* Returns 0 immediately when PG_BUSY is not set in @req->wb_flags;
* otherwise returns whatever wait_on_bit_io() returns for that bit.
*
* NOTE(review): the wait is requested with TASK_UNINTERRUPTIBLE, which
* does not match the older description of this function as "interruptible
* by fatal signals only" - confirm which behaviour callers expect.
*
* The user is responsible for holding a count on the request.
*/
static int nfs_wait_on_request(struct nfs_page *req)
{
/* Fast path: the request is not locked, nothing to wait for */
if (!test_bit(PG_BUSY, &req->wb_flags))
return 0;
/*
* Flag the request as contended before sleeping (presumably so that the
* unlock path knows a waiter must be woken - confirm against
* nfs_unlock_request()).
*/
set_bit(PG_CONTENDED2, &req->wb_flags);
/* Full barrier after the set_bit() above, before PG_BUSY is tested again */
smp_mb__after_atomic();
return wait_on_bit_io(&req->wb_flags, PG_BUSY,
TASK_UNINTERRUPTIBLE);
}
/*
* nfs_unroll_locks - unlock the subrequests locked so far in a page group
* @head: head request of page group, must be holding head lock
* @req: request at which the walk stops; it is not unlocked here
*
* This is a helper function for nfs_lock_and_join_requests.
*
* Walks the wb_this_page chain starting at the entry after @head and
* stopping when @req is reached, calling nfs_unlock_and_release_request()
* on every entry whose wb_kref is non-zero.  @head itself is never
* unlocked by this function, and the function does not sleep on @req.
*
* Does not return a value.
*/
static void
nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
{
struct nfs_page *tmp;
/* relinquish all the locks successfully grabbed this run */
for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
/*
* Entries with a zero refcount were skipped (never locked) by
* nfs_page_group_lock_subreq(), so there is nothing to undo.
*/
if (!kref_read(&tmp->wb_kref))
continue;
nfs_unlock_and_release_request(tmp);
}
}
/*
* nfs_page_group_lock_subreq - try to lock a subrequest
* @head: head request of page group
* @subreq: request to lock
*
* This is a helper function for nfs_lock_and_join_requests which
* must be called with the head request and page group both locked.
*
* A subrequest whose wb_kref is already zero is skipped: no reference and
* no lock are taken and 0 is returned.  Otherwise a reference is taken and
* nfs_lock_request() is retried until it succeeds; after each failed
* attempt the page group lock is dropped, the caller waits on @subreq and
* the page group lock is taken again.
*
* Returns 0 on success (including the skipped case), or the negative
* value returned by nfs_wait_on_request() or nfs_page_group_lock().
* On error, it returns with the page group unlocked, the subrequests
* locked before @subreq unrolled, and the reference on @subreq dropped.
*/
static int
nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
{
int ret;
/* Refcount already zero: skip this subrequest without locking it */
if (!kref_get_unless_zero(&subreq->wb_kref))
return 0;
while (!nfs_lock_request(subreq)) {
/* Drop the group lock while sleeping on the contended request */
nfs_page_group_unlock(head);
ret = nfs_wait_on_request(subreq);
if (!ret)
ret = nfs_page_group_lock(head);
if (ret < 0) {
/* Undo the locks taken on earlier subrequests, then our ref */
nfs_unroll_locks(head, subreq);
nfs_release_request(subreq);
return ret;
}
}
return 0;
}
/*
* nfs_lock_and_join_requests - join all subreqs to the head req
* @folio: the folio used to lookup the "page group" of nfs_page structures
@ -564,31 +537,75 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
*/
static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
{
struct inode *inode = folio_file_mapping(folio)->host;
struct nfs_page *head;
struct inode *inode = folio->mapping->host;
struct nfs_page *head, *subreq;
struct nfs_commit_info cinfo;
bool removed;
int ret;
nfs_init_cinfo_from_inode(&cinfo, inode);
/*
* A reference is taken only on the head request which acts as a
* reference to the whole page group - the group will not be destroyed
* until the head reference is released.
*/
head = nfs_folio_find_and_lock_request(folio);
if (IS_ERR_OR_NULL(head))
return head;
retry:
head = nfs_folio_find_head_request(folio);
if (!head)
return NULL;
/* lock each request in the page group */
ret = nfs_page_group_lock_subrequests(head);
if (ret < 0) {
nfs_unlock_and_release_request(head);
return ERR_PTR(ret);
while (!nfs_lock_request(head)) {
ret = nfs_wait_on_request(head);
if (ret < 0)
return ERR_PTR(ret);
}
nfs_join_page_group(head, &cinfo, inode);
/* Ensure that nobody removed the request before we locked it */
if (head != folio->private) {
nfs_unlock_and_release_request(head);
goto retry;
}
ret = nfs_page_group_lock(head);
if (ret < 0)
goto out_unlock;
removed = test_bit(PG_REMOVE, &head->wb_flags);
/* lock each request in the page group */
for (subreq = head->wb_this_page;
subreq != head;
subreq = subreq->wb_this_page) {
if (test_bit(PG_REMOVE, &subreq->wb_flags))
removed = true;
ret = nfs_page_group_lock_subreq(head, subreq);
if (ret < 0)
goto out_unlock;
}
nfs_page_group_unlock(head);
/*
* If PG_REMOVE is set on any request, I/O on that request has
* completed, but some requests were still under I/O at the time
* we locked the head request.
*
* In that case the above wait for all requests means that all I/O
* has now finished, and we can restart from a clean slate. Let the
* old requests go away and start from scratch instead.
*/
if (removed) {
nfs_unroll_locks(head, head);
nfs_unlock_and_release_request(head);
goto retry;
}
nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_join_page_group(head, &cinfo, inode);
return head;
out_unlock:
nfs_unlock_and_release_request(head);
return ERR_PTR(ret);
}
static void nfs_write_error(struct nfs_page *req, int error)
@ -640,7 +657,7 @@ static int nfs_page_async_flush(struct folio *folio,
nfs_redirty_request(req);
pgio->pg_error = 0;
} else
nfs_add_stats(folio_file_mapping(folio)->host,
nfs_add_stats(folio->mapping->host,
NFSIOS_WRITEPAGES, 1);
out:
return ret;
@ -652,7 +669,7 @@ static int nfs_page_async_flush(struct folio *folio,
static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc,
struct nfs_pageio_descriptor *pgio)
{
nfs_pageio_cond_complete(pgio, folio_index(folio));
nfs_pageio_cond_complete(pgio, folio->index);
return nfs_page_async_flush(folio, wbc, pgio);
}
@ -663,7 +680,7 @@ static int nfs_writepage_locked(struct folio *folio,
struct writeback_control *wbc)
{
struct nfs_pageio_descriptor pgio;
struct inode *inode = folio_file_mapping(folio)->host;
struct inode *inode = folio->mapping->host;
int err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
@ -697,12 +714,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct nfs_pageio_descriptor pgio;
struct nfs_io_completion *ioc = NULL;
unsigned int mntflags = NFS_SERVER(inode)->flags;
struct nfs_server *nfss = NFS_SERVER(inode);
int priority = 0;
int err;
if (wbc->sync_mode == WB_SYNC_NONE &&
NFS_SERVER(inode)->write_congested)
return 0;
/* Wait with writeback until write congestion eases */
if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) {
err = wait_event_killable(nfss->write_congestion_wait,
nfss->write_congested == 0);
if (err)
return err;
}
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
@ -741,24 +763,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
static void nfs_inode_add_request(struct nfs_page *req)
{
struct folio *folio = nfs_page_to_folio(req);
struct address_space *mapping = folio_file_mapping(folio);
struct address_space *mapping = folio->mapping;
struct nfs_inode *nfsi = NFS_I(mapping->host);
WARN_ON_ONCE(req->wb_this_page != req);
/* Lock the request! */
nfs_lock_request(req);
/*
* Swap-space should not get truncated. Hence no need to plug the race
* with invalidate/truncate.
*/
spin_lock(&mapping->i_private_lock);
if (likely(!folio_test_swapcache(folio))) {
set_bit(PG_MAPPED, &req->wb_flags);
folio_set_private(folio);
folio->private = req;
}
set_bit(PG_MAPPED, &req->wb_flags);
folio_set_private(folio);
folio->private = req;
spin_unlock(&mapping->i_private_lock);
atomic_long_inc(&nfsi->nrequests);
/* this a head request for a page group - mark it as having an
@ -778,10 +793,10 @@ static void nfs_inode_remove_request(struct nfs_page *req)
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
struct folio *folio = nfs_page_to_folio(req->wb_head);
struct address_space *mapping = folio_file_mapping(folio);
struct address_space *mapping = folio->mapping;
spin_lock(&mapping->i_private_lock);
if (likely(folio && !folio_test_swapcache(folio))) {
if (likely(folio)) {
folio->private = NULL;
folio_clear_private(folio);
clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
@ -802,38 +817,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req)
filemap_dirty_folio(folio_mapping(folio), folio);
}
/*
* nfs_page_search_commits_for_head_request_locked
*
* Search through commit lists on @inode for the head request for @folio.
* Must be called while holding the inode (which is cinfo) lock.
*
* Returns the head request if found, or NULL if not found.
*/
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
struct folio *folio)
{
struct nfs_page *freq, *t;
struct nfs_commit_info cinfo;
struct inode *inode = &nfsi->vfs_inode;
nfs_init_cinfo_from_inode(&cinfo, inode);
/* search through pnfs commit lists */
freq = pnfs_search_commit_reqs(inode, &cinfo, folio);
if (freq)
return freq->wb_head;
/* Linearly search the commit list for the correct request */
list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
if (nfs_page_to_folio(freq) == folio)
return freq->wb_head;
}
return NULL;
}
/**
* nfs_request_add_commit_list_locked - add request to a commit list
* @req: pointer to a struct nfs_page
@ -940,7 +923,7 @@ static void nfs_folio_clear_commit(struct folio *folio)
long nr = folio_nr_pages(folio);
node_stat_mod_folio(folio, NR_WRITEBACK, -nr);
wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb,
wb_stat_mod(&inode_to_bdi(folio->mapping->host)->wb,
WB_WRITEBACK, -nr);
}
}
@ -1125,7 +1108,7 @@ static struct nfs_page *nfs_try_to_update_request(struct folio *folio,
*/
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
error = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
error = nfs_wb_folio(folio->mapping->host, folio);
return (error < 0) ? ERR_PTR(error) : NULL;
}
@ -1201,7 +1184,7 @@ int nfs_flush_incompatible(struct file *file, struct folio *folio)
nfs_release_request(req);
if (!do_flush)
return 0;
status = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
status = nfs_wb_folio(folio->mapping->host, folio);
} while (status == 0);
return status;
}
@ -1275,7 +1258,7 @@ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
*/
static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen)
{
struct inode *inode = folio_file_mapping(folio)->host;
struct inode *inode = folio->mapping->host;
struct nfs_inode *nfsi = NFS_I(inode);
if (nfs_have_delegated_attributes(inode))
@ -1319,7 +1302,7 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio,
return 0;
if (!nfs_folio_write_uptodate(folio, pagelen))
return 0;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
if (nfs_have_write_delegation(inode))
return 1;
if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
list_empty_careful(&flctx->flc_posix)))
@ -1353,7 +1336,7 @@ int nfs_update_folio(struct file *file, struct folio *folio,
unsigned int offset, unsigned int count)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct address_space *mapping = folio_file_mapping(folio);
struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
unsigned int pagelen = nfs_folio_length(folio);
int status = 0;
@ -1367,8 +1350,12 @@ int nfs_update_folio(struct file *file, struct folio *folio,
goto out;
if (nfs_can_extend_write(file, folio, pagelen)) {
count = max(count + offset, pagelen);
offset = 0;
unsigned int end = count + offset;
offset = round_down(offset, PAGE_SIZE);
if (end < pagelen)
end = min(round_up(end, PAGE_SIZE), pagelen);
count = end - offset;
}
status = nfs_writepage_setup(ctx, folio, offset, count);
@ -1513,6 +1500,13 @@ void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
struct nfs_fattr *fattr = &hdr->fattr;
struct inode *inode = hdr->inode;
if (nfs_have_delegated_mtime(inode)) {
spin_lock(&inode->i_lock);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
spin_unlock(&inode->i_lock);
return;
}
spin_lock(&inode->i_lock);
nfs_writeback_check_extend(hdr, fattr);
nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
@ -1836,7 +1830,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
struct nfs_page *req;
int status = data->task.tk_status;
struct nfs_commit_info cinfo;
struct nfs_server *nfss;
struct folio *folio;
while (!list_empty(&data->pages)) {
@ -1879,9 +1872,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* Latency breaker */
cond_resched();
}
nfss = NFS_SERVER(data->inode);
if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
nfss->write_congested = 0;
nfs_init_cinfo(&cinfo, data->inode, data->dreq);
nfs_commit_end(cinfo.mds);
@ -2072,17 +2062,17 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
*/
int nfs_wb_folio(struct inode *inode, struct folio *folio)
{
loff_t range_start = folio_file_pos(folio);
loff_t range_end = range_start + (loff_t)folio_size(folio) - 1;
loff_t range_start = folio_pos(folio);
size_t len = folio_size(folio);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
.range_start = range_start,
.range_end = range_end,
.range_end = range_start + len - 1,
};
int ret;
trace_nfs_writeback_folio(inode, folio);
trace_nfs_writeback_folio(inode, range_start, len);
for (;;) {
folio_wait_writeback(folio);
@ -2100,7 +2090,7 @@ int nfs_wb_folio(struct inode *inode, struct folio *folio)
goto out_error;
}
out_error:
trace_nfs_writeback_folio_done(inode, folio, ret);
trace_nfs_writeback_folio_done(inode, range_start, len, ret);
return ret;
}

View file

@ -139,6 +139,7 @@ exit_grace(void)
}
MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>");
MODULE_DESCRIPTION("NFS client and server infrastructure");
MODULE_LICENSE("GPL");
module_init(init_grace)
module_exit(exit_grace)

View file

@ -29,6 +29,7 @@
#include <linux/nfs3.h>
#include <linux/sort.h>
MODULE_DESCRIPTION("NFS ACL support");
MODULE_LICENSE("GPL");
struct nfsacl_encode_desc {

View file

@ -367,6 +367,8 @@ enum open_delegation_type4 {
NFS4_OPEN_DELEGATE_READ = 1,
NFS4_OPEN_DELEGATE_WRITE = 2,
NFS4_OPEN_DELEGATE_NONE_EXT = 3, /* 4.1 */
NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG = 4,
NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5,
};
enum why_no_delegation4 { /* new to v4.1 */
@ -507,6 +509,12 @@ enum {
FATTR4_XATTR_SUPPORT = 82,
};
/*
* Attribute numbers used by the FATTR4_WORD2_TIME_DELEG_ACCESS,
* FATTR4_WORD2_TIME_DELEG_MODIFY and FATTR4_WORD2_OPEN_ARGUMENTS macros,
* which turn each value into a bit position by subtracting 64.
* NOTE(review): presumably these numbers are fixed by the NFSv4.2
* delegated-timestamp / open-arguments protocol extension - confirm
* against the specification before changing them.
*/
enum {
FATTR4_TIME_DELEG_ACCESS = 84,
FATTR4_TIME_DELEG_MODIFY = 85,
FATTR4_OPEN_ARGUMENTS = 86,
};
/*
* The following internal definitions enable processing the above
* attribute bits within 32-bit word boundaries.
@ -586,6 +594,9 @@ enum {
#define FATTR4_WORD2_SECURITY_LABEL BIT(FATTR4_SEC_LABEL - 64)
#define FATTR4_WORD2_MODE_UMASK BIT(FATTR4_MODE_UMASK - 64)
#define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64)
#define FATTR4_WORD2_TIME_DELEG_ACCESS BIT(FATTR4_TIME_DELEG_ACCESS - 64)
#define FATTR4_WORD2_TIME_DELEG_MODIFY BIT(FATTR4_TIME_DELEG_MODIFY - 64)
#define FATTR4_WORD2_OPEN_ARGUMENTS BIT(FATTR4_OPEN_ARGUMENTS - 64)
/* MDS threshold bitmap bits */
#define THRESHOLD_RD (1UL << 0)

View file

@ -140,6 +140,7 @@ struct nfs_server {
struct rpc_clnt * client_acl; /* ACL RPC client handle */
struct nlm_host *nlm_host; /* NLM client handle */
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
wait_queue_head_t write_congestion_wait; /* wait until write congestion eases */
atomic_long_t writeback; /* number of writeback pages */
unsigned int write_congested;/* flag set when writeback gets too high */
unsigned int flags; /* various flags */
@ -278,6 +279,9 @@ struct nfs_server {
#define NFS_CAP_LGOPEN (1U << 5)
#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
#define NFS_CAP_CASE_PRESERVING (1U << 7)
#define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8)
#define NFS_CAP_OPEN_XOR (1U << 12)
#define NFS_CAP_DELEGTIME (1U << 13)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)

View file

@ -152,11 +152,8 @@ extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
struct nfs_page *prev,
struct nfs_page *req);
extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req);
extern void nfs_unlock_and_release_request(struct nfs_page *);
extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req);
extern int nfs_page_group_lock_subrequests(struct nfs_page *head);
extern void nfs_join_page_group(struct nfs_page *head,
struct nfs_commit_info *cinfo,
struct inode *inode);
@ -208,8 +205,8 @@ static inline struct inode *nfs_page_to_inode(const struct nfs_page *req)
struct folio *folio = nfs_page_to_folio(req);
if (folio == NULL)
return page_file_mapping(req->wb_page)->host;
return folio_file_mapping(folio)->host;
return req->wb_page->mapping->host;
return folio->mapping->host;
}
/**

View file

@ -449,6 +449,22 @@ struct stateowner_id {
__u32 uniquifier;
};
/*
* Delegation information for an OPEN result; embedded in struct nfs_openres
* as its "delegation" member.
*
* The two anonymous structs share storage through the anonymous union, so
* only one of them is meaningful at a time.
* NOTE(review): presumably open_delegation_type (an enum
* open_delegation_type4 value?) selects which arm is valid - confirm
* against the XDR decoder.
*/
struct nfs4_open_delegation {
__u32 open_delegation_type;
union {
/* presumably filled in when a delegation was granted - confirm */
struct {
fmode_t type;
__u32 do_recall;
nfs4_stateid stateid;
unsigned long pagemod_limit;
};
/* presumably filled in when no delegation was granted - confirm */
struct {
__u32 why_no_delegation;
__u32 will_notify;
};
};
};
/*
* Arguments to the open call.
*/
@ -468,7 +484,7 @@ struct nfs_openargs {
nfs4_verifier verifier; /* EXCLUSIVE */
};
nfs4_stateid delegation; /* CLAIM_DELEGATE_CUR */
fmode_t delegation_type; /* CLAIM_PREVIOUS */
__u32 delegation_type; /* CLAIM_PREVIOUS */
} u;
const struct qstr * name;
const struct nfs_server *server; /* Needed for ID mapping */
@ -490,13 +506,10 @@ struct nfs_openres {
struct nfs_fattr * f_attr;
struct nfs_seqid * seqid;
const struct nfs_server *server;
fmode_t delegation_type;
nfs4_stateid delegation;
unsigned long pagemod_limit;
__u32 do_recall;
__u32 attrset[NFS4_BITMAP_SIZE];
struct nfs4_string *owner;
struct nfs4_string *group_owner;
struct nfs4_open_delegation delegation;
__u32 access_request;
__u32 access_supported;
__u32 access_result;
@ -609,6 +622,13 @@ struct nfs_release_lockowner_res {
struct nfs4_sequence_res seq_res;
};
/*
* Access/modify timestamps plus a flag for each; referenced from
* struct nfs4_delegreturnargs through its "sattr_args" pointer.
* NOTE(review): presumably atime_set / mtime_set say whether the matching
* timestamp holds a value that should be sent - confirm against the
* DELEGRETURN encoder.
*/
struct nfs4_delegattr {
struct timespec64 atime;
struct timespec64 mtime;
bool atime_set;
bool mtime_set;
};
struct nfs4_delegreturnargs {
struct nfs4_sequence_args seq_args;
const struct nfs_fh *fhandle;
@ -616,6 +636,7 @@ struct nfs4_delegreturnargs {
const u32 *bitmask;
u32 bitmask_store[NFS_BITMASK_SZ];
struct nfs4_layoutreturn_args *lr_args;
struct nfs4_delegattr *sattr_args;
};
struct nfs4_delegreturnres {
@ -624,6 +645,8 @@ struct nfs4_delegreturnres {
struct nfs_server *server;
struct nfs4_layoutreturn_res *lr_res;
int lr_ret;
bool sattr_res;
int sattr_ret;
};
/*
@ -1190,6 +1213,14 @@ struct nfs4_statfs_res {
struct nfs_fsstat *fsstat;
};
/*
* Open capabilities reported by the server; embedded in
* struct nfs4_server_caps_res as its "open_caps" member.
* Each field is a one-element array of u32.
* NOTE(review): presumably each is a single-word bitmap decoded from the
* FATTR4_OPEN_ARGUMENTS attribute - confirm against the decoder.
*/
struct nfs4_open_caps {
u32 oa_share_access[1];
u32 oa_share_deny[1];
u32 oa_share_access_want[1];
u32 oa_open_claim[1];
u32 oa_createmode[1];
};
struct nfs4_server_caps_arg {
struct nfs4_sequence_args seq_args;
struct nfs_fh *fhandle;
@ -1206,6 +1237,7 @@ struct nfs4_server_caps_res {
u32 fh_expire_type;
u32 case_insensitive;
u32 case_preserving;
struct nfs4_open_caps open_caps;
};
#define NFS4_PATHNAME_MAXCOMPONENTS 512
@ -1406,7 +1438,7 @@ struct nfs41_secinfo_no_name_args {
struct nfs41_test_stateid_args {
struct nfs4_sequence_args seq_args;
nfs4_stateid *stateid;
nfs4_stateid stateid;
};
struct nfs41_test_stateid_res {
@ -1807,7 +1839,8 @@ struct nfs_rpc_ops {
int open_flags,
struct iattr *iattr,
int *);
int (*have_delegation)(struct inode *, fmode_t);
int (*have_delegation)(struct inode *, fmode_t, int);
int (*return_delegation)(struct inode *);
struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *);
struct nfs_client *(*init_client) (struct nfs_client *,
const struct nfs_client_initdata *);

View file

@ -0,0 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* * Copyright (c) 2024, Oracle and/or its affiliates.
*/
#ifndef _LINUX_SUNRPC_RDMA_RN_H
#define _LINUX_SUNRPC_RDMA_RN_H
#include <rdma/ib_verbs.h>
/**
* struct rpcrdma_notification - request removal notification
* @rn_done: callback that takes a pointer to a notification as its only
*           argument (presumably the one passed as @done to
*           rpcrdma_rn_register() - confirm against the implementation)
* @rn_index: u32 index value; NOTE(review): looks like an identifier
*            assigned at registration time - confirm
*/
struct rpcrdma_notification {
void (*rn_done)(struct rpcrdma_notification *rn);
u32 rn_index;
};
int rpcrdma_rn_register(struct ib_device *device,
struct rpcrdma_notification *rn,
void (*done)(struct rpcrdma_notification *rn));
void rpcrdma_rn_unregister(struct ib_device *device,
struct rpcrdma_notification *rn);
int rpcrdma_ib_client_register(void);
void rpcrdma_ib_client_unregister(void);
#endif /* _LINUX_SUNRPC_RDMA_RN_H */

View file

@ -54,7 +54,7 @@ TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN);
TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN);
#define show_gss_status(x) \
__print_flags(x, "|", \
__print_symbolic(x, \
{ GSS_S_BAD_MECH, "GSS_S_BAD_MECH" }, \
{ GSS_S_BAD_NAME, "GSS_S_BAD_NAME" }, \
{ GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" }, \

View file

@ -669,6 +669,29 @@ TRACE_EVENT(xprtrdma_inline_thresh,
DEFINE_CONN_EVENT(connect);
DEFINE_CONN_EVENT(disconnect);
/*
 * xprtrdma_device_removal - records the name of the device behind @id and
 * the first sizeof(struct sockaddr_in6) bytes of the connection's
 * destination address.
 *
 * The format string intentionally carries no trailing newline, matching
 * the other TP_printk() users in this file; the trace output code already
 * terminates each event record, so an embedded "\n" would only add a blank
 * line after every event.
 */
TRACE_EVENT(xprtrdma_device_removal,
	TP_PROTO(
		const struct rdma_cm_id *id
	),

	TP_ARGS(id),

	TP_STRUCT__entry(
		__string(name, id->device->name)
		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
	),

	TP_fast_assign(
		__assign_str(name);
		memcpy(__entry->addr, &id->route.addr.dst_addr,
		       sizeof(struct sockaddr_in6));
	),

	TP_printk("device %s to be removed, disconnecting %pISpc",
		__get_str(name), __entry->addr
	)
);
DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
TRACE_EVENT(xprtrdma_op_connect,
@ -2220,6 +2243,40 @@ TRACE_EVENT(svcrdma_sq_post_err,
)
);
/*
 * Event class for rpcrdma ib_client tracepoints. Each event takes an
 * ib_device, records its name, and prints it as "device=<name>".
 */
DECLARE_EVENT_CLASS(rpcrdma_client_device_class,
TP_PROTO(
const struct ib_device *device
),
TP_ARGS(device),
TP_STRUCT__entry(
__string(name, device->name)
),
TP_fast_assign(
__assign_str(name);
),
TP_printk("device=%s",
__get_str(name)
)
);
/*
 * Define one tracepoint named @name in rpcrdma_client_device_class.
 * Every such tracepoint takes a single ib_device argument.
 */
#define DEFINE_CLIENT_DEVICE_EVENT(name) \
DEFINE_EVENT(rpcrdma_client_device_class, name, \
TP_PROTO( \
const struct ib_device *device \
), \
TP_ARGS(device) \
)
/* Tracepoints for the rpcrdma ib_client, all sharing the class above */
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_completion);
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_add_one);
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one);
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on);
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done);
#endif /* _TRACE_RPCRDMA_H */
#include <trace/define_trace.h>

View file

@ -46,6 +46,7 @@
#define NFS4_OPEN_RESULT_CONFIRM 0x0002
#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
#define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008
#define NFS4_OPEN_RESULT_NO_OPEN_STATEID 0x0010
#define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020
#define NFS4_SHARE_ACCESS_MASK 0x000F
@ -69,6 +70,9 @@
#define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000
#define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000
#define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000
#define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION 0x200000
#define NFS4_CDFC4_FORE 0x1
#define NFS4_CDFC4_BACK 0x2
#define NFS4_CDFC4_BOTH 0x3

View file

@ -3982,21 +3982,24 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
loff_t pos = iocb->ki_pos;
struct address_space *mapping = file->f_mapping;
const struct address_space_operations *a_ops = mapping->a_ops;
size_t chunk = mapping_max_folio_size(mapping);
long status = 0;
ssize_t written = 0;
do {
struct page *page;
unsigned long offset; /* Offset into pagecache page */
unsigned long bytes; /* Bytes to write to page */
struct folio *folio;
size_t offset; /* Offset into folio */
size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
void *fsdata = NULL;
offset = (pos & (PAGE_SIZE - 1));
bytes = min_t(unsigned long, PAGE_SIZE - offset,
iov_iter_count(i));
bytes = iov_iter_count(i);
retry:
offset = pos & (chunk - 1);
bytes = min(chunk - offset, bytes);
balance_dirty_pages_ratelimited(mapping);
again:
/*
* Bring in the user page that we will copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
@ -4018,11 +4021,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
if (unlikely(status < 0))
break;
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
folio = page_folio(page);
offset = offset_in_folio(folio, pos);
if (bytes > folio_size(folio) - offset)
bytes = folio_size(folio) - offset;
copied = copy_page_from_iter_atomic(page, offset, bytes, i);
flush_dcache_page(page);
if (mapping_writably_mapped(mapping))
flush_dcache_folio(folio);
copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
flush_dcache_folio(folio);
status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
@ -4040,14 +4048,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
* halfway through, might be a race with munmap,
* might be severe memory pressure.
*/
if (copied)
if (chunk > PAGE_SIZE)
chunk /= 2;
if (copied) {
bytes = copied;
goto again;
goto retry;
}
} else {
pos += status;
written += status;
}
pos += status;
written += status;
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));
if (!written)

View file

@ -2326,12 +2326,13 @@ call_transmit_status(struct rpc_task *task)
task->tk_action = call_transmit;
task->tk_status = 0;
break;
case -ECONNREFUSED:
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
break;
case -ECONNREFUSED:
if (RPC_IS_SOFTCONN(task)) {
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,

View file

@ -369,8 +369,10 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
if (RPC_IS_ASYNC(task)) {
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
queue_work(wq, &task->u.tk_work);
} else
} else {
smp_mb__after_atomic();
wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
}
}
/*

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
svc_rdma_pcl.o module.o

View file

@ -54,7 +54,7 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
cid->ci_completion_id = mr->mr_ibmr->res.id;
}
static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
static void frwr_mr_unmap(struct rpcrdma_mr *mr)
{
if (mr->mr_device) {
trace_xprtrdma_mr_unmap(mr);
@ -73,7 +73,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr)
{
int rc;
frwr_mr_unmap(mr->mr_xprt, mr);
frwr_mr_unmap(mr);
rc = ib_dereg_mr(mr->mr_ibmr);
if (rc)
@ -84,7 +84,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr)
static void frwr_mr_put(struct rpcrdma_mr *mr)
{
frwr_mr_unmap(mr->mr_xprt, mr);
frwr_mr_unmap(mr);
/* The MR is returned to the req's MR free list instead
* of to the xprt's MR free list. No spinlock is needed.
@ -92,7 +92,8 @@ static void frwr_mr_put(struct rpcrdma_mr *mr)
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
/* frwr_reset - Place MRs back on the free list
/**
* frwr_reset - Place MRs back on @req's free list
* @req: request to reset
*
* Used after a failed marshal. For FRWR, this means the MRs

View file

@ -0,0 +1,181 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2024 Oracle. All rights reserved.
*/
/* #include <linux/module.h>
#include <linux/slab.h> */
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/kref.h>
#include <linux/completion.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/sunrpc/rdma_rn.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
/*
 * Per-ib_device private data for rpcrdma.
 *
 * Allocated by rpcrdma_add_one() and freed by rpcrdma_remove_one().
 */
struct rpcrdma_device {
struct kref rd_kref; /* one ref from add_one, plus one per registration */
unsigned long rd_flags; /* bit flags, see RPCRDMA_RD_F_* */
struct ib_device *rd_device; /* the device this data belongs to */
struct xarray rd_xa; /* registered rpcrdma_notification objects */
struct completion rd_done; /* completed when the last ref is dropped */
};
/* Bit number in rd_flags: set by rpcrdma_remove_one() once removal starts */
#define RPCRDMA_RD_F_REMOVING (0)
static struct ib_client rpcrdma_ib_client;
/*
 * Look up this module's per-device data for @device.
 *
 * Listeners have no associated device and are never registered, so
 * @device may be NULL here. ib_get_client_data() does not check for
 * a NULL @device on our behalf, so that case is filtered out first.
 */
static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device)
{
	return device ? ib_get_client_data(device, &rpcrdma_ib_client) : NULL;
}
/*
 * kref release callback. Runs when the last reference on the per-device
 * data is dropped and wakes the waiter in rpcrdma_remove_one().
 */
static void rpcrdma_rn_release(struct kref *kref)
{
	struct rpcrdma_device *rd = container_of(kref, struct rpcrdma_device,
						 rd_kref);

	trace_rpcrdma_client_completion(rd->rd_device);
	complete(&rd->rd_done);
}

/**
 * rpcrdma_rn_register - register to get device removal notifications
 * @device: device to monitor
 * @rn: notification object that wishes to be notified
 * @done: callback to notify caller of device removal
 *
 * Returns zero on success. The callback in rn_done is guaranteed
 * to be invoked when the device is removed, unless this notification
 * is unregistered first.
 *
 * On failure, a negative errno is returned:
 *   %-ENETUNREACH - @device has no rpcrdma data or is being removed
 *   %-ENOMEM - the notification could not be stored
 * No reference on the per-device data is held after a failure.
 */
int rpcrdma_rn_register(struct ib_device *device,
			struct rpcrdma_notification *rn,
			void (*done)(struct rpcrdma_notification *rn))
{
	struct rpcrdma_device *rd = rpcrdma_get_client_data(device);

	if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
		return -ENETUNREACH;

	/*
	 * The callback must be valid before @rn becomes visible in
	 * rd_xa, because rpcrdma_remove_one() invokes rn_done on
	 * every entry it finds there.
	 */
	rn->rn_done = done;

	kref_get(&rd->rd_kref);
	if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b,
		     GFP_KERNEL) < 0) {
		/*
		 * Nothing was registered, so give back the reference
		 * taken above. Otherwise rpcrdma_remove_one() would
		 * wait forever for a put that never comes.
		 */
		kref_put(&rd->rd_kref, rpcrdma_rn_release);
		return -ENOMEM;
	}
	return 0;
}
/**
 * rpcrdma_rn_unregister - stop device removal notifications
 * @device: monitored device
 * @rn: notification object that no longer wishes to be notified
 *
 * Removes @rn from the device's set of registered notifications and
 * drops the reference that was taken at registration time. Does
 * nothing when @device has no rpcrdma private data.
 */
void rpcrdma_rn_unregister(struct ib_device *device,
			   struct rpcrdma_notification *rn)
{
	struct rpcrdma_device *rd;

	rd = rpcrdma_get_client_data(device);
	if (rd) {
		xa_erase(&rd->rd_xa, rn->rn_index);
		kref_put(&rd->rd_kref, rpcrdma_rn_release);
	}
}
/**
* rpcrdma_add_one - ib_client device insertion callback
* @device: device about to be inserted
*
* Returns zero on success. xprtrdma private data has been allocated
* for this device. On failure, a negative errno is returned.
*/
static int rpcrdma_add_one(struct ib_device *device)
{
struct rpcrdma_device *rd;
rd = kzalloc(sizeof(*rd), GFP_KERNEL);
if (!rd)
return -ENOMEM;
kref_init(&rd->rd_kref);
xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1);
rd->rd_device = device;
init_completion(&rd->rd_done);
ib_set_client_data(device, &rpcrdma_ib_client, rd);
trace_rpcrdma_client_add_one(device);
return 0;
}
/**
 * rpcrdma_remove_one - ib_client device removal callback
 * @device: device about to be removed
 * @client_data: this module's private per-device data
 *
 * Upon return, all transports associated with @device have divested
 * themselves from IB hardware resources.
 *
 * Invokes the rn_done callback of every registered notification, then
 * waits until all of them have been unregistered before freeing
 * @client_data.
 */
static void rpcrdma_remove_one(struct ib_device *device,
void *client_data)
{
struct rpcrdma_device *rd = client_data;
struct rpcrdma_notification *rn;
unsigned long index;
trace_rpcrdma_client_remove_one(device);
/* From here on, rpcrdma_rn_register() refuses new registrations */
set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags);
/* Tell every current registrant that the device is going away */
xa_for_each(&rd->rd_xa, index, rn)
rn->rn_done(rn);
/*
* Wait only if there are still outstanding notification
* registrants for this device.
*
* This drops the reference taken by kref_init() in
* rpcrdma_add_one(). If other references remain, the final
* kref_put() runs rpcrdma_rn_release(), which completes rd_done.
*/
if (!refcount_dec_and_test(&rd->rd_kref.refcount)) {
trace_rpcrdma_client_wait_on(device);
wait_for_completion(&rd->rd_done);
}
trace_rpcrdma_client_remove_one_done(device);
kfree(rd);
}
/*
 * ib_client descriptor for rpcrdma: names the client and supplies the
 * per-device add and remove callbacks defined above.
 */
static struct ib_client rpcrdma_ib_client = {
.name = "rpcrdma",
.add = rpcrdma_add_one,
.remove = rpcrdma_remove_one,
};
/**
 * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma
 *
 * Thin wrapper around ib_unregister_client() for rpcrdma_ib_client.
 *
 * cel: watch for orphaned rpcrdma_device objects on module unload
 */
void rpcrdma_ib_client_unregister(void)
{
ib_unregister_client(&rpcrdma_ib_client);
}
/**
 * rpcrdma_ib_client_register - register ib_client for rpcrdma
 *
 * Thin wrapper around ib_register_client() for rpcrdma_ib_client; the
 * result of that call is passed straight back to the caller.
 *
 * Returns zero on success, or a negative errno.
 */
int rpcrdma_ib_client_register(void)
{
return ib_register_client(&rpcrdma_ib_client);
}

View file

@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/sunrpc/rdma_rn.h>
#include <asm/swab.h>
@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void)
{
xprt_rdma_cleanup();
svc_rdma_cleanup();
rpcrdma_ib_client_unregister();
}
static int __init rpc_rdma_init(void)
{
int rc;
rc = rpcrdma_ib_client_register();
if (rc)
goto out_rc;
rc = svc_rdma_init();
if (rc)
goto out;
goto out_ib_client;
rc = xprt_rdma_init();
if (rc)
svc_rdma_cleanup();
goto out_svc_rdma;
out:
return 0;
out_svc_rdma:
svc_rdma_cleanup();
out_ib_client:
rpcrdma_ib_client_unregister();
out_rc:
return rc;
}

View file

@ -1471,8 +1471,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
false);
rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1));
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);

View file

@ -49,14 +49,14 @@
* o buffer memory
*/
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/log2.h>
#include <asm-generic/barrier.h>
#include <asm/bitops.h>
#include <asm/barrier.h>
#include <rdma/ib_cm.h>
@ -69,13 +69,15 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_sendctx *sc);
static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
int node);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
@ -222,7 +224,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
static int
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
struct rpcrdma_ep *ep = id->context;
might_sleep();
@ -241,14 +242,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_async_rc = -ENETUNREACH;
complete(&ep->re_done);
return 0;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
pr_info("rpcrdma: removing device %s for %pISpc\n",
ep->re_id->device->name, sap);
switch (xchg(&ep->re_connect_status, -ENODEV)) {
case 0: goto wake_connect_worker;
case 1: goto disconnected;
}
return 0;
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
goto disconnected;
@ -284,6 +277,14 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
return 0;
}
/*
 * Device removal callback for an endpoint. Recovers the endpoint that
 * embeds @rn, traces the event, and forces its transport to disconnect.
 */
static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn)
{
	struct rpcrdma_ep *ep;

	ep = container_of(rn, struct rpcrdma_ep, re_rn);
	trace_xprtrdma_device_removal(ep->re_id);
	xprt_force_disconnect(ep->re_xprt);
}
static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep)
{
@ -323,6 +324,10 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
if (rc)
goto out;
rc = rpcrdma_rn_register(id->device, &ep->re_rn, rpcrdma_ep_removal_done);
if (rc)
goto out;
return id;
out:
@ -350,6 +355,8 @@ static void rpcrdma_ep_destroy(struct kref *kref)
ib_dealloc_pd(ep->re_pd);
ep->re_pd = NULL;
rpcrdma_rn_unregister(ep->re_id->device, &ep->re_rn);
kfree(ep);
module_put(THIS_MODULE);
}
@ -505,7 +512,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
rpcrdma_post_recvs(r_xprt, 1, true);
rpcrdma_post_recvs(r_xprt, 1);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
@ -897,6 +904,8 @@ static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt)
static void rpcrdma_req_reset(struct rpcrdma_req *req)
{
struct rpcrdma_mr *mr;
/* Credits are valid for only one connection */
req->rl_slot.rq_cong = 0;
@ -906,7 +915,19 @@ static void rpcrdma_req_reset(struct rpcrdma_req *req)
rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
frwr_reset(req);
/* The verbs consumer can't know the state of an MR on the
* req->rl_registered list unless a successful completion
* has occurred, so they cannot be re-used.
*/
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;
spin_lock(&buf->rb_lock);
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
frwr_mr_release(mr);
}
}
/* ASSUMPTION: the rb_allreqs list is stable for the duration,
@ -924,18 +945,20 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
}
static noinline
struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
bool temp)
struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_device *device = ep->re_id->device;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS);
if (rep == NULL)
goto out;
rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv,
DMA_FROM_DEVICE);
rep->rr_rdmabuf = rpcrdma_regbuf_alloc_node(ep->re_inline_recv,
DMA_FROM_DEVICE,
ibdev_to_node(device));
if (!rep->rr_rdmabuf)
goto out_free;
@ -950,7 +973,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
rep->rr_temp = temp;
spin_lock(&buf->rb_lock);
list_add(&rep->rr_all, &buf->rb_all_reps);
@ -969,17 +991,6 @@ static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
kfree(rep);
}
static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
{
struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
spin_lock(&buf->rb_lock);
list_del(&rep->rr_all);
spin_unlock(&buf->rb_lock);
rpcrdma_rep_free(rep);
}
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
@ -1011,10 +1022,8 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
list_for_each_entry(rep, &buf->rb_all_reps, rr_all)
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
rep->rr_temp = true; /* Mark this rep for destruction */
}
}
static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
@ -1231,14 +1240,15 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
* or Replies they may be registered externally via frwr_map.
*/
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
int node)
{
struct rpcrdma_regbuf *rb;
rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS);
rb = kmalloc_node(sizeof(*rb), XPRTRDMA_GFP_FLAGS, node);
if (!rb)
return NULL;
rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS);
rb->rg_data = kmalloc_node(size, XPRTRDMA_GFP_FLAGS, node);
if (!rb->rg_data) {
kfree(rb);
return NULL;
@ -1250,6 +1260,12 @@ rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
return rb;
}
/*
 * Allocate a regbuf without a NUMA node preference: forwards to
 * rpcrdma_regbuf_alloc_node() with NUMA_NO_NODE and returns its result.
 */
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
{
return rpcrdma_regbuf_alloc_node(size, direction, NUMA_NO_NODE);
}
/**
* rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
* @rb: regbuf to reallocate
@ -1327,10 +1343,9 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
* @needed: current credit grant
* @temp: mark Receive buffers to be deleted after one use
*
*/
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
@ -1344,8 +1359,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
if (likely(ep->re_receive_count > needed))
goto out;
needed -= ep->re_receive_count;
if (!temp)
needed += RPCRDMA_MAX_RECV_BATCH;
needed += RPCRDMA_MAX_RECV_BATCH;
if (atomic_inc_return(&ep->re_receiving) > 1)
goto out;
@ -1354,12 +1368,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
wr = NULL;
while (needed) {
rep = rpcrdma_rep_get_locked(buf);
if (rep && rep->rr_temp) {
rpcrdma_rep_destroy(rep);
continue;
}
if (!rep)
rep = rpcrdma_rep_create(r_xprt, temp);
rep = rpcrdma_rep_create(r_xprt);
if (!rep)
break;
if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {

View file

@ -56,6 +56,7 @@
#include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#include <linux/sunrpc/rdma_rn.h> /* removal notifications */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
@ -92,6 +93,7 @@ struct rpcrdma_ep {
struct rpcrdma_connect_private
re_cm_private;
struct rdma_conn_param re_remote_cma;
struct rpcrdma_notification re_rn;
int re_receive_count;
unsigned int re_max_requests; /* depends on device */
unsigned int re_inline_send; /* negotiated */
@ -198,7 +200,6 @@ struct rpcrdma_rep {
__be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
struct rpc_rqst *rr_rqst;
@ -466,7 +467,7 @@ void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed);
/*
* Buffer calls - xprtrdma/verbs.c