From 98801506552593c9b8ac11021b0cdad12cab4f6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: [PATCH 1/4] fscache: Fix the default for fscache_maybe_release_page() Fix the default for fscache_maybe_release_page() for when the cookie isn't valid or the page isn't cached. It mustn't return false as that indicates the page cannot yet be freed. The problem with the default is that if, say, there's no cache, but a network filesystem's pages are using up almost all the available memory, a system can OOM because the filesystem ->releasepage() op will not allow them to be released as fscache_maybe_release_page() incorrectly prevents it. This can be tested by writing a sequence of 512MiB files to an AFS mount. It does not affect NFS or CIFS because both of those wrap the call in a check of PG_fscache and it shouldn't bother Ceph as that only has PG_private set whilst writeback is in progress. This might be an issue for 9P, however. Note that the pages aren't entirely stuck. Removing a file or unmounting will clear things because that uses ->invalidatepage() instead. Fixes: 201a15428bd5 ("FS-Cache: Handle pages pending storage that get evicted under OOM conditions") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Jeff Layton Acked-by: Al Viro Tested-by: Marc Dionne cc: stable@vger.kernel.org # 2.6.32+ --- include/linux/fscache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4ff47d4a893..fe0c349684fa 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, { if (fscache_cookie_valid(cookie) && PageFsCache(page)) return __fscache_maybe_release_page(cookie, page, gfp); - return false; + return true; } /** From 7888da95832d50a87bbfdb9f40620ddc66f94b3c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: [PATCH 2/4] afs: Potential uninitialized variable in afs_extract_data() Smatch warns that: fs/afs/rxrpc.c:922 afs_extract_data() error: uninitialized symbol 'remote_abort'. Smatch is right that "remote_abort" might be uninitialized when we pass it to afs_set_call_complete(). I don't know if that function uses the uninitialized variable. Anyway, the comment for rxrpc_kernel_recv_data(), says that "*_abort should also be initialised to 0." and this patch does that. Signed-off-by: Dan Carpenter Signed-off-by: David Howells --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ea1460b9b71a..e1126659f043 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, { struct afs_net *net = call->net; enum afs_call_state state; - u32 remote_abort; + u32 remote_abort = 0; int ret; _enter("{%s,%zu},,%zu,%d", From 440fbc3a8a694467ba641234cedb96c28ab2d5fb Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: [PATCH 3/4] afs: Fix unlink Repeating creation and deletion of a file on an afs mount will run the box out of memory, e.g.: dd if=/dev/zero of=/afs/scratch/m0 bs=$((1024*1024)) count=512 rm /afs/scratch/m0 The problem seems to be that it's not properly decrementing the nlink count so that the inode can be scrapped. Note that this doesn't fix local creation followed by remote deletion. That's harder to handle and will require a separate patch as we're not told that the file has been deleted - only that the directory has changed. Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/dir.c | 37 +++++++++++++++++++++++++++++-------- fs/afs/inode.c | 4 ++++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ff8d5bf4354f..23c7f395d718 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -895,20 +895,38 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) * However, if we didn't have a callback promise outstanding, or it was * outstanding on a different server, then it won't break it either... */ -static int afs_dir_remove_link(struct dentry *dentry, struct key *key) +static int afs_dir_remove_link(struct dentry *dentry, struct key *key, + unsigned long d_version_before, + unsigned long d_version_after) { + bool dir_valid; int ret = 0; + /* There were no intervening changes on the server if the version + * number we got back was incremented by exactly 1. + */ + dir_valid = (d_version_after == d_version_before + 1); + if (d_really_is_positive(dentry)) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - kdebug("AFS_VNODE_DELETED"); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - - ret = afs_validate(vnode, key); - if (ret == -ESTALE) + if (dir_valid) { + drop_nlink(&vnode->vfs_inode); + if (vnode->vfs_inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } ret = 0; + } else { + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + kdebug("AFS_VNODE_DELETED"); + + ret = afs_validate(vnode, key); + if (ret == -ESTALE) + ret = 0; + } _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); } @@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct key *key; + unsigned long d_version = (unsigned long)dentry->d_fsdata; int ret; _enter("{%x:%u},{%pd}", @@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) afs_vnode_commit_status(&fc, dvnode, fc.cb_break); ret = afs_end_vnode_operation(&fc); if (ret == 0) - ret = afs_dir_remove_link(dentry, key); + ret = afs_dir_remove_link( + dentry, key, d_version, + (unsigned long)dvnode->status.data_version); } error_key: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 3415eb7484f6..1e81864ef0b2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) } read_sequnlock_excl(&vnode->cb_lock); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + clear_nlink(&vnode->vfs_inode); + if (valid) goto valid; From afae457d874860a7e299d334f59eede5f3ad4b47 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: [PATCH 4/4] afs: Fix missing error handling in afs_write_end() afs_write_end() is missing page unlock and put if afs_fill_page() fails. Reported-by: Al Viro Signed-off-by: David Howells --- fs/afs/write.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index cb5f8a3df577..9370e2feb999 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, ret = afs_fill_page(vnode, key, pos + copied, len - copied, page); if (ret < 0) - return ret; + goto out; } SetPageUptodate(page); } @@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); + ret = copied; + +out: unlock_page(page); put_page(page); - - return copied; + return ret; } /*