shmem: move spinlock into shmem_recalc_inode() to fix quota support

Commit "shmem: fix quota lock nesting in huge hole handling" was not so
good: Smatch caught shmem_recalc_inode()'s shmem_inode_unacct_blocks()
descending into quota_send_warning(): where blocking GFP_NOFS is used,
yet shmem_recalc_inode() is called holding the shmem inode's info->lock.

Yes, both __dquot_alloc_space() and __dquot_free_space() are commented
"This operation can block, but only after everything is updated" - when
calling flush_warnings() at the end - both its print_warning() and its
quota_send_warning() may block.

Rework shmem_recalc_inode() to take the shmem inode's info->lock inside,
and drop it before calling shmem_inode_unacct_blocks().

And why were the spin_locks disabling interrupts?  That was just a relic
from when shmem_charge() and shmem_uncharge() were called while holding
i_pages xa_lock: stop disabling interrupts for info->lock now.

To help stop me from making the same mistake again, add a might_sleep()
into shmem_inode_acct_block() and shmem_inode_unacct_blocks(); and those
functions have grown, so let the compiler decide whether to inline them.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/linux-fsdevel/ffd7ca34-7f2a-44ee-b05d-b54d920ce076@moroto.mountain/
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <29f48045-2cb5-7db-ecf1-72462f1bef5@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Hugh Dickins 2023-08-03 22:46:11 -07:00 committed by Christian Brauner
parent 2be4f05af7
commit 3c1b7528d8
1 changed files with 44 additions and 63 deletions

View File

@ -203,7 +203,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}
static inline int shmem_inode_acct_block(struct inode *inode, long pages)
static int shmem_inode_acct_block(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
@ -212,6 +212,7 @@ static inline int shmem_inode_acct_block(struct inode *inode, long pages)
if (shmem_acct_block(info->flags, pages))
return err;
might_sleep(); /* when quotas */
if (sbinfo->max_blocks) {
if (percpu_counter_compare(&sbinfo->used_blocks,
sbinfo->max_blocks - pages) > 0)
@ -235,11 +236,12 @@ unacct:
return err;
}
static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
static void shmem_inode_unacct_blocks(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
might_sleep(); /* when quotas */
dquot_free_block_nodirty(inode, pages);
if (sbinfo->max_blocks)
@ -400,30 +402,45 @@ static void shmem_free_inode(struct super_block *sb)
/**
* shmem_recalc_inode - recalculate the block usage of an inode
* @inode: inode to recalc
* @alloced: the change in number of pages allocated to inode
* @swapped: the change in number of pages swapped from inode
*
* We have to calculate the free blocks since the mm can drop
* undirtied hole pages behind our back.
*
* But normally info->alloced == inode->i_mapping->nrpages + info->swapped
* So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
*
* It has to be called with the spinlock held.
*/
static void shmem_recalc_inode(struct inode *inode)
static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
{
struct shmem_inode_info *info = SHMEM_I(inode);
long freed;
freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
if (freed > 0) {
spin_lock(&info->lock);
info->alloced += alloced;
info->swapped += swapped;
freed = info->alloced - info->swapped -
READ_ONCE(inode->i_mapping->nrpages);
/*
* Special case: whereas normally shmem_recalc_inode() is called
* after i_mapping->nrpages has already been adjusted (up or down),
* shmem_writepage() has to raise swapped before nrpages is lowered -
* to stop a racing shmem_recalc_inode() from thinking that a page has
* been freed. Compensate here, to avoid the need for a followup call.
*/
if (swapped > 0)
freed += swapped;
if (freed > 0)
info->alloced -= freed;
spin_unlock(&info->lock);
/* The quota case may block */
if (freed > 0)
shmem_inode_unacct_blocks(inode, freed);
}
}
bool shmem_charge(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct address_space *mapping = inode->i_mapping;
if (shmem_inode_acct_block(inode, pages))
@ -434,24 +451,16 @@ bool shmem_charge(struct inode *inode, long pages)
mapping->nrpages += pages;
xa_unlock_irq(&mapping->i_pages);
spin_lock_irq(&info->lock);
info->alloced += pages;
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, pages, 0);
return true;
}
void shmem_uncharge(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
/* pages argument is currently unused: keep it to help debugging */
/* nrpages adjustment done by __filemap_remove_folio() or caller */
spin_lock_irq(&info->lock);
shmem_recalc_inode(inode);
/* which has called shmem_inode_unacct_blocks() if necessary */
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, 0, 0);
}
/*
@ -1091,10 +1100,7 @@ whole_folios:
folio_batch_release(&fbatch);
}
spin_lock_irq(&info->lock);
info->swapped -= nr_swaps_freed;
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, 0, -nr_swaps_freed);
}
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
@ -1112,11 +1118,9 @@ static int shmem_getattr(struct mnt_idmap *idmap,
struct inode *inode = path->dentry->d_inode;
struct shmem_inode_info *info = SHMEM_I(inode);
if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
spin_lock_irq(&info->lock);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
}
if (info->alloced - info->swapped != inode->i_mapping->nrpages)
shmem_recalc_inode(inode, 0, 0);
if (info->fsflags & FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
if (info->fsflags & FS_IMMUTABLE_FL)
@ -1501,11 +1505,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (add_to_swap_cache(folio, swap,
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
NULL) == 0) {
spin_lock_irq(&info->lock);
shmem_recalc_inode(inode);
info->swapped++;
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, 0, 1);
swap_shmem_alloc(swap);
shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
@ -1776,7 +1776,6 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
struct folio *folio, swp_entry_t swap)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
swp_entry_t swapin_error;
void *old;
@ -1789,16 +1788,12 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
folio_wait_writeback(folio);
delete_from_swap_cache(folio);
spin_lock_irq(&info->lock);
/*
* Don't treat swapin error folio as alloced. Otherwise inode->i_blocks won't
* be 0 when inode is released and thus trigger WARN_ON(inode->i_blocks) in
* shmem_evict_inode.
* Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
* won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
* in shmem_evict_inode().
*/
info->alloced--;
info->swapped--;
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, -1, -1);
swap_free(swap);
}
@ -1885,10 +1880,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
if (error)
goto failed;
spin_lock_irq(&info->lock);
info->swapped--;
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, 0, -1);
if (sgp == SGP_WRITE)
folio_mark_accessed(folio);
@ -2053,12 +2045,9 @@ alloc_nohuge:
charge_mm);
if (error)
goto unacct;
folio_add_lru(folio);
spin_lock_irq(&info->lock);
info->alloced += folio_nr_pages(folio);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
folio_add_lru(folio);
shmem_recalc_inode(inode, folio_nr_pages(folio), 0);
alloced = true;
if (folio_test_pmd_mappable(folio) &&
@ -2107,9 +2096,7 @@ clear:
if (alloced) {
folio_clear_dirty(folio);
filemap_remove_folio(folio);
spin_lock_irq(&info->lock);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, 0, 0);
}
error = -EINVAL;
goto unlock;
@ -2135,9 +2122,7 @@ unlock:
folio_put(folio);
}
if (error == -ENOSPC && !once++) {
spin_lock_irq(&info->lock);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, 0, 0);
goto repeat;
}
if (error == -EEXIST)
@ -2650,11 +2635,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
if (ret)
goto out_delete_from_cache;
spin_lock_irq(&info->lock);
info->alloced++;
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
shmem_recalc_inode(inode, 1, 0);
folio_unlock(folio);
return 0;
out_delete_from_cache: