for-6.8-rc5-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmXV2z8ACgkQxWXV+ddt
 WDsudxAAoKcp1DbuOtaOzG/XVnIKt36drK4cwyZnGo9PZ9vlgT6k+T0efto4DkOF
 fNWy2d/9iGy9RHy4oxZL6ceb3rcWW0NbhiKHeTPNqL4ZCPa7t6bxMWXSYBh6pYgZ
 6EUS6H9Don05F7rQs8rERc+VIW6u1HFTLn4wS1cmlTyTQzZwlk9B2V6KtDtHBi0k
 B4CCxY6jX2bl7BncXdYteb13Xjg7+JnWvfSKb7ouSVnL8VEcGG13QkPFNV2Xsoi2
 uDDsw+QKBEcPNgBIubBUwbLS5V5vYa1H1meUFJkciaeblHVlVIMN3h7+Y8VNKMTC
 qpxEo3Hx6oqmw9LdEIU7WsvFs0JJum2fKOjOx3vr1d3AiyFG6W6lrm1fKwnp3dt7
 dHdAYuo8+Q4rirGlMDcEoYqpy7AcV8QqtSYajdrdpB1dqHcHhukSNqJ0dx5lYElU
 HtnMXD9vLc4uJDcl9Z1aTWEmB+7nj5HwukSnTqQhgwpCZM7mrz6pe1DuD5iKinG6
 Yth9QFhgcbcnchPA+3SOtC6uuje9chHo6L6eTVacnKAKyKgLY8qTTsh3zYQNVhMX
 M2aWcAkizq20vKe7JFxs7M/tClyuswTjOP6RYzeayY21Rn8gGwe7uhXhv5MKpEh5
 TjXjiixsrxxsyyaED7Kl69i54BvmM/TI35p7Jbx6Ln7PPD8lf8M=
 =pXyL
 -----END PGP SIGNATURE-----

Merge tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - Fix a deadlock in fiemap.

   There was a big lock around the whole operation that can interfere
   with a page fault and mkwrite.

   Reducing the lock scope can also speed up fiemap

 - Fix range condition for extent defragmentation which could lead to
   worse layout in some cases

* tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix deadlock with fiemap and extent locking
  btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size
This commit is contained in:
Linus Torvalds 2024-02-21 08:45:07 -08:00
commit 8da8d88455
2 changed files with 46 additions and 18 deletions

View File

@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto add;
/* Skip too large extent */
if (range_len >= extent_thresh)
if (em->len >= extent_thresh)
goto next;
/*

View File

@ -2689,16 +2689,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
* it beyond i_size.
*/
while (cur_offset < end && cur_offset < i_size) {
struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 prealloc_start;
u64 lockstart;
u64 lockend;
u64 prealloc_len = 0;
bool delalloc;
lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
lockend = round_up(end, inode->root->fs_info->sectorsize);
/*
* We are only locking for the delalloc range because that's the
* only thing that can change here. With fiemap we have a lock
* on the inode, so no buffered or direct writes can happen.
*
* However mmaps and normal page writeback will cause this to
* change arbitrarily. We have to lock the extent lock here to
* make sure that nobody messes with the tree while we're doing
* btrfs_find_delalloc_in_range.
*/
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
delalloc_cached_state,
&delalloc_start,
&delalloc_end);
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
break;
@ -2866,15 +2884,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
const u64 ino = btrfs_ino(inode);
struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
struct btrfs_backref_share_check_ctx *backref_ctx;
u64 last_extent_end;
u64 prev_extent_end;
u64 lockstart;
u64 lockend;
u64 range_start;
u64 range_end;
const u64 sectorsize = inode->root->fs_info->sectorsize;
bool stopped = false;
int ret;
@ -2885,12 +2903,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto out;
}
lockstart = round_down(start, inode->root->fs_info->sectorsize);
lockend = round_up(start + len, inode->root->fs_info->sectorsize);
prev_extent_end = lockstart;
range_start = round_down(start, sectorsize);
range_end = round_up(start + len, sectorsize);
prev_extent_end = range_start;
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
@ -2898,7 +2915,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_release_path(path);
path->reada = READA_FORWARD;
ret = fiemap_search_slot(inode, path, lockstart);
ret = fiemap_search_slot(inode, path, range_start);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
@ -2910,7 +2927,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto check_eof_delalloc;
}
while (prev_extent_end < lockend) {
while (prev_extent_end < range_end) {
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_file_extent_item *ei;
struct btrfs_key key;
@ -2933,19 +2950,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
* The first iteration can leave us at an extent item that ends
* before our range's start. Move to the next item.
*/
if (extent_end <= lockstart)
if (extent_end <= range_start)
goto next_item;
backref_ctx->curr_leaf_bytenr = leaf->start;
/* We have in implicit hole (NO_HOLES feature enabled). */
if (prev_extent_end < key.offset) {
const u64 range_end = min(key.offset, lockend) - 1;
const u64 hole_end = min(key.offset, range_end) - 1;
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state,
backref_ctx, 0, 0, 0,
prev_extent_end, range_end);
prev_extent_end, hole_end);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
/* We've reached the end of the fiemap range, stop. */
if (key.offset >= lockend) {
if (key.offset >= range_end) {
stopped = true;
break;
}
@ -3049,29 +3066,41 @@ check_eof_delalloc:
btrfs_free_path(path);
path = NULL;
if (!stopped && prev_extent_end < lockend) {
if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
0, 0, 0, prev_extent_end, lockend - 1);
0, 0, 0, prev_extent_end, range_end - 1);
if (ret < 0)
goto out_unlock;
prev_extent_end = lockend;
prev_extent_end = range_end;
}
if (cache.cached && cache.offset + cache.len >= last_extent_end) {
const u64 i_size = i_size_read(&inode->vfs_inode);
if (prev_extent_end < i_size) {
struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 lockstart;
u64 lockend;
bool delalloc;
lockstart = round_down(prev_extent_end, sectorsize);
lockend = round_up(i_size, sectorsize);
/*
* See the comment in fiemap_process_hole as to why
* we're doing the locking here.
*/
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode,
prev_extent_end,
i_size - 1,
&delalloc_cached_state,
&delalloc_start,
&delalloc_end);
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
cache.flags |= FIEMAP_EXTENT_LAST;
} else {
@ -3082,7 +3111,6 @@ check_eof_delalloc:
ret = emit_last_fiemap_cache(fieinfo, &cache);
out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
out:
free_extent_state(delalloc_cached_state);