for-6.8-rc6-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmXh1GgACgkQxWXV+ddt WDtnvA/7BN7BZ6QmwWv9UyxhgSBtzI19AXPi/kBsssnnjNuzXoHFaVHj68lQCCOB a4YjRxAg7nmwFGHdVDTdnwXgUECzqlVkeX9cXg1ZpJy0IfP9RriGedRlC/93z7aV pg6DnKMh2FlkibK7yO6kRBR8RYLc5aVIytqHXgUeqbaquuhj2Hh8EpqRo2X0RsoE wDXlK0qgrU8HyrA3fHdqKYPcm1+cYABGTCwGx65iRffy8vH+KFSAr71G8jOJVEUj DgNWJCpBjXJNs0dsKrik5oGmvLd3GDBKinNX7R2mAvMAMGWrL+fVVTVTfBS/clUT FBiVFNYCJuphMcO3Qjs6JIuEez0GuGEeh1m+tQ8W795At1FSiINtE5J7LjmJUl5X FuUwOUpxco1lTXBLX149Y9kk7AEOaqYxy0XbH4r5bbKyuzQegRGB9/qQX4sSaCt3 3T+Td9PvS+6Jo+CDO0dsYhG/h3bsHeHtHGR6f2CiO/m1zHDnTX9aYVcLMM3hsrMI 8OUEy1jkuKnDZQuZuIWES/3V9FlJL34dR3Cb236Pv/yIH1iujIc27g0qXrC1vzPg wnUL1wheLQ9IRLedXoiHtX2Y2ZfFQGQDrIKNCJFD+WNPkZYffih5QNTV/mPZmL80 9EbjoVTu+6rygzdD43R1RWvK0kFsY44RKhHreI8SItO+e3/0TAs= =hMf8 -----END PGP SIGNATURE----- Merge tag 'for-6.8-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs fixes from David Sterba: - fix freeing allocated id for anon dev when snapshot creation fails - fiemap fixes: - followup for a recent deadlock fix, ranges that fiemap can access can still race with ordered extent completion - make sure fiemap with SYNC flag does not race with writes * tag 'for-6.8-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: fix double free of anonymous device after snapshot creation failure btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given btrfs: fix race between ordered extent completion and fiemap
This commit is contained in:
commit
7505aa147a
|
@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
|
|||
*
|
||||
* @objectid: root id
|
||||
* @anon_dev: preallocated anonymous block device number for new roots,
|
||||
* pass 0 for new allocation.
|
||||
* pass NULL for a new allocation.
|
||||
* @check_ref: whether to check root item references, If true, return -ENOENT
|
||||
* for orphan roots
|
||||
*/
|
||||
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
|
||||
u64 objectid, dev_t anon_dev,
|
||||
u64 objectid, dev_t *anon_dev,
|
||||
bool check_ref)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
|
@ -1342,9 +1342,9 @@ again:
|
|||
* that common but still possible. In that case, we just need
|
||||
* to free the anon_dev.
|
||||
*/
|
||||
if (unlikely(anon_dev)) {
|
||||
free_anon_bdev(anon_dev);
|
||||
anon_dev = 0;
|
||||
if (unlikely(anon_dev && *anon_dev)) {
|
||||
free_anon_bdev(*anon_dev);
|
||||
*anon_dev = 0;
|
||||
}
|
||||
|
||||
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
|
||||
|
@ -1366,7 +1366,7 @@ again:
|
|||
goto fail;
|
||||
}
|
||||
|
||||
ret = btrfs_init_fs_root(root, anon_dev);
|
||||
ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
|
@ -1402,7 +1402,7 @@ fail:
|
|||
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
|
||||
* and once again by our caller.
|
||||
*/
|
||||
if (anon_dev)
|
||||
if (anon_dev && *anon_dev)
|
||||
root->anon_dev = 0;
|
||||
btrfs_put_root(root);
|
||||
return ERR_PTR(ret);
|
||||
|
@ -1418,7 +1418,7 @@ fail:
|
|||
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
|
||||
u64 objectid, bool check_ref)
|
||||
{
|
||||
return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
|
||||
return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
|
|||
* the anonymous block device id
|
||||
*
|
||||
* @objectid: tree objectid
|
||||
* @anon_dev: if zero, allocate a new anonymous block device or use the
|
||||
* parameter value
|
||||
* @anon_dev: if NULL, allocate a new anonymous block device or use the
|
||||
* parameter value if not NULL
|
||||
*/
|
||||
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
|
||||
u64 objectid, dev_t anon_dev)
|
||||
u64 objectid, dev_t *anon_dev)
|
||||
{
|
||||
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
|
||||
}
|
||||
|
|
|
@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
|
|||
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
|
||||
u64 objectid, bool check_ref);
|
||||
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
|
||||
u64 objectid, dev_t anon_dev);
|
||||
u64 objectid, dev_t *anon_dev);
|
||||
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
u64 objectid);
|
||||
|
|
|
@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
|
|||
struct fiemap_cache *cache,
|
||||
u64 offset, u64 phys, u64 len, u32 flags)
|
||||
{
|
||||
u64 cache_end;
|
||||
int ret = 0;
|
||||
|
||||
/* Set at the end of extent_fiemap(). */
|
||||
|
@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
|
|||
goto assign;
|
||||
|
||||
/*
|
||||
* Sanity check, extent_fiemap() should have ensured that new
|
||||
* fiemap extent won't overlap with cached one.
|
||||
* Not recoverable.
|
||||
* When iterating the extents of the inode, at extent_fiemap(), we may
|
||||
* find an extent that starts at an offset behind the end offset of the
|
||||
* previous extent we processed. This happens if fiemap is called
|
||||
* without FIEMAP_FLAG_SYNC and there are ordered extents completing
|
||||
* while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
|
||||
*
|
||||
* NOTE: Physical address can overlap, due to compression
|
||||
* For example we are in leaf X processing its last item, which is the
|
||||
* file extent item for file range [512K, 1M[, and after
|
||||
* btrfs_next_leaf() releases the path, there's an ordered extent that
|
||||
* completes for the file range [768K, 2M[, and that results in trimming
|
||||
* the file extent item so that it now corresponds to the file range
|
||||
* [512K, 768K[ and a new file extent item is inserted for the file
|
||||
* range [768K, 2M[, which may end up as the last item of leaf X or as
|
||||
* the first item of the next leaf - in either case btrfs_next_leaf()
|
||||
* will leave us with a path pointing to the new extent item, for the
|
||||
* file range [768K, 2M[, since that's the first key that follows the
|
||||
* last one we processed. So in order not to report overlapping extents
|
||||
* to user space, we trim the length of the previously cached extent and
|
||||
* emit it.
|
||||
*
|
||||
* Upon calling btrfs_next_leaf() we may also find an extent with an
|
||||
* offset smaller than or equal to cache->offset, and this happens
|
||||
* when we had a hole or prealloc extent with several delalloc ranges in
|
||||
* it, but after btrfs_next_leaf() released the path, delalloc was
|
||||
* flushed and the resulting ordered extents were completed, so we can
|
||||
* now have found a file extent item for an offset that is smaller than
|
||||
* or equal to what we have in cache->offset. We deal with this as
|
||||
* described below.
|
||||
*/
|
||||
if (cache->offset + cache->len > offset) {
|
||||
WARN_ON(1);
|
||||
return -EINVAL;
|
||||
cache_end = cache->offset + cache->len;
|
||||
if (cache_end > offset) {
|
||||
if (offset == cache->offset) {
|
||||
/*
|
||||
* We cached a delalloc range (found in the io tree) for
|
||||
* a hole or prealloc extent and we have now found a
|
||||
* file extent item for the same offset. What we have
|
||||
* now is more recent and up to date, so discard what
|
||||
* we had in the cache and use what we have just found.
|
||||
*/
|
||||
goto assign;
|
||||
} else if (offset > cache->offset) {
|
||||
/*
|
||||
* The extent range we previously found ends after the
|
||||
* offset of the file extent item we found and that
|
||||
* offset falls somewhere in the middle of that previous
|
||||
* extent range. So adjust the range we previously found
|
||||
* to end at the offset of the file extent item we have
|
||||
* just found, since this extent is more up to date.
|
||||
* Emit that adjusted range and cache the file extent
|
||||
* item we have just found. This corresponds to the case
|
||||
* where a previously found file extent item was split
|
||||
* due to an ordered extent completing.
|
||||
*/
|
||||
cache->len = offset - cache->offset;
|
||||
goto emit;
|
||||
} else {
|
||||
const u64 range_end = offset + len;
|
||||
|
||||
/*
|
||||
* The offset of the file extent item we have just found
|
||||
* is behind the cached offset. This means we were
|
||||
* processing a hole or prealloc extent for which we
|
||||
* have found delalloc ranges (in the io tree), so what
|
||||
* we have in the cache is the last delalloc range we
|
||||
* found while the file extent item we found can be
|
||||
* either for a whole delalloc range we previously
|
||||
* emitted or only a part of that range.
|
||||
*
|
||||
* We have two cases here:
|
||||
*
|
||||
* 1) The file extent item's range ends at or behind the
|
||||
* cached extent's end. In this case just ignore the
|
||||
* current file extent item because we don't want to
|
||||
* overlap with previous ranges that may have been
|
||||
* emitted already;
|
||||
*
|
||||
* 2) The file extent item starts behind the currently
|
||||
* cached extent but its end offset goes beyond the
|
||||
* end offset of the cached extent. We don't want to
|
||||
* overlap with a previous range that may have been
|
||||
* emitted already, so we emit the currently cached
|
||||
* extent and then partially store the current file
|
||||
* extent item's range in the cache, for the subrange
|
||||
* going from the cached extent's end to the end of the
|
||||
* file extent item.
|
||||
*/
|
||||
if (range_end <= cache_end)
|
||||
return 0;
|
||||
|
||||
if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC)))
|
||||
phys += cache_end - offset;
|
||||
|
||||
offset = cache_end;
|
||||
len = range_end - cache_end;
|
||||
goto emit;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
|
|||
return 0;
|
||||
}
|
||||
|
||||
emit:
|
||||
/* Not mergeable, need to submit cached one */
|
||||
ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
|
||||
cache->len, cache->flags);
|
||||
|
@ -2907,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
|
|||
range_end = round_up(start + len, sectorsize);
|
||||
prev_extent_end = range_start;
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
|
||||
|
||||
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
btrfs_release_path(path);
|
||||
|
||||
path->reada = READA_FORWARD;
|
||||
ret = fiemap_search_slot(inode, path, range_start);
|
||||
if (ret < 0) {
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
} else if (ret > 0) {
|
||||
/*
|
||||
* No file extent item found, but we may have delalloc between
|
||||
|
@ -2964,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
|
|||
backref_ctx, 0, 0, 0,
|
||||
prev_extent_end, hole_end);
|
||||
if (ret < 0) {
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
} else if (ret > 0) {
|
||||
/* fiemap_fill_next_extent() told us to stop. */
|
||||
stopped = true;
|
||||
|
@ -3020,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
|
|||
extent_gen,
|
||||
backref_ctx);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
else if (ret > 0)
|
||||
flags |= FIEMAP_EXTENT_SHARED;
|
||||
}
|
||||
|
@ -3031,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
|
|||
}
|
||||
|
||||
if (ret < 0) {
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
} else if (ret > 0) {
|
||||
/* fiemap_fill_next_extent() told us to stop. */
|
||||
stopped = true;
|
||||
|
@ -3042,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
|
|||
next_item:
|
||||
if (fatal_signal_pending(current)) {
|
||||
ret = -EINTR;
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = fiemap_next_leaf_item(inode, path);
|
||||
if (ret < 0) {
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
} else if (ret > 0) {
|
||||
/* No more file extent items for this inode. */
|
||||
break;
|
||||
|
@ -3071,7 +3158,7 @@ check_eof_delalloc:
|
|||
&delalloc_cached_state, backref_ctx,
|
||||
0, 0, 0, prev_extent_end, range_end - 1);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
prev_extent_end = range_end;
|
||||
}
|
||||
|
||||
|
@ -3109,9 +3196,6 @@ check_eof_delalloc:
|
|||
}
|
||||
|
||||
ret = emit_last_fiemap_cache(fieinfo, &cache);
|
||||
|
||||
out_unlock:
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
out:
|
||||
free_extent_state(delalloc_cached_state);
|
||||
btrfs_free_backref_share_ctx(backref_ctx);
|
||||
|
|
|
@ -7835,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
|
|||
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
|
||||
int ret;
|
||||
|
||||
ret = fiemap_prep(inode, fieinfo, start, &len, 0);
|
||||
|
@ -7860,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|||
return ret;
|
||||
}
|
||||
|
||||
return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
|
||||
btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
|
||||
|
||||
/*
|
||||
* We did an initial flush to avoid holding the inode's lock while
|
||||
* triggering writeback and waiting for the completion of IO and ordered
|
||||
* extents. Now after we locked the inode we do it again, because it's
|
||||
* possible a new write may have happened in between those two steps.
|
||||
*/
|
||||
if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
|
||||
ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
|
||||
if (ret) {
|
||||
btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
|
||||
btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_writepages(struct address_space *mapping,
|
||||
|
|
|
@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
|
|||
free_extent_buffer(leaf);
|
||||
leaf = NULL;
|
||||
|
||||
new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
|
||||
new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
|
||||
if (IS_ERR(new_root)) {
|
||||
ret = PTR_ERR(new_root);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
|
|
|
@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
|
||||
key.offset = (u64)-1;
|
||||
pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
|
||||
pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
|
||||
if (IS_ERR(pending->snap)) {
|
||||
ret = PTR_ERR(pending->snap);
|
||||
pending->snap = NULL;
|
||||
|
|
Loading…
Reference in New Issue