for-6.7-rc5-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmV5rTIACgkQxWXV+ddt
 WDuLUg/+Ix/CeA+JY6VZMA2kBHMzmRexSjYONWfQwIL7LPBy4sOuSEaTZt+QQMs+
 AEKau1YfTgo7e9S2DlbZhIWp6P87VFui7Q1E99uJEmKelakvf94DbMrufPTTKjaD
 JG2KB6LsD59yWwfbGHEAVVNGSMRk2LDXzcUWMK6/uzu/7Bcr4ataOymWd86/blUV
 cw5g87uAHpBn+R1ARTf1CkqyYiI9UldNUJmW1q7dwxOyYG+weUtJImosw2Uda76y
 wQXAFQAH3vsFzTC+qjC9Vz7cnyAX9qAw48ODRH7rIT1BQ3yAFQbfXE20jJ/fSE+C
 lz3p05tA9373KAOtLUHmANBwe3NafCnlut6ZYRfpTcEzUslAO5PnajPaHh5Al7uC
 Iwdpy49byoyVFeNf0yECBsuDP8s86HlUALF8mdJabPI1Kl66MUea6KgS1oyO3pCB
 hfqLbpofV4JTywtIRLGQTQvzSwkjPHTbSwtZ9nftTw520a5f7memDu5vi4XzFd+B
 NrJxmz2DrMRlwrLgWg9OXXgx1riWPvHnIoqzjG5W6A9N74Ud1/oz7t3VzjGSQ5S2
 UikRB6iofPE0deD8IF6H6DvFfvQxU9d9BJ6IS9V2zRt5vdgJ2w08FlqbLZewSY4x
 iaQ+L7UYKDjC9hdosXVNu/6fAspyBVdSp2NbKk14fraZtNAoPNs=
 =uF/Q
 -----END PGP SIGNATURE-----

Merge tag 'for-6.7-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
  "Some fixes to quota accounting code, mostly around error handling and
   correctness:

   - free reserves on various error paths, after IO errors or
     transaction abort

   - don't clear reserved range at the folio release time, it'll be
     properly cleared after final write

   - fix integer overflow due to int used when passing around size of
     freed reservations

   - fix a regression in squota accounting that missed some cases with
     delayed refs"

* tag 'for-6.7-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: ensure releasing squota reserve on head refs
  btrfs: don't clear qgroup reserved bit in release_folio
  btrfs: free qgroup pertrans reserve on transaction abort
  btrfs: fix qgroup_free_reserved_data int overflow
  btrfs: free qgroup reserve when ORDERED_IOERR is set
This commit is contained in:
Linus Torvalds 2023-12-14 11:53:00 -08:00
commit bdb2701f0b
11 changed files with 116 additions and 50 deletions

View File

@ -199,7 +199,7 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
start = round_down(start, fs_info->sectorsize);
btrfs_free_reserved_data_space_noquota(fs_info, len);
btrfs_qgroup_free_data(inode, reserved, start, len);
btrfs_qgroup_free_data(inode, reserved, start, len, NULL);
}
/*

View File

@ -4799,6 +4799,32 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
}
}
/*
 * Walk every root in fs_info->fs_roots_radix that carries
 * BTRFS_ROOT_TRANS_TAG, call btrfs_qgroup_free_meta_all_pertrans() on it and
 * clear the tag.
 *
 * The whole walk is done with fs_info->fs_roots_radix_lock held.
 */
static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *batch[8];
	int found;
	int idx;

	spin_lock(&fs_info->fs_roots_radix_lock);
	for (;;) {
		/*
		 * Each pass looks up from index 0 again: every root returned
		 * by the previous pass had its tag cleared below, so the
		 * lookup only yields roots that have not been handled yet.
		 */
		found = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
						   (void **)batch, 0,
						   ARRAY_SIZE(batch),
						   BTRFS_ROOT_TRANS_TAG);
		if (!found)
			break;

		for (idx = 0; idx < found; idx++) {
			struct btrfs_root *root = batch[idx];

			btrfs_qgroup_free_meta_all_pertrans(root);
			radix_tree_tag_clear(&fs_info->fs_roots_radix,
					(unsigned long)root->root_key.objectid,
					BTRFS_ROOT_TRANS_TAG);
		}
	}
	spin_unlock(&fs_info->fs_roots_radix_lock);
}
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_fs_info *fs_info)
{
@ -4827,6 +4853,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
btrfs_free_all_qgroup_pertrans(fs_info);
cur_trans->state =TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
}

View File

@ -1547,6 +1547,23 @@ out:
return ret;
}
/*
 * Hand href->reserved_bytes back via btrfs_qgroup_free_refroot() as a
 * BTRFS_QGROUP_RSV_DATA reservation of the head's owning root.
 *
 * Nothing is freed unless the qgroup mode is BTRFS_QGROUP_MODE_SIMPLE, the
 * head is a data head and the owning root passes is_fstree().
 */
static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info,
				     struct btrfs_delayed_ref_head *href)
{
	const u64 owner = href->owning_root;

	/*
	 * must_insert_reserved is deliberately not consulted: this helper is
	 * called from contexts where that flag has already been unset.
	 */
	if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE)
		return;
	if (!href->is_data)
		return;
	if (!is_fstree(owner))
		return;

	btrfs_qgroup_free_refroot(fs_info, owner, href->reserved_bytes,
				  BTRFS_QGROUP_RSV_DATA);
}
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *node,
@ -1569,7 +1586,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_squota_delta delta = {
.root = href->owning_root,
.num_bytes = node->num_bytes,
.rsv_bytes = href->reserved_bytes,
.is_data = true,
.is_inc = true,
.generation = trans->transid,
@ -1586,11 +1602,9 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
flags, ref->objectid,
ref->offset, &key,
node->ref_mod, href->owning_root);
free_head_ref_squota_rsv(trans->fs_info, href);
if (!ret)
ret = btrfs_record_squota_delta(trans->fs_info, &delta);
else
btrfs_qgroup_free_refroot(trans->fs_info, delta.root,
delta.rsv_bytes, BTRFS_QGROUP_RSV_DATA);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root,
ref->objectid, ref->offset,
@ -1742,7 +1756,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_squota_delta delta = {
.root = href->owning_root,
.num_bytes = fs_info->nodesize,
.rsv_bytes = 0,
.is_data = false,
.is_inc = true,
.generation = trans->transid,
@ -1774,8 +1787,10 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
int ret = 0;
if (TRANS_ABORTED(trans)) {
if (insert_reserved)
if (insert_reserved) {
btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
free_head_ref_squota_rsv(trans->fs_info, href);
}
return 0;
}
@ -1871,6 +1886,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head)
{
u64 ret = 0;
/*
* We had csum deletions accounted for in our delayed refs rsv, we need
* to drop the csum leaves for this update from our delayed_refs_rsv.
@ -1885,14 +1902,13 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums);
return btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
ret = btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
}
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE &&
head->must_insert_reserved && head->is_data)
btrfs_qgroup_free_refroot(fs_info, head->owning_root,
head->reserved_bytes, BTRFS_QGROUP_RSV_DATA);
/* must_insert_reserved can be set only if we didn't run the head ref. */
if (head->must_insert_reserved)
free_head_ref_squota_rsv(fs_info, head);
return 0;
return ret;
}
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
@ -2033,6 +2049,12 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
* spin lock.
*/
must_insert_reserved = locked_ref->must_insert_reserved;
/*
* Unsetting this on the head ref relinquishes ownership of
* the rsv_bytes, so it is critical that every possible code
* path from here forward frees all reserves including qgroup
* reserve.
*/
locked_ref->must_insert_reserved = false;
extent_op = locked_ref->extent_op;
@ -3292,7 +3314,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_squota_delta delta = {
.root = delayed_ref_root,
.num_bytes = num_bytes,
.rsv_bytes = 0,
.is_data = is_data,
.is_inc = false,
.generation = btrfs_extent_generation(leaf, ei),
@ -4937,7 +4958,6 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
.root = root_objectid,
.num_bytes = ins->offset,
.generation = trans->transid,
.rsv_bytes = 0,
.is_data = true,
.is_inc = true,
};

View File

@ -2302,7 +2302,8 @@ static int try_release_extent_state(struct extent_io_tree *tree,
ret = 0;
} else {
u32 clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM |
EXTENT_DELALLOC_NEW | EXTENT_CTLBITS);
EXTENT_DELALLOC_NEW | EXTENT_CTLBITS |
EXTENT_QGROUP_RESERVED);
/*
* At this point we can safely clear everything except the

View File

@ -3192,7 +3192,7 @@ static long btrfs_fallocate(struct file *file, int mode,
qgroup_reserved -= range->len;
} else if (qgroup_reserved > 0) {
btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved,
range->start, range->len);
range->start, range->len, NULL);
qgroup_reserved -= range->len;
}
list_del(&range->list);

View File

@ -688,7 +688,7 @@ out:
* And at reserve time, it's always aligned to page size, so
* just free one page here.
*/
btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE, NULL);
btrfs_free_path(path);
btrfs_end_transaction(trans);
return ret;
@ -5132,7 +5132,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
*/
if (state_flags & EXTENT_DELALLOC)
btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
end - start + 1);
end - start + 1, NULL);
clear_extent_bit(io_tree, start, end,
EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING,
@ -8059,7 +8059,7 @@ next:
* reserved data space.
* Since the IO will never happen for this page.
*/
btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur, NULL);
if (!inode_evicting) {
clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_UPTODATE |
@ -9491,7 +9491,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
struct btrfs_path *path;
u64 start = ins->objectid;
u64 len = ins->offset;
int qgroup_released;
u64 qgroup_released = 0;
int ret;
memset(&stack_fi, 0, sizeof(stack_fi));
@ -9504,9 +9504,9 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
/* Encryption and other encoding is reserved and all 0 */
qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
if (qgroup_released < 0)
return ERR_PTR(qgroup_released);
ret = btrfs_qgroup_release_data(inode, file_offset, len, &qgroup_released);
if (ret < 0)
return ERR_PTR(ret);
if (trans) {
ret = insert_reserved_file_extent(trans, inode,
@ -10401,7 +10401,7 @@ out_delalloc_release:
btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0);
out_qgroup_free_data:
if (ret < 0)
btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes);
btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes, NULL);
out_free_data_space:
/*
* If btrfs_reserve_extent() succeeded, then we already decremented

View File

@ -152,11 +152,12 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
{
struct btrfs_ordered_extent *entry;
int ret;
u64 qgroup_rsv = 0;
if (flags &
((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
/* For nocow write, we can release the qgroup rsv right now */
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv);
if (ret < 0)
return ERR_PTR(ret);
} else {
@ -164,7 +165,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
* The ordered extent has reserved qgroup space, release now
* and pass the reserved number for qgroup_record to free.
*/
ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv);
if (ret < 0)
return ERR_PTR(ret);
}
@ -182,7 +183,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
entry->inode = igrab(&inode->vfs_inode);
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
entry->qgroup_rsv = qgroup_rsv;
entry->flags = flags;
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
@ -599,7 +600,9 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
release = entry->disk_num_bytes;
else
release = entry->num_bytes;
btrfs_delalloc_release_metadata(btrfs_inode, release, false);
btrfs_delalloc_release_metadata(btrfs_inode, release,
test_bit(BTRFS_ORDERED_IOERR,
&entry->flags));
}
percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,

View File

@ -4057,13 +4057,14 @@ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
/* Free ranges specified by @reserved, normally in error path */
static int qgroup_free_reserved_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len)
struct extent_changeset *reserved,
u64 start, u64 len, u64 *freed_ret)
{
struct btrfs_root *root = inode->root;
struct ulist_node *unode;
struct ulist_iterator uiter;
struct extent_changeset changeset;
int freed = 0;
u64 freed = 0;
int ret;
extent_changeset_init(&changeset);
@ -4104,7 +4105,9 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode,
}
btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed,
BTRFS_QGROUP_RSV_DATA);
ret = freed;
if (freed_ret)
*freed_ret = freed;
ret = 0;
out:
extent_changeset_release(&changeset);
return ret;
@ -4112,7 +4115,7 @@ out:
static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len,
int free)
u64 *released, int free)
{
struct extent_changeset changeset;
int trace_op = QGROUP_RELEASE;
@ -4128,7 +4131,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
/* In release case, we shouldn't have @reserved */
WARN_ON(!free && reserved);
if (free && reserved)
return qgroup_free_reserved_data(inode, reserved, start, len);
return qgroup_free_reserved_data(inode, reserved, start, len, released);
extent_changeset_init(&changeset);
ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
EXTENT_QGROUP_RESERVED, &changeset);
@ -4143,7 +4146,8 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
btrfs_qgroup_free_refroot(inode->root->fs_info,
inode->root->root_key.objectid,
changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
ret = changeset.bytes_changed;
if (released)
*released = changeset.bytes_changed;
out:
extent_changeset_release(&changeset);
return ret;
@ -4162,9 +4166,10 @@ out:
* NOTE: This function may sleep for memory allocation.
*/
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len)
struct extent_changeset *reserved,
u64 start, u64 len, u64 *freed)
{
return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
return __btrfs_qgroup_release_data(inode, reserved, start, len, freed, 1);
}
/*
@ -4182,9 +4187,9 @@ int btrfs_qgroup_free_data(struct btrfs_inode *inode,
*
* NOTE: This function may sleep for memory allocation.
*/
int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len)
int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released)
{
return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
return __btrfs_qgroup_release_data(inode, NULL, start, len, released, 0);
}
static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
@ -4332,8 +4337,9 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
qgroup_rsv_release(fs_info, qgroup, num_bytes,
BTRFS_QGROUP_RSV_META_PREALLOC);
qgroup_rsv_add(fs_info, qgroup, num_bytes,
BTRFS_QGROUP_RSV_META_PERTRANS);
if (!sb_rdonly(fs_info->sb))
qgroup_rsv_add(fs_info, qgroup, num_bytes,
BTRFS_QGROUP_RSV_META_PERTRANS);
list_for_each_entry(glist, &qgroup->groups, next_group)
qgroup_iterator_add(&qgroup_list, glist->group);
@ -4655,6 +4661,17 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
*root = RB_ROOT;
}
/*
 * Free @rsv_bytes of BTRFS_QGROUP_RSV_DATA reservation for @root through
 * btrfs_qgroup_free_refroot().
 *
 * This is a no-op unless the qgroup mode is BTRFS_QGROUP_MODE_SIMPLE and
 * @root passes is_fstree().
 */
void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes)
{
	if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE &&
	    is_fstree(root))
		btrfs_qgroup_free_refroot(fs_info, root, rsv_bytes,
					  BTRFS_QGROUP_RSV_DATA);
}
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
struct btrfs_squota_delta *delta)
{
@ -4699,8 +4716,5 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
out:
spin_unlock(&fs_info->qgroup_lock);
if (!ret && delta->rsv_bytes)
btrfs_qgroup_free_refroot(fs_info, root, delta->rsv_bytes,
BTRFS_QGROUP_RSV_DATA);
return ret;
}

View File

@ -274,8 +274,6 @@ struct btrfs_squota_delta {
u64 root;
/* The number of bytes in the extent being counted. */
u64 num_bytes;
/* The number of bytes reserved for this extent. */
u64 rsv_bytes;
/* The generation the extent was created in. */
u64 generation;
/* Whether we are using or freeing the extent. */
@ -358,10 +356,10 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
/* New io_tree based accurate qgroup reserve API */
int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released);
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start,
u64 len);
u64 len, u64 *freed);
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce);
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
@ -422,6 +420,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *eb);
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info);
void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes);
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
struct btrfs_squota_delta *delta);

View File

@ -37,8 +37,6 @@
static struct kmem_cache *btrfs_trans_handle_cachep;
#define BTRFS_ROOT_TRANS_TAG 0
/*
* Transaction states and transitions
*

View File

@ -12,6 +12,9 @@
#include "ctree.h"
#include "misc.h"
/* Radix-tree tag for roots that are part of the transaction. */
#define BTRFS_ROOT_TRANS_TAG 0
enum btrfs_trans_state {
TRANS_STATE_RUNNING,
TRANS_STATE_COMMIT_PREP,