bcachefs: Fix rebalance_work accounting

rebalance_work was keying off of the presence of rebalance_opts in the
extent, but that was incorrect: we keep those around after rebalance
for indirect extents, since the inode's options are not directly
available

Fixes: 20ac515a9c ("bcachefs: bch_acct_rebalance_work")
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet <kent.overstreet@linux.dev>  2024-08-23 15:35:22 -04:00
parent d3204616a6
commit 49aa783039
5 changed files with 98 additions and 27 deletions
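In effect, the patch changes what the rebalance_work counter is keyed on: from "rebalance options are present in the key" to "sectors in the key that still need rebalancing". Below is a minimal userspace model of the before/after behaviour; struct extent, delta_old(), delta_new() and the numbers are purely illustrative stand-ins, not bcachefs API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for an extent; not the real bcachefs bkey types. */
struct extent {
	bool		has_rebalance_opts;	/* rebalance options stored in the key */
	uint64_t	replicas_sectors;	/* sectors counted for this key's replicas */
	uint64_t	sectors_need_rebalance;	/* sectors that still need to be moved */
};

/*
 * Old criterion: charge all replicas sectors to rebalance_work whenever
 * rebalance options are present in the key.  Indirect extents keep their
 * options after the data has been moved (the inode's options aren't
 * reachable from the reflink btree), so the counter never drains.
 */
static int64_t delta_old(const struct extent *old, const struct extent *new)
{
	return (int64_t) (new->has_rebalance_opts ? new->replicas_sectors : 0) -
	       (int64_t) (old->has_rebalance_opts ? old->replicas_sectors : 0);
}

/* New criterion: charge only the sectors that actually still need rebalancing. */
static int64_t delta_new(const struct extent *old, const struct extent *new)
{
	return (int64_t) new->sectors_need_rebalance -
	       (int64_t) old->sectors_need_rebalance;
}

int main(void)
{
	/* An indirect extent whose data has already been rebalanced: the
	 * options remain in the key, but no sectors need moving any more. */
	struct extent before = { true, 128, 128 };
	struct extent after  = { true, 128, 0 };

	printf("old accounting delta: %lld\n", (long long) delta_old(&before, &after));  /* 0: work never drains */
	printf("new accounting delta: %lld\n", (long long) delta_new(&before, &after));  /* -128: work drains */
	return 0;
}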

fs/bcachefs/bcachefs_format.h

@@ -677,7 +677,8 @@ struct bch_sb_field_ext {
 	x(bucket_stripe_sectors,	BCH_VERSION(1, 8))		\
 	x(disk_accounting_v2,		BCH_VERSION(1, 9))		\
 	x(disk_accounting_v3,		BCH_VERSION(1, 10))		\
-	x(disk_accounting_inum,		BCH_VERSION(1, 11))
+	x(disk_accounting_inum,		BCH_VERSION(1, 11))		\
+	x(rebalance_work_acct_fix,	BCH_VERSION(1, 12))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,

fs/bcachefs/buckets.c

@@ -699,7 +699,8 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
 static int __trigger_extent(struct btree_trans *trans,
 			    enum btree_id btree_id, unsigned level,
 			    struct bkey_s_c k,
-			    enum btree_iter_update_trigger_flags flags)
+			    enum btree_iter_update_trigger_flags flags,
+			    s64 *replicas_sectors)
 {
 	bool gc = flags & BTREE_TRIGGER_gc;
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -708,7 +709,6 @@ static int __trigger_extent(struct btree_trans *trans,
 	enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
 		? BCH_DATA_btree
 		: BCH_DATA_user;
-	s64 replicas_sectors = 0;
 	int ret = 0;
 
 	struct disk_accounting_pos acc_replicas_key = {
@@ -739,7 +739,7 @@ static int __trigger_extent(struct btree_trans *trans,
 			if (ret)
 				return ret;
 		} else if (!p.has_ec) {
-			replicas_sectors += disk_sectors;
+			*replicas_sectors += disk_sectors;
 			acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev;
 		} else {
 			ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
@@ -777,7 +777,7 @@ static int __trigger_extent(struct btree_trans *trans,
 	}
 
 	if (acc_replicas_key.replicas.nr_devs) {
-		ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc);
+		ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc);
 		if (ret)
 			return ret;
 	}
@@ -787,7 +787,7 @@ static int __trigger_extent(struct btree_trans *trans,
 			.type = BCH_DISK_ACCOUNTING_snapshot,
 			.snapshot.id = k.k->p.snapshot,
 		};
-		ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, &replicas_sectors, 1, gc);
+		ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc);
 		if (ret)
 			return ret;
 	}
@@ -807,7 +807,7 @@ static int __trigger_extent(struct btree_trans *trans,
 			.type = BCH_DISK_ACCOUNTING_btree,
 			.btree.id = btree_id,
 		};
-		ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc);
+		ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc);
 		if (ret)
 			return ret;
 	} else {
@@ -819,22 +819,13 @@ static int __trigger_extent(struct btree_trans *trans,
 		s64 v[3] = {
 			insert ? 1 : -1,
 			insert ? k.k->size : -((s64) k.k->size),
-			replicas_sectors,
+			*replicas_sectors,
 		};
 		ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
 		if (ret)
 			return ret;
 	}
 
-	if (bch2_bkey_rebalance_opts(k)) {
-		struct disk_accounting_pos acc = {
-			.type = BCH_DISK_ACCOUNTING_rebalance_work,
-		};
-
-		ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
@@ -843,6 +834,7 @@ int bch2_trigger_extent(struct btree_trans *trans,
 			struct bkey_s_c old, struct bkey_s new,
 			enum btree_iter_update_trigger_flags flags)
 {
+	struct bch_fs *c = trans->c;
 	struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c);
 	struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old);
 	unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start;
@@ -858,22 +850,54 @@ int bch2_trigger_extent(struct btree_trans *trans,
 		   new_ptrs_bytes))
 		return 0;
 
-	if (flags & BTREE_TRIGGER_transactional) {
-		struct bch_fs *c = trans->c;
-		int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) -
-			  (int) bch2_bkey_needs_rebalance(c, old);
+	if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
+		s64 old_replicas_sectors = 0, new_replicas_sectors = 0;
+
+		if (old.k->type) {
+			int ret = __trigger_extent(trans, btree, level, old,
+						   flags & ~BTREE_TRIGGER_insert,
+						   &old_replicas_sectors);
+			if (ret)
+				return ret;
+		}
+
+		if (new.k->type) {
+			int ret = __trigger_extent(trans, btree, level, new.s_c,
						   flags & ~BTREE_TRIGGER_overwrite,
+						   &new_replicas_sectors);
+			if (ret)
+				return ret;
+		}
+
+		int need_rebalance_delta = 0;
+		s64 need_rebalance_sectors_delta = 0;
+
+		s64 s = bch2_bkey_sectors_need_rebalance(c, old);
+		need_rebalance_delta -= s != 0;
+		need_rebalance_sectors_delta -= s;
+
+		s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
+		need_rebalance_delta += s != 0;
+		need_rebalance_sectors_delta += s;
 
-		if (mod) {
+		if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) {
 			int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
-					new.k->p, mod > 0);
+					new.k->p, need_rebalance_delta > 0);
 			if (ret)
 				return ret;
 		}
+
+		if (need_rebalance_sectors_delta) {
+			struct disk_accounting_pos acc = {
+				.type = BCH_DISK_ACCOUNTING_rebalance_work,
+			};
+
+			int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1,
+							   flags & BTREE_TRIGGER_gc);
+			if (ret)
+				return ret;
+		}
 	}
 
-	if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc))
-		return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree, level, old, new, flags);
-
 	return 0;
 }
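A worked example of the delta logic above, with assumed sector counts; the s64 typedef and the numbers are illustrative stand-ins, and in the kernel the two values come from bch2_bkey_sectors_need_rebalance() on the old and new keys.

#include <stdint.h>
#include <stdio.h>

typedef int64_t s64;	/* userspace stand-in for the kernel type */

int main(void)
{
	/* Assumed: the overwritten key still needed 64 sectors of rebalance,
	 * the inserted key needs none (its data now matches target/compression). */
	s64 s_old = 64, s_new = 0;

	int need_rebalance_delta = 0;
	s64 need_rebalance_sectors_delta = 0;

	need_rebalance_delta		-= s_old != 0;
	need_rebalance_sectors_delta	-= s_old;

	need_rebalance_delta		+= s_new != 0;
	need_rebalance_sectors_delta	+= s_new;

	/* -1: the key's bit in the rebalance_work btree gets cleared;
	 * -64: 64 sectors are subtracted from BCH_DISK_ACCOUNTING_rebalance_work. */
	printf("bit delta %d, sectors delta %lld\n",
	       need_rebalance_delta, (long long) need_rebalance_sectors_delta);
	return 0;
}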

fs/bcachefs/extents.c

@@ -1379,6 +1379,45 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
 	return r != NULL;
 }
 
+static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
+					       unsigned target, unsigned compression)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
+	u64 sectors = 0;
+
+	if (compression) {
+		unsigned compression_type = bch2_compression_opt_to_type(compression);
+
+		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+			if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
+			    p.ptr.unwritten) {
+				sectors = 0;
+				goto incompressible;
+			}
+
+			if (!p.ptr.cached && p.crc.compression_type != compression_type)
+				sectors += p.crc.compressed_size;
+		}
+	}
+incompressible:
+	if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
+		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+			if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target))
+				sectors += p.crc.compressed_size;
+	}
+
+	return sectors;
+}
+
+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{
+	const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
+
+	return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0;
+}
+
 int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
 				  struct bch_io_opts *opts)
 {

fs/bcachefs/extents.h

@@ -692,6 +692,7 @@ const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c);
 unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
 				       unsigned, unsigned);
 bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
 
 int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
 				  struct bch_io_opts *);

fs/bcachefs/sb-downgrade.c

@@ -74,6 +74,9 @@
 	  BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted,	\
 	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
 	x(disk_accounting_inum,					\
 	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
-	  BCH_FSCK_ERR_accounting_mismatch)
+	  BCH_FSCK_ERR_accounting_mismatch)			\
+	x(rebalance_work_acct_fix,				\
+	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
+	  BCH_FSCK_ERR_accounting_mismatch)
@@ -108,7 +111,10 @@
 	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
 	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
 	  BCH_FSCK_ERR_accounting_replicas_not_marked,		\
-	  BCH_FSCK_ERR_bkey_version_in_future)
+	  BCH_FSCK_ERR_bkey_version_in_future)			\
+	x(rebalance_work_acct_fix,				\
+	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
+	  BCH_FSCK_ERR_accounting_mismatch)
 
 struct upgrade_downgrade_entry {
 	u64	recovery_passes;