bcachefs: Improved btree write statistics
This replaces the sysfs attribute btree_avg_write_size with btree_write_stats, which breaks out statistics by the source of the btree write. Btree writes that are too small are a source of inefficiency and of excessive btree resort overhead; this will let us see what is causing them. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
8852501fe5
commit
46fee692ee
|
@ -596,6 +596,23 @@ typedef struct {
|
|||
#define BCACHEFS_ROOT_SUBVOL_INUM \
|
||||
((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
|
||||
|
||||
/*
 * X-macro list of btree write sources, as x(name, value).
 *
 * The enum below assigns values by position and ignores 'value', while the
 * name table (bch2_btree_write_types[]) indexes by 'value' - so entries must
 * stay numbered sequentially from 0.
 */
#define BCH_BTREE_WRITE_TYPES()						\
	x(initial,		0)					\
	x(init_next_bset,	1)					\
	x(cache_reclaim,	2)					\
	x(journal_reclaim,	3)					\
	x(interior,		4)

enum btree_write_type {
#define x(t, n) BTREE_WRITE_##t,
	BCH_BTREE_WRITE_TYPES()
#undef x
	BTREE_WRITE_TYPE_NR,
};

/*
 * The write type is carried in the low bits of the 'flags' word passed to
 * __bch2_btree_node_write() (extracted with flags & BTREE_WRITE_TYPE_MASK);
 * the BTREE_WRITE_* flag bits start at bit BTREE_WRITE_TYPE_BITS.
 *
 * BITS must be derived from the power of two itself, not from MASK:
 * ilog2(MASK) rounds down and comes out one short (mask 7 -> 2), which would
 * put the first flag bit inside the type field, aliasing it with
 * BTREE_WRITE_interior.
 */
#define BTREE_WRITE_TYPE_MASK	(roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
#define BTREE_WRITE_TYPE_BITS	ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))
|
||||
|
||||
struct bch_fs {
|
||||
struct closure cl;
|
||||
|
||||
|
@ -705,6 +722,13 @@ struct bch_fs {
|
|||
struct workqueue_struct *btree_interior_update_worker;
|
||||
struct work_struct btree_interior_update_work;
|
||||
|
||||
/* btree_io.c: */
|
||||
spinlock_t btree_write_error_lock;
|
||||
struct btree_write_stats {
|
||||
atomic64_t nr;
|
||||
atomic64_t bytes;
|
||||
} btree_write_stats[BTREE_WRITE_TYPE_NR];
|
||||
|
||||
/* btree_iter.c: */
|
||||
struct mutex btree_trans_lock;
|
||||
struct list_head btree_trans_list;
|
||||
|
@ -880,11 +904,6 @@ mempool_t bio_bounce_pages;
|
|||
struct bio_set dio_write_bioset;
|
||||
struct bio_set dio_read_bioset;
|
||||
|
||||
|
||||
atomic64_t btree_writes_nr;
|
||||
atomic64_t btree_writes_sectors;
|
||||
spinlock_t btree_write_error_lock;
|
||||
|
||||
/* ERRORS */
|
||||
struct list_head fsck_errors;
|
||||
struct mutex fsck_error_lock;
|
||||
|
|
|
@ -241,9 +241,11 @@ wait_on_io:
|
|||
* the post write cleanup:
|
||||
*/
|
||||
if (bch2_verify_btree_ondisk)
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_intent, 0);
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_intent,
|
||||
BTREE_WRITE_cache_reclaim);
|
||||
else
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
__bch2_btree_node_write(c, b,
|
||||
BTREE_WRITE_cache_reclaim);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
|
@ -347,7 +349,7 @@ restart:
|
|||
six_trylock_read(&b->c.lock)) {
|
||||
list_move(&bc->live, &b->list);
|
||||
mutex_unlock(&bc->lock);
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
|
||||
six_unlock_read(&b->c.lock);
|
||||
if (touched >= nr)
|
||||
goto out_nounlock;
|
||||
|
@ -624,6 +626,7 @@ out:
|
|||
b->flags = 0;
|
||||
b->written = 0;
|
||||
b->nsets = 0;
|
||||
b->write_type = 0;
|
||||
b->sib_u64s[0] = 0;
|
||||
b->sib_u64s[1] = 0;
|
||||
b->whiteout_u64s = 0;
|
||||
|
@ -1067,7 +1070,7 @@ wait_on_io:
|
|||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
|
||||
if (btree_node_dirty(b)) {
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
goto wait_on_io;
|
||||
|
|
|
@ -471,7 +471,8 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
|
|||
};
|
||||
|
||||
if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_write, 0);
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_write,
|
||||
BTREE_WRITE_init_next_bset);
|
||||
reinit_iter = true;
|
||||
}
|
||||
}
|
||||
|
@ -1646,7 +1647,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
|||
} while ((v = cmpxchg(&b->flags, old, new)) != old);
|
||||
|
||||
if (new & (1U << BTREE_NODE_write_in_flight))
|
||||
__bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED);
|
||||
__bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|b->write_type);
|
||||
else
|
||||
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
|
||||
}
|
||||
|
@ -1795,6 +1796,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
|||
bool used_mempool;
|
||||
unsigned long old, new;
|
||||
bool validate_before_checksum = false;
|
||||
enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
|
||||
void *data;
|
||||
int ret;
|
||||
|
||||
|
@ -1841,6 +1843,12 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
|||
if (new & (1U << BTREE_NODE_need_write))
|
||||
return;
|
||||
do_write:
|
||||
if ((flags & BTREE_WRITE_ONLY_IF_NEED))
|
||||
type = b->write_type;
|
||||
b->write_type = 0;
|
||||
|
||||
BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0));
|
||||
|
||||
atomic_dec(&c->btree_cache.dirty);
|
||||
|
||||
BUG_ON(btree_node_fake(b));
|
||||
|
@ -2015,8 +2023,8 @@ do_write:
|
|||
bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
|
||||
cpu_to_le16(b->written);
|
||||
|
||||
atomic64_inc(&c->btree_writes_nr);
|
||||
atomic64_add(sectors_to_write, &c->btree_writes_sectors);
|
||||
atomic64_inc(&c->btree_write_stats[type].nr);
|
||||
atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
|
||||
|
||||
INIT_WORK(&wbio->work, btree_write_submit);
|
||||
queue_work(c->io_complete_wq, &wbio->work);
|
||||
|
@ -2144,3 +2152,33 @@ bool bch2_btree_flush_all_writes(struct bch_fs *c)
|
|||
{
|
||||
return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
|
||||
}
|
||||
|
||||
const char * const bch2_btree_write_types[] = {
|
||||
#define x(t, n) [n] = #t,
|
||||
BCH_BTREE_WRITE_TYPES()
|
||||
NULL
|
||||
};
|
||||
|
||||
void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
printbuf_tabstop_push(out, 20);
|
||||
printbuf_tabstop_push(out, 10);
|
||||
|
||||
prt_tab(out);
|
||||
prt_str(out, "nr");
|
||||
prt_tab(out);
|
||||
prt_str(out, "size");
|
||||
prt_newline(out);
|
||||
|
||||
for (unsigned i = 0; i < BTREE_WRITE_TYPE_NR; i++) {
|
||||
u64 nr = atomic64_read(&c->btree_write_stats[i].nr);
|
||||
u64 bytes = atomic64_read(&c->btree_write_stats[i].bytes);
|
||||
|
||||
prt_printf(out, "%s:", bch2_btree_write_types[i]);
|
||||
prt_tab(out);
|
||||
prt_u64(out, nr);
|
||||
prt_tab(out);
|
||||
prt_human_readable_u64(out, nr ? div64_u64(bytes, nr) : 0);
|
||||
prt_newline(out);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -139,8 +139,12 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
|
|||
|
||||
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
|
||||
|
||||
#define BTREE_WRITE_ONLY_IF_NEED (1U << 0)
|
||||
#define BTREE_WRITE_ALREADY_STARTED (1U << 1)
|
||||
enum btree_write_flags {
|
||||
__BTREE_WRITE_ONLY_IF_NEED = BTREE_WRITE_TYPE_BITS,
|
||||
__BTREE_WRITE_ALREADY_STARTED,
|
||||
};
|
||||
#define BTREE_WRITE_ONLY_IF_NEED (1U << __BTREE_WRITE_ONLY_IF_NEED )
|
||||
#define BTREE_WRITE_ALREADY_STARTED (1U << __BTREE_WRITE_ALREADY_STARTED)
|
||||
|
||||
void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
|
||||
void bch2_btree_node_write(struct bch_fs *, struct btree *,
|
||||
|
@ -219,4 +223,6 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
|
|||
bn->min_key = bpos_nosnap_successor(bn->min_key);
|
||||
}
|
||||
|
||||
void bch2_btree_write_stats_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_IO_H */
|
||||
|
|
|
@ -77,6 +77,7 @@ struct btree {
|
|||
u8 nsets;
|
||||
u8 nr_key_bits;
|
||||
u16 version_ondisk;
|
||||
u8 write_type;
|
||||
|
||||
struct bkey_format format;
|
||||
|
||||
|
|
|
@ -1308,6 +1308,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
|
|||
bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
|
||||
set_btree_node_dirty_acct(c, b);
|
||||
set_btree_node_need_write(b);
|
||||
b->write_type = BTREE_WRITE_interior;
|
||||
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
|
|
@ -282,6 +282,7 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b,
|
|||
struct bkey_packed k;
|
||||
|
||||
BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
|
||||
EBUG_ON(btree_node_just_written(b));
|
||||
|
||||
if (!bkey_pack_pos(&k, pos, b)) {
|
||||
struct bkey *u = (void *) &k;
|
||||
|
|
|
@ -181,6 +181,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
|||
new |= 1 << BTREE_NODE_need_write;
|
||||
} while ((v = cmpxchg(&b->flags, old, new)) != old);
|
||||
|
||||
b->write_type = BTREE_WRITE_journal_reclaim;
|
||||
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
|
|
|
@ -183,7 +183,7 @@ read_attribute(io_latency_stats_read);
|
|||
read_attribute(io_latency_stats_write);
|
||||
read_attribute(congested);
|
||||
|
||||
read_attribute(btree_avg_write_size);
|
||||
read_attribute(btree_write_stats);
|
||||
|
||||
read_attribute(btree_cache_size);
|
||||
read_attribute(compression_stats);
|
||||
|
@ -250,14 +250,6 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static size_t bch2_btree_avg_write_size(struct bch_fs *c)
|
||||
{
|
||||
u64 nr = atomic64_read(&c->btree_writes_nr);
|
||||
u64 sectors = atomic64_read(&c->btree_writes_sectors);
|
||||
|
||||
return nr ? div64_u64(sectors, nr) : 0;
|
||||
}
|
||||
|
||||
static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
long ret = 0;
|
||||
|
@ -396,7 +388,9 @@ SHOW(bch2_fs)
|
|||
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
|
||||
|
||||
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
|
||||
sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
|
||||
|
||||
if (attr == &sysfs_btree_write_stats)
|
||||
bch2_btree_write_stats_to_text(out, c);
|
||||
|
||||
sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
|
||||
|
||||
|
@ -557,7 +551,7 @@ SYSFS_OPS(bch2_fs);
|
|||
struct attribute *bch2_fs_files[] = {
|
||||
&sysfs_minor,
|
||||
&sysfs_btree_cache_size,
|
||||
&sysfs_btree_avg_write_size,
|
||||
&sysfs_btree_write_stats,
|
||||
|
||||
&sysfs_promote_whole_extents,
|
||||
|
||||
|
|
Loading…
Reference in New Issue