bcachefs: Add IO error counts to bch_member

We now track IO errors per device since filesystem creation.

IO error counts can be viewed in sysfs, or with the 'bcachefs
show-super' command.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-10-25 16:29:37 -04:00
parent 5394fe9494
commit 94119eeb02
17 changed files with 225 additions and 86 deletions

View File

@ -502,6 +502,8 @@ struct bch_dev {
* Committed by bch2_write_super() -> bch_fs_mi_update()
*/
struct bch_member_cpu mi;
atomic64_t errors[BCH_MEMBER_ERROR_NR];
__uuid_t uuid;
char name[BDEVNAME_SIZE];

View File

@ -1268,6 +1268,18 @@ enum bch_iops_measurement {
BCH_IOPS_NR
};
/*
 * Per-device IO error classes, written as an x-macro list of (name, index)
 * pairs. The list is expanded below to build enum bch_member_error_type, and
 * the indices select slots in the errors[] arrays sized by
 * BCH_MEMBER_ERROR_NR.
 *
 * NOTE(review): struct bch_member stores these counters as __le64 arrays, so
 * the existing indices presumably must not be renumbered - confirm before
 * reordering or inserting entries.
 */
#define BCH_MEMBER_ERROR_TYPES() \
x(read, 0) \
x(write, 1) \
x(checksum, 2)
/* One BCH_MEMBER_ERROR_<name> enumerator per entry in the list above. */
enum bch_member_error_type {
#define x(t, n) BCH_MEMBER_ERROR_##t = n,
BCH_MEMBER_ERROR_TYPES()
#undef x
/* Follows the last listed type, so it is usable as the array length: */
BCH_MEMBER_ERROR_NR
};
struct bch_member {
__uuid_t uuid;
__le64 nbuckets; /* device size */
@ -1278,6 +1290,9 @@ struct bch_member {
__le64 flags;
__le32 iops[4];
__le64 errors[BCH_MEMBER_ERROR_NR];
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time;
};
#define BCH_MEMBER_V1_BYTES 56

View File

@ -934,8 +934,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
while (b->written < (ptr_written ?: btree_sectors(c))) {
unsigned sectors;
struct nonce nonce;
struct bch_csum csum;
bool first = !b->written;
bool csum_bad;
if (!b->written) {
i = &b->data->keys;
@ -946,9 +946,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
BSET_CSUM_TYPE(i));
nonce = btree_nonce(i, b->written << 9);
csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
btree_err_on(bch2_crc_cmp(csum, b->data->csum),
csum_bad = bch2_crc_cmp(b->data->csum,
csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data));
if (csum_bad)
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
btree_err_on(csum_bad,
-BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i,
"invalid checksum");
@ -976,9 +980,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
BSET_CSUM_TYPE(i));
nonce = btree_nonce(i, b->written << 9);
csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
csum_bad = bch2_crc_cmp(bne->csum,
csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne));
if (csum_bad)
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
btree_err_on(bch2_crc_cmp(csum, bne->csum),
btree_err_on(csum_bad,
-BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i,
"invalid checksum");
@ -1168,7 +1175,8 @@ static void btree_node_read_work(struct work_struct *work)
start:
printbuf_reset(&buf);
bch2_btree_pos_to_text(&buf, c, b);
bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s",
bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
"btree read error %s for %s",
bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
percpu_ref_put(&ca->io_ref);
@ -1749,7 +1757,8 @@ static void btree_node_write_endio(struct bio *bio)
if (wbio->have_ioref)
bch2_latency_acct(ca, wbio->submit_time, WRITE);
if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write error: %s",
if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
"btree write error: %s",
bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("btree")) {
spin_lock_irqsave(&c->btree_write_error_lock, flags);

View File

@ -373,7 +373,11 @@ static void ec_block_endio(struct bio *bio)
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s error: %s",
if (bch2_dev_io_err_on(bio->bi_status, ca,
bio_data_dir(bio)
? BCH_MEMBER_ERROR_write
: BCH_MEMBER_ERROR_read,
"erasure coding %s error: %s",
bio_data_dir(bio) ? "write" : "read",
bch2_blk_status_to_str(bio->bi_status)))
clear_bit(ec_bio->idx, ec_bio->buf->valid);

View File

@ -56,8 +56,9 @@ void bch2_io_error_work(struct work_struct *work)
up_write(&c->state_lock);
}
void bch2_io_error(struct bch_dev *ca)
/*
 * Record one IO error of class @type against device @ca by atomically
 * incrementing the matching counter in ca->errors[].
 *
 * @type is used directly as the array index; no range check is done here.
 */
void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
{
atomic64_inc(&ca->errors[type]);
/* Queueing of ca->io_error_work is currently disabled: */
//queue_work(system_long_wq, &ca->io_error_work);
}

View File

@ -179,26 +179,26 @@ do { \
void bch2_io_error_work(struct work_struct *);
/* Does the error handling without logging a message */
void bch2_io_error(struct bch_dev *);
void bch2_io_error(struct bch_dev *, enum bch_member_error_type);
#define bch2_dev_io_err_on(cond, ca, ...) \
#define bch2_dev_io_err_on(cond, ca, _type, ...) \
({ \
bool _ret = (cond); \
\
if (_ret) { \
bch_err_dev_ratelimited(ca, __VA_ARGS__); \
bch2_io_error(ca); \
bch2_io_error(ca, _type); \
} \
_ret; \
})
#define bch2_dev_inum_io_err_on(cond, ca, ...) \
#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \
({ \
bool _ret = (cond); \
\
if (_ret) { \
bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \
bch2_io_error(ca); \
bch2_io_error(ca, _type); \
} \
_ret; \
})

View File

@ -643,7 +643,7 @@ csum_err:
"data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
bch2_io_error(ca);
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
goto out;
decompression_err:
@ -677,7 +677,7 @@ static void bch2_read_endio(struct bio *bio)
if (!rbio->split)
rbio->bio.bi_end_io = rbio->end_io;
if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
rbio->read_pos.inode,
rbio->read_pos.offset,
"data read error: %s",

View File

@ -637,7 +637,7 @@ static void bch2_write_endio(struct bio *bio)
struct bch_fs *c = wbio->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
op->pos.inode,
wbio->inode_offset << 9,
"data write error: %s",

View File

@ -900,7 +900,7 @@ reread:
ret = submit_bio_wait(bio);
kfree(bio);
if (bch2_dev_io_err_on(ret, ca,
if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read,
"journal read error: sector %llu",
offset) ||
bch2_meta_read_fault("journal")) {
@ -956,7 +956,8 @@ reread:
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
csum_good = jset_csum_good(c, j);
if (!csum_good)
if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum,
"journal checksum error"))
saw_bad = true;
ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
@ -1581,7 +1582,8 @@ static void journal_write_endio(struct bio *bio)
struct journal_buf *w = journal_last_unwritten_buf(j);
unsigned long flags;
if (bch2_dev_io_err_on(bio->bi_status, ca, "error writing journal entry %llu: %s",
if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
"error writing journal entry %llu: %s",
le64_to_cpu(w->data->seq),
bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("journal")) {

View File

@ -12,11 +12,6 @@
#define x(t, n, ...) [n] = #t,
const char * const bch2_iops_measurements[] = {
BCH_IOPS_MEASUREMENTS()
NULL
};
const char * const bch2_error_actions[] = {
BCH_ERROR_ACTIONS()
NULL

View File

@ -10,7 +10,6 @@
struct bch_fs;
extern const char * const bch2_iops_measurements[];
extern const char * const bch2_error_actions[];
extern const char * const bch2_fsck_fix_opts[];
extern const char * const bch2_version_upgrade_opts[];

View File

@ -7,6 +7,18 @@
#include "sb-members.h"
#include "super-io.h"
/*
 * x-macro expansion shared by the two tables below: slot @n of the array
 * holds the stringified name @t.
 */
#define x(t, n, ...) [n] = #t,
/* Names from BCH_IOPS_MEASUREMENTS(), indexed by entry number; NULL terminated. */
static const char * const bch2_iops_measurements[] = {
BCH_IOPS_MEASUREMENTS()
NULL
};
/*
 * Names from BCH_MEMBER_ERROR_TYPES(), indexed by entry number; NULL
 * terminated.
 *
 * NOTE(review): this is `char * const` while bch2_iops_measurements above is
 * `const char * const`; the pointed-to strings are literals. Making this one
 * const as well would require changing its extern declaration in the header
 * at the same time.
 */
char * const bch2_member_error_strs[] = {
BCH_MEMBER_ERROR_TYPES()
NULL
};
#undef x
/* Code for bch_sb_field_members_v1: */
static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i)
@ -92,7 +104,7 @@ int bch2_members_v2_init(struct bch_fs *c)
return sb_members_v2_resize_entries(c);
}
int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
{
struct bch_sb_field_members_v1 *mi1;
struct bch_sb_field_members_v2 *mi2;
@ -156,7 +168,6 @@ static void member_to_text(struct printbuf *out,
u64 bucket_size = le16_to_cpu(m.bucket_size);
u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
prt_printf(out, "Device:");
prt_tab(out);
prt_printf(out, "%u", i);
@ -164,6 +175,21 @@ static void member_to_text(struct printbuf *out,
printbuf_indent_add(out, 2);
prt_printf(out, "Label:");
prt_tab(out);
if (BCH_MEMBER_GROUP(&m)) {
unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
if (idx < disk_groups_nr(gi))
prt_printf(out, "%s (%u)",
gi->entries[idx].label, idx);
else
prt_printf(out, "(bad disk labels section)");
} else {
prt_printf(out, "(none)");
}
prt_newline(out);
prt_printf(out, "UUID:");
prt_tab(out);
pr_uuid(out, m.uuid.b);
@ -174,6 +200,13 @@ static void member_to_text(struct printbuf *out,
prt_units_u64(out, device_size << 9);
prt_newline(out);
for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
prt_printf(out, "%s errors:", bch2_member_error_strs[i]);
prt_tab(out);
prt_u64(out, le64_to_cpu(m.errors[i]));
prt_newline(out);
}
for (unsigned i = 0; i < BCH_IOPS_NR; i++) {
prt_printf(out, "%s iops:", bch2_iops_measurements[i]);
prt_tab(out);
@ -212,21 +245,6 @@ static void member_to_text(struct printbuf *out,
: "unknown");
prt_newline(out);
prt_printf(out, "Label:");
prt_tab(out);
if (BCH_MEMBER_GROUP(&m)) {
unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
if (idx < disk_groups_nr(gi))
prt_printf(out, "%s (%u)",
gi->entries[idx].label, idx);
else
prt_printf(out, "(bad disk labels section)");
} else {
prt_printf(out, "(none)");
}
prt_newline(out);
prt_printf(out, "Data allowed:");
prt_tab(out);
if (BCH_MEMBER_DATA_ALLOWED(&m))
@ -337,3 +355,72 @@ const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
.validate = bch2_sb_members_v2_validate,
.to_text = bch2_sb_members_v2_to_text,
};
void bch2_sb_members_from_cpu(struct bch_fs *c)
{
struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
struct bch_dev *ca;
unsigned i, e;
rcu_read_lock();
for_each_member_device_rcu(ca, c, i, NULL) {
struct bch_member *m = members_v2_get_mut(mi, i);
for (e = 0; e < BCH_MEMBER_ERROR_NR; e++)
m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
}
rcu_read_unlock();
}
void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
struct bch_member m;
mutex_lock(&ca->fs->sb_lock);
m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
mutex_unlock(&ca->fs->sb_lock);
printbuf_tabstop_push(out, 12);
prt_str(out, "IO errors since filesystem creation");
prt_newline(out);
printbuf_indent_add(out, 2);
for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
prt_printf(out, "%s:", bch2_member_error_strs[i]);
prt_tab(out);
prt_u64(out, atomic64_read(&ca->errors[i]));
prt_newline(out);
}
printbuf_indent_sub(out, 2);
prt_str(out, "IO errors since ");
bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
prt_str(out, " ago");
prt_newline(out);
printbuf_indent_add(out, 2);
for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
prt_printf(out, "%s:", bch2_member_error_strs[i]);
prt_tab(out);
prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
prt_newline(out);
}
printbuf_indent_sub(out, 2);
}
/*
 * Start a new "errors since reset" window for device @ca.
 *
 * With sb_lock held: snapshot the device's current in-memory error counters
 * into the member entry's errors_at_reset[], stamp errors_reset_time with the
 * current wall-clock time in seconds, and write the superblock.
 */
void bch2_dev_errors_reset(struct bch_dev *ca)
{
	struct bch_fs *c = ca->fs;
	struct bch_member *m;

	mutex_lock(&c->sb_lock);
	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
	for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
		m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
	/*
	 * errors_reset_time is a __le64 field that readers decode with
	 * le64_to_cpu(), so it must be stored little endian like the counters
	 * above; assigning the raw CPU-endian value breaks on big-endian hosts.
	 */
	m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
}

View File

@ -2,8 +2,10 @@
#ifndef _BCACHEFS_SB_MEMBERS_H
#define _BCACHEFS_SB_MEMBERS_H
extern char * const bch2_member_error_strs[];
int bch2_members_v2_init(struct bch_fs *c);
int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i);
struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
@ -179,4 +181,42 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1;
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
/* A member slot counts as existing when its uuid is not all zeroes. */
static inline bool bch2_member_exists(struct bch_member *m)
{
	bool uuid_is_zero = bch2_is_zero(&m->uuid, sizeof(m->uuid));

	return !uuid_is_zero;
}
static inline bool bch2_dev_exists(struct bch_sb *sb,
unsigned dev)
{
if (dev < sb->nr_devices) {
struct bch_member m = bch2_sb_member_get(sb, dev);
return bch2_member_exists(&m);
}
return false;
}
static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
{
return (struct bch_member_cpu) {
.nbuckets = le64_to_cpu(mi->nbuckets),
.first_bucket = le16_to_cpu(mi->first_bucket),
.bucket_size = le16_to_cpu(mi->bucket_size),
.group = BCH_MEMBER_GROUP(mi),
.state = BCH_MEMBER_STATE(mi),
.discard = BCH_MEMBER_DISCARD(mi),
.data_allowed = BCH_MEMBER_DATA_ALLOWED(mi),
.durability = BCH_MEMBER_DURABILITY(mi)
? BCH_MEMBER_DURABILITY(mi) - 1
: 1,
.freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
.valid = bch2_member_exists(mi),
};
}
void bch2_sb_members_from_cpu(struct bch_fs *);
void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
void bch2_dev_errors_reset(struct bch_dev *);
#endif /* _BCACHEFS_SB_MEMBERS_H */

View File

@ -805,7 +805,12 @@ static void write_super_endio(struct bio *bio)
/* XXX: return errors directly */
if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write error: %s",
if (bch2_dev_io_err_on(bio->bi_status, ca,
bio_data_dir(bio)
? BCH_MEMBER_ERROR_write
: BCH_MEMBER_ERROR_read,
"superblock %s error: %s",
bio_data_dir(bio) ? "write" : "read",
bch2_blk_status_to_str(bio->bi_status)))
ca->sb_write_error = 1;
@ -892,7 +897,7 @@ int bch2_write_super(struct bch_fs *c)
SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
bch2_sb_counters_from_cpu(c);
bch_members_cpy_v2_v1(&c->disk_sb);
bch2_sb_members_cpy_v2_v1(&c->disk_sb);
for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca);

View File

@ -78,41 +78,6 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
__bch2_check_set_feature(c, feat);
}
/* BCH_SB_FIELD_members_v1: */
static inline bool bch2_member_exists(struct bch_member *m)
{
return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
}
static inline bool bch2_dev_exists(struct bch_sb *sb,
unsigned dev)
{
if (dev < sb->nr_devices) {
struct bch_member m = bch2_sb_member_get(sb, dev);
return bch2_member_exists(&m);
}
return false;
}
static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
{
return (struct bch_member_cpu) {
.nbuckets = le64_to_cpu(mi->nbuckets),
.first_bucket = le16_to_cpu(mi->first_bucket),
.bucket_size = le16_to_cpu(mi->bucket_size),
.group = BCH_MEMBER_GROUP(mi),
.state = BCH_MEMBER_STATE(mi),
.discard = BCH_MEMBER_DISCARD(mi),
.data_allowed = BCH_MEMBER_DATA_ALLOWED(mi),
.durability = BCH_MEMBER_DURABILITY(mi)
? BCH_MEMBER_DURABILITY(mi) - 1
: 1,
.freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
.valid = bch2_member_exists(mi),
};
}
void bch2_sb_maybe_downgrade(struct bch_fs *);
void bch2_sb_upgrade(struct bch_fs *, unsigned);

View File

@ -1131,6 +1131,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
struct bch_member *member)
{
struct bch_dev *ca;
unsigned i;
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
@ -1148,6 +1149,10 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
bch2_time_stats_init(&ca->io_latency[WRITE]);
ca->mi = bch2_mi_to_cpu(member);
for (i = 0; i < ARRAY_SIZE(member->errors); i++)
atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i]));
ca->uuid = member->uuid;
ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,

View File

@ -149,7 +149,9 @@ read_attribute(bucket_size);
read_attribute(first_bucket);
read_attribute(nbuckets);
rw_attribute(durability);
read_attribute(iodone);
read_attribute(io_done);
read_attribute(io_errors);
write_attribute(io_errors_reset);
read_attribute(io_latency_read);
read_attribute(io_latency_write);
@ -880,7 +882,7 @@ static const char * const bch2_rw[] = {
NULL
};
static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca)
{
int rw, i;
@ -923,8 +925,11 @@ SHOW(bch2_dev)
prt_char(out, '\n');
}
if (attr == &sysfs_iodone)
dev_iodone_to_text(out, ca);
if (attr == &sysfs_io_done)
dev_io_done_to_text(out, ca);
if (attr == &sysfs_io_errors)
bch2_dev_io_errors_to_text(out, ca);
sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
@ -991,6 +996,9 @@ STORE(bch2_dev)
return ret;
}
if (attr == &sysfs_io_errors_reset)
bch2_dev_errors_reset(ca);
return size;
}
SYSFS_OPS(bch2_dev);
@ -1008,7 +1016,9 @@ struct attribute *bch2_dev_files[] = {
&sysfs_label,
&sysfs_has_data,
&sysfs_iodone,
&sysfs_io_done,
&sysfs_io_errors,
&sysfs_io_errors_reset,
&sysfs_io_latency_read,
&sysfs_io_latency_write,