bcachefs: bch_sb_field_downgrade

Add a new superblock section that contains a list of
  { minor version, recovery passes, errors_to_fix }

that is - a list of recovery passes that must be run when downgrading
past a given version, and a list of errors to silently fix.

The upcoming disk accounting rewrite is not going to be fully
compatible: we're going to have to regenerate accounting both when
upgrading to the new version and when downgrading from the new
version, since the new method of doing disk space accounting is a
completely different architecture based on deltas, and synchronizing
them for every journal entry write to maintain compatibility is going to
be too expensive and impractical.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-12-29 15:25:07 -05:00
parent 8b16413cda
commit 84f1638795
10 changed files with 255 additions and 9 deletions

View File

@ -71,6 +71,7 @@ bcachefs-y := \
reflink.o \
replicas.o \
sb-clean.o \
sb-downgrade.o \
sb-errors.o \
sb-members.o \
siphash.o \

View File

@ -1220,7 +1220,8 @@ struct bch_sb_field {
x(counters, 10) \
x(members_v2, 11) \
x(errors, 12) \
x(ext, 13)
x(ext, 13) \
x(downgrade, 14)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@ -1638,6 +1639,18 @@ struct bch_sb_field_ext {
__le64 errors_silent[8];
};
/*
 * One variable-length entry in the downgrade superblock section: the work
 * required when downgrading past @version. Entries are stored back to back;
 * the next entry starts right after this entry's errors[] array.
 *
 * All fields are little endian on disk.
 */
struct bch_sb_field_downgrade_entry {
/* metadata version this entry applies to */
__le16 version;
/* bitmask of recovery passes to run; word [1] is currently always written as 0 */
__le64 recovery_passes[2];
/* number of elements in errors[] */
__le16 nr_errors;
/* IDs of the errors to fix silently */
__le16 errors[] __counted_by(nr_errors);
} __packed __aligned(2);
/*
 * Downgrade superblock section: the common superblock field header followed by
 * a packed list of variable-length entries. Because entries differ in size,
 * entries[] cannot be indexed directly; it has to be walked entry by entry.
 */
struct bch_sb_field_downgrade {
struct bch_sb_field field;
struct bch_sb_field_downgrade_entry entries[];
};
/* Superblock: */
/*
@ -1651,6 +1664,11 @@ struct bch_sb_field_ext {
#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
/*
* field 1: version name
* field 2: BCH_VERSION(major, minor)
* field 3: recovery passes required on upgrade
*/
#define BCH_METADATA_VERSIONS() \
x(bkey_renumber, BCH_VERSION(0, 10), \
RECOVERY_PASS_ALL_FSCK) \

View File

@ -95,6 +95,7 @@
x(ENOSPC, ENOSPC_sb_members) \
x(ENOSPC, ENOSPC_sb_members_v2) \
x(ENOSPC, ENOSPC_sb_crypt) \
x(ENOSPC, ENOSPC_sb_downgrade) \
x(ENOSPC, ENOSPC_btree_slot) \
x(ENOSPC, ENOSPC_snapshot_tree) \
x(ENOENT, ENOENT_bkey_type_mismatch) \
@ -219,6 +220,7 @@
x(BCH_ERR_invalid_sb, invalid_sb_errors) \
x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
x(BCH_ERR_invalid_sb, invalid_sb_ext) \
x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \
x(BCH_ERR_invalid, invalid_bkey) \
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \

View File

@ -27,6 +27,7 @@
#include "recovery.h"
#include "replicas.h"
#include "sb-clean.h"
#include "sb-downgrade.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super-io.h"
@ -744,6 +745,27 @@ int bch2_fs_recovery(struct bch_fs *c)
printbuf_exit(&buf);
}
if (bch2_check_version_downgrade(c)) {
struct printbuf buf = PRINTBUF;
prt_str(&buf, "Version downgrade required:\n");
__le64 passes = ext->recovery_passes_required[0];
bch2_sb_set_downgrade(c,
BCH_VERSION_MINOR(bcachefs_metadata_version_current),
BCH_VERSION_MINOR(c->sb.version));
passes = ext->recovery_passes_required[0] & ~passes;
if (passes) {
prt_str(&buf, " running recovery passes: ");
prt_bitflags(&buf, bch2_recovery_passes,
bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
}
bch_info(c, "%s", buf.buf);
printbuf_exit(&buf);
write_sb = true;
}
if (check_version_upgrade(c))
write_sb = true;
@ -1022,7 +1044,7 @@ int bch2_fs_initialize(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
bch2_sb_maybe_downgrade(c);
bch2_check_version_downgrade(c);
if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
bch2_sb_upgrade(c, bcachefs_metadata_version_current);

View File

@ -332,8 +332,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
bch2_sb_maybe_downgrade(c);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
ret = bch2_write_super(c);

188
fs/bcachefs/sb-downgrade.c Normal file
View File

@ -0,0 +1,188 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Superblock section that contains a list of recovery passes to run when
* downgrading past a given version
*/
#include "bcachefs.h"
#include "darray.h"
#include "recovery.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"
/*
* Downgrade table:
* When downgrading past certain versions, we need to run certain recovery passes
* and fix certain errors:
*
* x(version, recovery_passes, errors...)
*/
#define DOWNGRADE_TABLE()
/*
 * In-memory (native endian) form of one DOWNGRADE_TABLE() row; converted to
 * struct bch_sb_field_downgrade_entry when the superblock section is written.
 */
struct downgrade_entry {
/* bitmask of recovery passes to run when downgrading past @version */
u64 recovery_passes;
/* metadata version the row applies to */
u16 version;
/* number of elements in @errors */
u16 nr_errors;
/* IDs of the errors to fix silently */
const u16 *errors;
};
/*
 * DOWNGRADE_TABLE() is expanded twice: first into one static array of error
 * IDs per table row, then into downgrade_table[] itself, whose rows point at
 * those arrays.
 *
 * The version name must be pasted into the array identifier
 * (downgrade_<ver>_errors): "ver_##errors" does not reference the macro
 * parameter at all - it always expands to the single identifier "ver_errors",
 * so a second row in DOWNGRADE_TABLE() would redefine the same symbol.
 */
#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x

static const struct downgrade_entry downgrade_table[] = {
#define x(ver, passes, ...) {						\
	.recovery_passes	= passes,				\
	.version		= bcachefs_metadata_version_##ver,	\
	.nr_errors		= ARRAY_SIZE(downgrade_##ver##_errors),	\
	.errors			= downgrade_##ver##_errors,		\
},
DOWNGRADE_TABLE()
#undef x
};
/*
 * Return the address immediately after @e's errors[] array, which is where the
 * following entry (if any) begins. No bounds checking is done here; callers
 * must check the result against the end of the section.
 */
static inline const struct bch_sb_field_downgrade_entry *
downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
{
	unsigned nr_errors = le16_to_cpu(e->nr_errors);

	return (void *) &e->errors[nr_errors];
}
/*
 * Iterate over the variable-length entries of downgrade section @_d, declaring
 * @_i as a const pointer to the current entry.
 *
 * An entry is only visited when it lies entirely inside the section:
 *  - it must start before the end of the section,
 *  - its fixed-size header must fit (<=, not <: a final entry with no errors
 *    ends exactly at the end of the section and must not be skipped),
 *  - its errors[] array, sized by the on-disk nr_errors, must fit as well, so
 *    that a bad count cannot make the loop body read past the section.
 *
 * The header check is evaluated before downgrade_entry_next_c() so that
 * nr_errors is only read once it is known to be in bounds.
 */
#define for_each_downgrade_entry(_d, _i)						\
	for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;	\
	     (void *) _i < vstruct_end(&(_d)->field) &&				\
	     (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) &&		\
	     (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field);	\
	     _i = downgrade_entry_next_c(_i))
static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
struct printbuf *err)
{
struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
for_each_downgrade_entry(e, i) {
if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
return -BCH_ERR_invalid_sb_downgrade;
}
}
return 0;
}
static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
if (out->nr_tabstops <= 1)
printbuf_tabstop_push(out, 16);
for_each_downgrade_entry(e, i) {
prt_str(out, "version:");
prt_tab(out);
bch2_version_to_text(out, le16_to_cpu(i->version));
prt_newline(out);
prt_str(out, "recovery passes:");
prt_tab(out);
prt_bitflags(out, bch2_recovery_passes,
bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
prt_newline(out);
prt_str(out, "errors:");
prt_tab(out);
bool first = true;
for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
if (!first)
prt_char(out, ',');
first = false;
unsigned e = le16_to_cpu(i->errors[j]);
prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
}
prt_newline(out);
}
}
/* Superblock field operations for the downgrade section: validation and text output. */
const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
.validate = bch2_sb_downgrade_validate,
.to_text = bch2_sb_downgrade_to_text,
};
/*
 * Write this build's downgrade table into the superblock's downgrade section.
 *
 * Rows of downgrade_table[] whose major version differs from the superblock's
 * are left out. The rows are first serialized (little endian) into a temporary
 * buffer, then copied into the section after resizing it to fit.
 *
 * If the superblock already has a downgrade section larger than the one that
 * would be written, it is left untouched.
 *
 * Returns 0 on success (including the "left untouched" case),
 * -BCH_ERR_ENOSPC_sb_downgrade if the section could not be resized, or the
 * error from darray_make_room() if the temporary buffer could not be grown.
 */
int bch2_sb_downgrade_update(struct bch_fs *c)
{
	const struct downgrade_entry *table_end =
		downgrade_table + ARRAY_SIZE(downgrade_table);
	struct bch_sb_field_downgrade *d;
	darray_char buf = {};
	unsigned sb_u64s;
	int ret = 0;

	for (const struct downgrade_entry *src = downgrade_table;
	     src < table_end;
	     src++) {
		struct bch_sb_field_downgrade_entry *dst;
		unsigned bytes;

		if (BCH_VERSION_MAJOR(src->version) !=
		    BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
			continue;

		bytes = sizeof(*dst) + src->nr_errors * sizeof(dst->errors[0]);

		ret = darray_make_room(&buf, bytes);
		if (ret)
			goto out;

		/* serialize the row directly at the current end of the buffer */
		dst = (void *) &darray_top(buf);
		dst->version		= cpu_to_le16(src->version);
		dst->recovery_passes[0]	= cpu_to_le64(src->recovery_passes);
		dst->recovery_passes[1]	= 0;
		dst->nr_errors		= cpu_to_le16(src->nr_errors);
		for (unsigned idx = 0; idx < src->nr_errors; idx++)
			dst->errors[idx] = cpu_to_le16(src->errors[idx]);

		buf.nr += bytes;
	}

	d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
	sb_u64s = DIV_ROUND_UP(sizeof(*d) + buf.nr, sizeof(u64));

	/* an existing, larger section is kept as is */
	if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
		goto out;

	d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
	if (d) {
		memcpy(d->entries, buf.data, buf.nr);
		memset_u64s_tail(d->entries, 0, buf.nr);
	} else {
		ret = -BCH_ERR_ENOSPC_sb_downgrade;
	}
out:
	darray_exit(&buf);
	return ret;
}
void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
{
struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
if (!d)
return;
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
for_each_downgrade_entry(d, i) {
unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
if (new_minor < minor && minor <= old_minor) {
ext->recovery_passes_required[0] |= i->recovery_passes[0];
ext->recovery_passes_required[1] |= i->recovery_passes[1];
for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
unsigned e = le16_to_cpu(i->errors[j]);
if (e < BCH_SB_ERR_MAX)
__set_bit(e, c->sb.errors_silent);
if (e < sizeof(ext->errors_silent) * 8)
ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64));
}
}
}
}

View File

@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SB_DOWNGRADE_H
#define _BCACHEFS_SB_DOWNGRADE_H
/* Superblock field operations (validate, to_text) for the downgrade section */
extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
/*
 * Write this build's downgrade table into the superblock's downgrade section;
 * returns 0 or a negative error code
 */
int bch2_sb_downgrade_update(struct bch_fs *);
/*
 * Apply the downgrade section for a downgrade between two minor versions;
 * arguments are (c, new_minor, old_minor)
 */
void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
#endif /* _BCACHEFS_SB_DOWNGRADE_H */

View File

@ -20,9 +20,7 @@ static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id
static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e)
{
return e
? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0])
: 0;
return bch2_sb_field_nr_entries(e);
}
static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)

View File

@ -13,6 +13,7 @@
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "sb-members.h"
#include "super-io.h"
@ -939,6 +940,7 @@ int bch2_write_super(struct bch_fs *c)
bch2_sb_members_from_cpu(c);
bch2_sb_members_cpy_v2_v1(&c->disk_sb);
bch2_sb_errors_from_cpu(c);
bch2_sb_downgrade_update(c);
for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca);
@ -1062,8 +1064,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
}
/* Downgrade if superblock is at a higher version than currently supported: */
void bch2_sb_maybe_downgrade(struct bch_fs *c)
bool bch2_check_version_downgrade(struct bch_fs *c)
{
bool ret = bcachefs_metadata_version_current < c->sb.version;
lockdep_assert_held(&c->sb_lock);
/*
@ -1077,12 +1081,17 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c)
if (c->sb.version_min > bcachefs_metadata_version_current)
c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
return ret;
}
void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
{
lockdep_assert_held(&c->sb_lock);
if (BCH_VERSION_MAJOR(new_version) >
BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
c->disk_sb.sb->version = cpu_to_le16(new_version);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
}

View File

@ -93,7 +93,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
__bch2_check_set_feature(c, feat);
}
void bch2_sb_maybe_downgrade(struct bch_fs *);
bool bch2_check_version_downgrade(struct bch_fs *);
void bch2_sb_upgrade(struct bch_fs *, unsigned);
void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,