bcachefs: KEY_TYPE_inode_v3, metadata_version_inode_v3

Move bi_size and bi_sectors into the non-varint portion of the inode, so
that the write path can update them without going through the relatively
expensive unpack/pack operations.

Other changes:
 - Add a field for the offset of the varint section, so we can add new
   non-varint fields without needing a new inode type, like alloc_v3
 - Move bi_mode into the flags field, so that the varint section can be
   u64 aligned

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2022-10-21 13:21:03 -04:00
parent 47b323a0b0
commit 8dd69d9f64
7 changed files with 219 additions and 37 deletions

View File

@ -370,7 +370,8 @@ static inline void bkey_init(struct bkey *k)
x(set, 25) \
x(lru, 26) \
x(alloc_v4, 27) \
x(backpointer, 28)
x(backpointer, 28) \
x(inode_v3, 29)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@ -721,6 +722,21 @@ struct bch_inode_v2 {
__u8 fields[0];
} __packed __aligned(8);
/*
 * On-disk value for KEY_TYPE_inode_v3.
 *
 * The little-endian 64-bit members below are stored at fixed offsets, ahead
 * of the variable-length section, so they can be accessed without decoding
 * any varints. bi_flags additionally carries the bitfields declared with
 * LE64_BITMASK(INODEv3_*).
 */
struct bch_inode_v3 {
struct bch_val v;
__le64 bi_journal_seq;
__le64 bi_hash_seed;
__le64 bi_flags;
__le64 bi_sectors;
__le64 bi_size;
__le64 bi_version;
/* varint-encoded BCH_INODE_FIELDS_v3() values */
__u8 fields[0];
} __packed __aligned(8);
/*
 * Offset of the varint section from the start of the value, in u64s.
 *
 * _INITIAL is the smallest offset accepted by bch2_inode_v3_invalid();
 * _CUR is derived from the current struct layout and is the value the pack
 * path stores in INODEv3_FIELDS_START.
 */
#define INODEv3_FIELDS_START_INITIAL 6
#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(u64))
struct bch_inode_generation {
struct bch_val v;
@ -732,7 +748,7 @@ struct bch_inode_generation {
* bi_subvol and bi_parent_subvol are only set for subvolume roots:
*/
#define BCH_INODE_FIELDS() \
#define BCH_INODE_FIELDS_v2() \
x(bi_atime, 96) \
x(bi_ctime, 96) \
x(bi_mtime, 96) \
@ -759,6 +775,31 @@ struct bch_inode_generation {
x(bi_subvol, 32) \
x(bi_parent_subvol, 32)
/*
 * x-macro list of the inode fields kept in the varint section of
 * struct bch_inode_v3, as x(name, width in bits).
 *
 * Fields are packed and unpacked in the order listed here. bi_size,
 * bi_sectors, bi_version and bi_mode are not listed: inode_v3 stores them
 * in the fixed-width part of the value instead.
 */
#define BCH_INODE_FIELDS_v3() \
x(bi_atime, 96) \
x(bi_ctime, 96) \
x(bi_mtime, 96) \
x(bi_otime, 96) \
x(bi_uid, 32) \
x(bi_gid, 32) \
x(bi_nlink, 32) \
x(bi_generation, 32) \
x(bi_dev, 32) \
x(bi_data_checksum, 8) \
x(bi_compression, 8) \
x(bi_project, 32) \
x(bi_background_compression, 8) \
x(bi_data_replicas, 8) \
x(bi_promote_target, 16) \
x(bi_foreground_target, 16) \
x(bi_background_target, 16) \
x(bi_erasure_code, 16) \
x(bi_fields_set, 16) \
x(bi_dir, 64) \
x(bi_dir_offset, 64) \
x(bi_subvol, 32) \
x(bi_parent_subvol, 32)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
x(data_checksum, 8) \
@ -815,6 +856,13 @@ LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
/*
 * Bitfields stored inside bch_inode_v3.bi_flags.
 *
 * The two numbers are presumably a half-open [start, end) bit range (each
 * field begins where the previous one ends, and INODEv3_MODE spans 16 bits
 * for a u16 mode) - TODO confirm against the LE64_BITMASK() definition.
 *
 *   INODEv3_STR_HASH     - string hash type, checked against BCH_STR_HASH_NR
 *   INODEv3_NR_FIELDS    - number of varint fields present in the key
 *   INODEv3_FIELDS_START - offset of the varint section, in u64s
 *   INODEv3_MODE         - bi_mode
 */
LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31);
LE64_BITMASK(INODEv3_FIELDS_START,
struct bch_inode_v3, bi_flags, 31, 36);
LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52);
/* Dirents */
/*
@ -1499,7 +1547,8 @@ struct bch_sb_field_journal_seq_blacklist {
x(freespace, 19) \
x(alloc_v4, 20) \
x(new_data_types, 21) \
x(backpointers, 22)
x(backpointers, 22) \
x(inode_v3, 23)
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,

View File

@ -149,6 +149,7 @@ static unsigned bch2_key_types_allowed[] = {
(1U << KEY_TYPE_whiteout)|
(1U << KEY_TYPE_inode)|
(1U << KEY_TYPE_inode_v2)|
(1U << KEY_TYPE_inode_v3)|
(1U << KEY_TYPE_inode_generation),
[BKEY_TYPE_dirents] =
(1U << KEY_TYPE_deleted)|

View File

@ -1123,10 +1123,10 @@ int bch2_mark_inode(struct btree_trans *trans,
u64 journal_seq = trans->journal_res.seq;
if (flags & BTREE_TRIGGER_INSERT) {
struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v;
struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
BUG_ON(!journal_seq);
BUG_ON(new.k->type != KEY_TYPE_inode_v2);
BUG_ON(new.k->type != KEY_TYPE_inode_v3);
v->bi_journal_seq = cpu_to_le64(journal_seq);
}

View File

@ -60,11 +60,10 @@ static int inode_decode_field(const u8 *in, const u8 *end,
return bytes;
}
static inline void bch2_inode_pack_inlined(struct bch_fs *c,
struct bkey_inode_buf *packed,
static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
struct bkey_i_inode_v2 *k = &packed->inode;
struct bkey_i_inode_v3 *k = &packed->inode;
u8 *out = k->v.fields;
u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out;
@ -72,13 +71,17 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c,
unsigned bytes;
int ret;
bkey_inode_v2_init(&packed->inode.k_i);
bkey_inode_v3_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq);
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors);
packed->inode.v.bi_size = cpu_to_le64(inode->bi_size);
packed->inode.v.bi_version = cpu_to_le64(inode->bi_version);
SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);
#define x(_name, _bits) \
nr_fields++; \
@ -99,7 +102,7 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c,
*out++ = 0; \
}
BCH_INODE_FIELDS()
BCH_INODE_FIELDS_v3()
#undef x
BUG_ON(out > end);
@ -110,7 +113,7 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c,
set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes);
SET_INODEv2_NR_FIELDS(&k->v, nr_fields);
SET_INODEv3_NR_FIELDS(&k->v, nr_fields);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked;
@ -120,21 +123,23 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c,
BUG_ON(ret);
BUG_ON(unpacked.bi_inum != inode->bi_inum);
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
BUG_ON(unpacked.bi_sectors != inode->bi_sectors);
BUG_ON(unpacked.bi_size != inode->bi_size);
BUG_ON(unpacked.bi_version != inode->bi_version);
BUG_ON(unpacked.bi_mode != inode->bi_mode);
#define x(_name, _bits) if (unpacked._name != inode->_name) \
panic("unpacked %llu should be %llu", \
(u64) unpacked._name, (u64) inode->_name);
BCH_INODE_FIELDS()
BCH_INODE_FIELDS_v3()
#undef x
}
}
void bch2_inode_pack(struct bch_fs *c,
struct bkey_inode_buf *packed,
void bch2_inode_pack(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
bch2_inode_pack_inlined(c, packed, inode);
bch2_inode_pack_inlined(packed, inode);
}
static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
@ -164,7 +169,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
unpacked->_name = field[1]; \
in += ret;
BCH_INODE_FIELDS()
BCH_INODE_FIELDS_v2()
#undef x
/* XXX: signal if there were more fields than expected? */
@ -203,15 +208,66 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
return -1; \
fieldnr++;
BCH_INODE_FIELDS()
BCH_INODE_FIELDS_v2()
#undef x
/* XXX: signal if there were more fields than expected? */
return 0;
}
int bch2_inode_unpack(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
/*
 * Unpack a KEY_TYPE_inode_v3 key into @unpacked.
 *
 * The fixed-width members are copied straight out of the value, and bi_mode
 * is extracted from the INODEv3_MODE bits of bi_flags. The remaining fields
 * are varint-decoded in BCH_INODE_FIELDS_v3() order.
 *
 * Returns 0 on success, the negative result of the varint decoder if a
 * field fails to decode, or -1 if a decoded value does not fit in its
 * destination field.
 *
 * NOTE(review): decoding starts at the compile-time offset of ->fields and
 * never consults INODEv3_FIELDS_START() - confirm that this is intended.
 */
static int bch2_inode_unpack_v3(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{
struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
const u8 *in = inode.v->fields;
const u8 *end = bkey_val_end(inode);
unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
unsigned fieldnr = 0;
int ret;
u64 v[2];
/* Fixed-width part: no varint decoding required. */
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
unpacked->bi_sectors = le64_to_cpu(inode.v->bi_sectors);
unpacked->bi_size = le64_to_cpu(inode.v->bi_size);
unpacked->bi_version = le64_to_cpu(inode.v->bi_version);
unpacked->bi_mode = INODEv3_MODE(inode.v);
/*
 * Per-field decode step. Only the first nr_fields entries are read from
 * the key; any later field is set to zero. Fields wider than 64 bits are
 * read as two varints, and the second one must be zero. The assign-then-
 * compare rejects values that were truncated by the destination type.
 */
#define x(_name, _bits) \
if (fieldnr < nr_fields) { \
ret = bch2_varint_decode_fast(in, end, &v[0]); \
if (ret < 0) \
return ret; \
in += ret; \
\
if (_bits > 64) { \
ret = bch2_varint_decode_fast(in, end, &v[1]); \
if (ret < 0) \
return ret; \
in += ret; \
} else { \
v[1] = 0; \
} \
} else { \
v[0] = v[1] = 0; \
} \
\
unpacked->_name = v[0]; \
if (v[1] || v[0] != unpacked->_name) \
return -1; \
fieldnr++;
BCH_INODE_FIELDS_v3()
#undef x
/* XXX: signal if there were more fields than expected? */
return 0;
}
static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{
memset(unpacked, 0, sizeof(*unpacked));
@ -252,6 +308,14 @@ int bch2_inode_unpack(struct bkey_s_c k,
}
}
/*
 * Unpack an inode key of any supported type into @unpacked.
 *
 * inode_v3 is treated as the expected case and decoded directly; every
 * other key type is handed to the out-of-line slow path.
 */
int bch2_inode_unpack(struct bkey_s_c k,
		      struct bch_inode_unpacked *unpacked)
{
	return likely(k.k->type == KEY_TYPE_inode_v3)
		? bch2_inode_unpack_v3(k, unpacked)
		: bch2_inode_unpack_slowpath(k, unpacked);
}
int bch2_inode_peek(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
@ -297,11 +361,32 @@ int bch2_inode_write(struct btree_trans *trans,
if (IS_ERR(inode_p))
return PTR_ERR(inode_p);
bch2_inode_pack_inlined(trans->c, inode_p, inode);
bch2_inode_pack_inlined(inode_p, inode);
inode_p->inode.k.p.snapshot = iter->snapshot;
return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
}
struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
{
struct bch_inode_unpacked u;
struct bkey_inode_buf *inode_p;
int ret;
if (!bkey_is_inode(&k->k))
return ERR_PTR(-ENOENT);
inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
if (IS_ERR(inode_p))
return ERR_CAST(inode_p);
ret = bch2_inode_unpack(bkey_i_to_s_c(k), &u);
if (ret)
return ERR_PTR(ret);
bch2_inode_pack(inode_p, &u);
return &inode_p->inode.k_i;
}
static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
{
struct bch_inode_unpacked unpacked;
@ -387,15 +472,48 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
return __bch2_inode_invalid(k, err);
}
static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
/*
 * Validate a KEY_TYPE_inode_v3 key.
 *
 * On failure a description is written to @err and -BCH_ERR_invalid_bkey is
 * returned. The checks made here are: value size, fields_start range and
 * string hash type; anything else is left to __bch2_inode_invalid(), whose
 * result is returned.
 */
int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
{
prt_printf(out, "mode %o flags %x journal_seq %llu",
struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
/* The value must be at least as large as the fixed-width part. */
if (bkey_val_bytes(k.k) < sizeof(*inode.v)) {
prt_printf(err, "incorrect value size (%zu < %zu)",
bkey_val_bytes(k.k), sizeof(*inode.v));
return -BCH_ERR_invalid_bkey;
}
/* fields_start is in u64s and must lie between the initial offset and the value size. */
if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) {
prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)",
INODEv3_FIELDS_START(inode.v),
INODEv3_FIELDS_START_INITIAL,
bkey_val_u64s(inode.k));
return -BCH_ERR_invalid_bkey;
}
/* The string hash type must be one of the known values. */
if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) {
prt_printf(err, "invalid str hash type (%llu >= %u)",
INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
return -BCH_ERR_invalid_bkey;
}
return __bch2_inode_invalid(k, err);
}
/*
 * Print an unpacked inode to @out: mode (octal), flags (hex), journal_seq,
 * bi_size, bi_sectors and bi_version, followed by every field in the inode
 * field list as " name value".
 */
static void __bch2_inode_unpacked_to_text(struct printbuf *out,
struct bch_inode_unpacked *inode)
{
prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu",
inode->bi_mode, inode->bi_flags,
inode->bi_journal_seq);
inode->bi_journal_seq,
inode->bi_size,
inode->bi_sectors,
inode->bi_version);
/* Each listed field is cast to u64 so that a single format fits all widths. */
#define x(_name, _bits) \
prt_printf(out, " "#_name " %llu", (u64) inode->_name);
BCH_INODE_FIELDS()
BCH_INODE_FIELDS_v3()
#undef x
}
@ -405,8 +523,7 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked
__bch2_inode_unpacked_to_text(out, inode);
}
void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
struct bch_inode_unpacked inode;

View File

@ -9,6 +9,7 @@ extern const char * const bch2_inode_opts[];
int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_inode ((struct bkey_ops) { \
@ -25,10 +26,18 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.atomic_trigger = bch2_mark_inode, \
})
/*
 * bkey_ops for KEY_TYPE_inode_v3: uses the v3-specific validator together
 * with the shared inode to_text and trigger functions.
 */
#define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \
.key_invalid = bch2_inode_v3_invalid, \
.val_to_text = bch2_inode_to_text, \
.trans_trigger = bch2_trans_mark_inode, \
.atomic_trigger = bch2_mark_inode, \
})
/* Returns true if @k has one of the inode key types (inode, inode_v2, inode_v3). */
static inline bool bkey_is_inode(const struct bkey *k)
{
return k->type == KEY_TYPE_inode ||
k->type == KEY_TYPE_inode_v2;
k->type == KEY_TYPE_inode_v2 ||
k->type == KEY_TYPE_inode_v3;
}
int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c,
@ -52,25 +61,28 @@ struct bch_inode_unpacked {
u64 bi_inum;
u64 bi_journal_seq;
__le64 bi_hash_seed;
u64 bi_size;
u64 bi_sectors;
u64 bi_version;
u32 bi_flags;
u16 bi_mode;
#define x(_name, _bits) u##_bits _name;
BCH_INODE_FIELDS()
BCH_INODE_FIELDS_v3()
#undef x
};
/*
 * Buffer for a packed inode key: the key itself followed by padding for the
 * varint section.
 */
struct bkey_inode_buf {
struct bkey_i_inode_v2 inode;
struct bkey_i_inode_v3 inode;
/* Reserve 8 + _bits / 8 bytes for every field in the list. */
#define x(_name, _bits) + 8 + _bits / 8
u8 _pad[0 + BCH_INODE_FIELDS()];
u8 _pad[0 + BCH_INODE_FIELDS_v3()];
#undef x
} __packed __aligned(8);
void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
const struct bch_inode_unpacked *);
void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *);
void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);

View File

@ -356,7 +356,7 @@ int bch2_extent_update(struct btree_trans *trans,
}
if (i_sectors_delta || new_i_size) {
bch2_inode_pack(trans->c, &inode_p, &inode_u);
bch2_inode_pack(&inode_p, &inode_u);
inode_p.inode.k.p.snapshot = iter->snapshot;

View File

@ -1098,6 +1098,9 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.version_upgrade = true;
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
} else if (c->sb.version < bcachefs_metadata_version_inode_v3) {
bch_info(c, "version prior to inode_v3, upgrade required");
c->opts.version_upgrade = true;
}
}
@ -1482,7 +1485,7 @@ int bch2_fs_initialize(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
if (c->sb.version < bcachefs_metadata_version_backpointers)
if (c->sb.version < bcachefs_metadata_version_inode_v3)
c->opts.version_upgrade = true;
if (c->opts.version_upgrade) {
@ -1563,7 +1566,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO;
root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
bch2_inode_pack(c, &packed_inode, &root_inode);
bch2_inode_pack(&packed_inode, &root_inode);
packed_inode.inode.k.p.snapshot = U32_MAX;
err = "error creating root directory";