diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 6ba830583846..e3004593874c 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -342,7 +342,8 @@ static inline void bkey_init(struct bkey *k)
 	x(quota,		13)		\
 	x(stripe,		14)		\
 	x(reflink_p,		15)		\
-	x(reflink_v,		16)
+	x(reflink_v,		16)		\
+	x(inline_data,		17)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name = nr,
@@ -915,6 +916,13 @@ struct bch_reflink_v {
 	__u64			_data[0];
 };
 
+/* Inline data */
+
+struct bch_inline_data {
+	struct bch_val		v;
+	u8			data[0];
+};
+
 /* Optional/variable size superblock sections: */
 
 struct bch_sb_field {
@@ -1319,6 +1327,7 @@ enum bch_sb_features {
 	BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3	= 5,
 	BCH_FEATURE_REFLINK			= 6,
 	BCH_FEATURE_NEW_SIPHASH			= 7,
+	BCH_FEATURE_INLINE_DATA			= 8,
 	BCH_FEATURE_NR,
 };
 
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index ba4d6329e37a..36e6ecc04514 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -572,6 +572,7 @@ BKEY_VAL_ACCESSORS(quota);
 BKEY_VAL_ACCESSORS(stripe);
 BKEY_VAL_ACCESSORS(reflink_p);
 BKEY_VAL_ACCESSORS(reflink_v);
+BKEY_VAL_ACCESSORS(inline_data);
 
 /* byte order helpers */
 
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index f01405dd502b..5312184c37f7 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -63,6 +63,23 @@ static const char *key_type_cookie_invalid(const struct bch_fs *c,
 	.key_invalid = empty_val_key_invalid,		\
 }
 
+static const char *key_type_inline_data_invalid(const struct bch_fs *c,
+						struct bkey_s_c k)
+{
+	return NULL;
+}
+
+static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
+					 struct bkey_s_c k)
+{
+	pr_buf(out, "(%zu bytes)", bkey_val_bytes(k.k));
+}
+
+static const struct bkey_ops bch2_bkey_ops_inline_data = {
+	.key_invalid	= key_type_inline_data_invalid,
+	.val_to_text	= key_type_inline_data_to_text,
+};
+
 static const struct bkey_ops bch2_bkey_ops[] = {
 #define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
 	BCH_BKEY_TYPES()
@@ -83,9 +100,8 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
 	if (k.k->u64s < BKEY_U64s)
 		return "u64s too small";
 
-	if ((btree_node_type_is_extents(type) ||
-	     type == BKEY_TYPE_BTREE) &&
-	    bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
+	if (type == BKEY_TYPE_BTREE &&
+	    bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
 		return "value too big";
 
 	if (btree_node_type_is_extents(type)) {
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 8f511760102a..0e25fbe65b95 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -737,11 +737,6 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k)
 	}
 
 	switch (k.k->type) {
-	case KEY_TYPE_deleted:
-	case KEY_TYPE_discard:
-	case KEY_TYPE_error:
-	case KEY_TYPE_cookie:
-		break;
 	case KEY_TYPE_extent:
 	case KEY_TYPE_reflink_v: {
 		struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
@@ -779,10 +774,18 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k)
 		le64_add_cpu(&p.v->idx, sub);
 		break;
 	}
-	case KEY_TYPE_reservation:
+	case KEY_TYPE_inline_data: {
+		struct bkey_s_inline_data d = bkey_s_to_inline_data(k);
+
+		sub = min_t(u64, sub << 9, bkey_val_bytes(d.k));
+
+		memmove(d.v->data,
+			d.v->data + sub,
+			bkey_val_bytes(d.k) - sub);
+
+		new_val_u64s -= sub >> 3;
 		break;
-	default:
-		BUG();
+	}
 	}
 
 	val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s;
@@ -814,6 +817,12 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k)
 		new_val_u64s = 0;
 	}
 
+	switch (k.k->type) {
+	case KEY_TYPE_inline_data:
+		new_val_u64s = min(new_val_u64s, k.k->size << 6);
+		break;
+	}
+
 	val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s;
 	BUG_ON(val_u64s_delta < 0);
 
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index e360e1989812..35a66d4f4ea2 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -456,6 +456,7 @@ static inline bool bkey_extent_is_direct_data(const struct bkey *k)
 static inline bool bkey_extent_is_data(const struct bkey *k)
 {
 	return bkey_extent_is_direct_data(k) ||
+		k->type == KEY_TYPE_inline_data ||
 		k->type == KEY_TYPE_reflink_p;
 }
 
@@ -469,6 +470,7 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k)
 	case KEY_TYPE_reservation:
 	case KEY_TYPE_reflink_p:
 	case KEY_TYPE_reflink_v:
+	case KEY_TYPE_inline_data:
 		return true;
 	default:
 		return false;
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index fab952856e36..7abe53be7dd3 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -990,6 +990,18 @@ static void bch2_writepage_io_done(struct closure *cl)
 		}
 	}
 
+	if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
+		bio_for_each_segment_all(bvec, bio, iter) {
+			struct bch_page_state *s;
+
+			s = __bch2_page_state(bvec->bv_page);
+			spin_lock(&s->lock);
+			for (i = 0; i < PAGE_SECTORS; i++)
+				s->s[i].nr_replicas = 0;
+			spin_unlock(&s->lock);
+		}
+	}
+
 	/*
 	 * racing with fallocate can cause us to add fewer sectors than
 	 * expected - but we shouldn't add more sectors than expected:
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index ef953499e66c..8f558347ca7f 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -539,16 +539,19 @@ static void __bch2_write_index(struct bch_write_op *op)
 
 		for (src = keys->keys; src != keys->top; src = n) {
 			n = bkey_next(src);
-			bkey_copy(dst, src);
 
-			bch2_bkey_drop_ptrs(bkey_i_to_s(dst), ptr,
-					    test_bit(ptr->dev, op->failed.d));
+			if (bkey_extent_is_direct_data(&src->k)) {
+				bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr,
+						    test_bit(ptr->dev, op->failed.d));
 
-			if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(dst))) {
-				ret = -EIO;
-				goto err;
+				if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) {
+					ret = -EIO;
+					goto err;
+				}
 			}
 
+			if (dst != src)
+				memmove_u64s_down(dst, src, src->u64s);
 			dst = bkey_next(dst);
 		}
 
@@ -1092,7 +1095,7 @@ again:
 
 		bio->bi_end_io	= bch2_write_endio;
 		bio->bi_private	= &op->cl;
-		bio->bi_opf = REQ_OP_WRITE;
+		bio->bi_opf |= REQ_OP_WRITE;
 
 		if (!skip_put)
 			closure_get(bio->bi_private);
@@ -1129,6 +1132,47 @@ flush_io:
 	goto again;
 }
 
+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
+{
+	struct closure *cl = &op->cl;
+	struct bio *bio = &op->wbio.bio;
+	struct bvec_iter iter;
+	struct bkey_i_inline_data *id;
+	unsigned sectors;
+	int ret;
+
+	ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
+				   ARRAY_SIZE(op->inline_keys),
+				   BKEY_U64s + DIV_ROUND_UP(data_len, 8));
+	if (ret) {
+		op->error = ret;
+		goto err;
+	}
+
+	sectors = bio_sectors(bio);
+	op->pos.offset += sectors;
+
+	id = bkey_inline_data_init(op->insert_keys.top);
+	id->k.p		= op->pos;
+	id->k.version	= op->version;
+	id->k.size	= sectors;
+
+	iter = bio->bi_iter;
+	iter.bi_size = data_len;
+	memcpy_from_bio(id->v.data, bio, iter);
+
+	while (data_len & 7)
+		id->v.data[data_len++] = '\0';
+	set_bkey_val_bytes(&id->k, data_len);
+	bch2_keylist_push(&op->insert_keys);
+
+	op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+	continue_at_nobarrier(cl, bch2_write_index, NULL);
+	return;
+err:
+	bch2_write_done(&op->cl);
+}
+
 /**
  * bch_write - handle a write to a cache device or flash only volume
  *
@@ -1150,22 +1194,22 @@ void bch2_write(struct closure *cl)
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
 	struct bio *bio = &op->wbio.bio;
 	struct bch_fs *c = op->c;
+	unsigned data_len;
 
 	BUG_ON(!op->nr_replicas);
 	BUG_ON(!op->write_point.v);
 	BUG_ON(!bkey_cmp(op->pos, POS_MAX));
 
+	op->start_time = local_clock();
+	bch2_keylist_init(&op->insert_keys, op->inline_keys);
+	wbio_init(bio)->put_bio = false;
+
 	if (bio_sectors(bio) & (c->opts.block_size - 1)) {
 		__bcache_io_error(c, "misaligned write");
 		op->error = -EIO;
 		goto err;
 	}
 
-	op->start_time = local_clock();
-
-	bch2_keylist_init(&op->insert_keys, op->inline_keys);
-	wbio_init(bio)->put_bio = false;
-
 	if (c->opts.nochanges ||
 	    !percpu_ref_tryget(&c->writes)) {
 		__bcache_io_error(c, "read only");
@@ -1175,6 +1219,14 @@ void bch2_write(struct closure *cl)
 
 	bch2_increment_clock(c, bio_sectors(bio), WRITE);
 
+	data_len = min_t(u64, bio->bi_iter.bi_size,
+			 op->new_i_size - (op->pos.offset << 9));
+
+	if (data_len <= min(block_bytes(c) / 2, 1024U)) {
+		bch2_write_data_inline(op, data_len);
+		return;
+	}
+
 	continue_at_nobarrier(cl, __bch2_write, NULL);
 	return;
 err:
@@ -1892,6 +1944,19 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 	struct bpos pos = bkey_start_pos(k.k);
 	int pick_ret;
 
+	if (k.k->type == KEY_TYPE_inline_data) {
+		struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k);
+		unsigned bytes = min_t(unsigned, iter.bi_size,
+				       bkey_val_bytes(d.k));
+
+		swap(iter.bi_size, bytes);
+		memcpy_to_bio(&orig->bio, iter, d.v->data);
+		swap(iter.bi_size, bytes);
+		bio_advance_iter(&orig->bio, &iter, bytes);
+		zero_fill_bio_iter(&orig->bio, iter);
+		goto out_read_done;
+	}
+
 	pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
 
 	/* hole or reservation - just zero fill: */
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index 81fc549a0c97..fa5841a86fcb 100644
--- a/fs/bcachefs/io.h
+++ b/fs/bcachefs/io.h
@@ -34,10 +34,11 @@ enum bch_write_flags {
 	BCH_WRITE_PAGES_OWNED		= (1 << 5),
 	BCH_WRITE_ONLY_SPECIFIED_DEVS	= (1 << 6),
 	BCH_WRITE_NOPUT_RESERVATION	= (1 << 7),
+	BCH_WRITE_WROTE_DATA_INLINE	= (1 << 8),
 
 	/* Internal: */
-	BCH_WRITE_JOURNAL_SEQ_PTR	= (1 << 8),
-	BCH_WRITE_SKIP_CLOSURE_PUT	= (1 << 9),
+	BCH_WRITE_JOURNAL_SEQ_PTR	= (1 << 9),
+	BCH_WRITE_SKIP_CLOSURE_PUT	= (1 << 10),
};
 
 static inline u64 *op_journal_seq(struct bch_write_op *op)
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 2efe023b2f0d..9102a1ce1ec4 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -913,6 +913,12 @@ int bch2_fs_recovery(struct bch_fs *c)
 		write_sb = true;
 	}
 
+	if (!(c->sb.features & (1ULL << BCH_FEATURE_INLINE_DATA))) {
+		c->disk_sb.sb->features[0] |=
+			cpu_to_le64(1ULL << BCH_FEATURE_INLINE_DATA);
+		write_sb = true;
+	}
+
 	if (!test_bit(BCH_FS_ERROR, &c->flags)) {
 		c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
 		write_sb = true;
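Not part of the patch above: a minimal, self-contained sketch (plain userspace C; the helpers inline_threshold() and inline_val_bytes() are made up for illustration) of the arithmetic the write path uses. bch2_write() only takes the inline path when the write fits in min(block_bytes(c) / 2, 1024) bytes, and bch2_write_data_inline() pads the value with '\0' up to the next multiple of 8 because bkey values are sized in u64s.

/*
 * Illustrative sketch only -- not bcachefs code. Mirrors the size check in
 * bch2_write() and the padding done by bch2_write_data_inline().
 */
#include <stdio.h>

/* Inline writes of at most half a block, capped at 1024 bytes. */
static unsigned inline_threshold(unsigned block_bytes)
{
	unsigned half = block_bytes / 2;

	return half < 1024 ? half : 1024;
}

/* Value is padded to the next multiple of 8 (u64 granularity). */
static unsigned inline_val_bytes(unsigned data_len)
{
	while (data_len & 7)
		data_len++;
	return data_len;
}

int main(void)
{
	unsigned block_bytes = 4096;	/* assumed block size for the example */
	unsigned lens[] = { 100, 1024, 1025, 2048 };

	for (unsigned i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
		unsigned len = lens[i];

		if (len <= inline_threshold(block_bytes))
			printf("%u bytes: inlined, value padded to %u bytes (%u u64s)\n",
			       len, inline_val_bytes(len), inline_val_bytes(len) / 8);
		else
			printf("%u bytes: written as a normal extent\n", len);
	}
	return 0;
}

On the read side such keys never touch device pointers or the allocator: the __bch2_read_extent() hunk above simply copies the value into the bio with memcpy_to_bio() and zero-fills whatever remains of the request.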