From 35189e09ab46785746df7007ed2a57ee78b56191 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 9 Nov 2019 16:01:15 -0500 Subject: [PATCH] bcachefs: bkey_on_stack This implements code for storing small bkeys on the stack and allocating out of a mempool if they're too big. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 + fs/bcachefs/bkey_on_stack.h | 35 ++++++++++++++ fs/bcachefs/bkey_sort.c | 13 ++++-- fs/bcachefs/ec.c | 12 +++-- fs/bcachefs/extents.c | 18 +++++--- fs/bcachefs/fs-io.c | 92 ++++++++++++++++++++----------------- fs/bcachefs/fs.c | 29 +++++++----- fs/bcachefs/io.c | 63 +++++++++++++++---------- fs/bcachefs/migrate.c | 16 ++++--- fs/bcachefs/move.c | 10 ++-- fs/bcachefs/reflink.c | 17 ++++--- fs/bcachefs/super.c | 2 + 12 files changed, 205 insertions(+), 104 deletions(-) create mode 100644 fs/bcachefs/bkey_on_stack.h diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index f8a040115fd1..344cf982124f 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -729,6 +729,8 @@ struct bch_fs { atomic64_t key_version; + mempool_t large_bkey_pool; + /* REBALANCE */ struct bch_fs_rebalance rebalance; diff --git a/fs/bcachefs/bkey_on_stack.h b/fs/bcachefs/bkey_on_stack.h new file mode 100644 index 000000000000..d4739038323f --- /dev/null +++ b/fs/bcachefs/bkey_on_stack.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BKEY_ON_STACK_H +#define _BCACHEFS_BKEY_ON_STACK_H + +#include "bcachefs.h" + +struct bkey_on_stack { + struct bkey_i *k; + u64 onstack[12]; +}; + +static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, + struct bch_fs *c, unsigned u64s) +{ + if (s->k == (void *) s->onstack && + u64s > ARRAY_SIZE(s->onstack)) { + s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); + memcpy(s->k, s->onstack, sizeof(s->onstack)); + } +} + +static inline void bkey_on_stack_init(struct bkey_on_stack *s) +{ + s->k = (void *) s->onstack; +} + +static inline 
void bkey_on_stack_exit(struct bkey_on_stack *s, + struct bch_fs *c) +{ + if (s->k != (void *) s->onstack) + mempool_free(s->k, &c->large_bkey_pool); + s->k = NULL; +} + +#endif /* _BCACHEFS_BKEY_ON_STACK_H */ diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index 2cac269b386f..f5c0507ad79d 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_on_stack.h" #include "bkey_sort.h" #include "bset.h" #include "extents.h" @@ -292,8 +293,10 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bkey l_unpacked, r_unpacked; struct bkey_s l, r; struct btree_nr_keys nr; + struct bkey_on_stack split; memset(&nr, 0, sizeof(nr)); + bkey_on_stack_init(&split); heap_resort(iter, extent_sort_cmp, NULL); @@ -349,13 +352,13 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, extent_sort_sift(iter, b, _r - iter->data); } else if (bkey_cmp(l.k->p, r.k->p) > 0) { - BKEY_PADDED(k) tmp; + bkey_on_stack_realloc(&split, c, l.k->u64s); /* * r wins, but it overlaps in the middle of l - split l: */ - bkey_reassemble(&tmp.k, l.s_c); - bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k); + bkey_reassemble(split.k, l.s_c); + bch2_cut_back(bkey_start_pos(r.k), &split.k->k); __bch2_cut_front(r.k->p, l); extent_save(b, lk, l.k); @@ -363,7 +366,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, extent_sort_sift(iter, b, 0); extent_sort_append(c, f, &nr, dst->start, - &prev, bkey_i_to_s(&tmp.k)); + &prev, bkey_i_to_s(split.k)); } else { bch2_cut_back(bkey_start_pos(r.k), l.k); extent_save(b, lk, l.k); @@ -373,6 +376,8 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, extent_sort_advance_prev(f, &nr, dst->start, &prev); dst->u64s = cpu_to_le16((u64 *) prev - dst->_data); + + bkey_on_stack_exit(&split, c); return nr; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 
f32b8e6bf2ce..b24f867520c3 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -4,6 +4,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "bset.h" #include "btree_gc.h" #include "btree_update.h" @@ -777,9 +778,10 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, struct bkey_s_c k; struct bkey_s_extent e; struct bch_extent_ptr *ptr; - BKEY_PADDED(k) tmp; + struct bkey_on_stack sk; int ret = 0, dev, idx; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -804,8 +806,9 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, dev = s->key.v.ptrs[idx].dev; - bkey_reassemble(&tmp.k, k); - e = bkey_i_to_s_extent(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + e = bkey_i_to_s_extent(sk.k); extent_for_each_ptr(e, ptr) if (ptr->dev != dev) @@ -816,7 +819,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, extent_stripe_ptr_add(e, s, ptr, idx); - bch2_trans_update(&trans, iter, &tmp.k); + bch2_trans_update(&trans, iter, sk.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| @@ -829,6 +832,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, } bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return ret; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index b12798103763..46eeaa574e86 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -8,6 +8,7 @@ #include "bcachefs.h" #include "bkey_methods.h" +#include "bkey_on_stack.h" #include "btree_gc.h" #include "btree_update.h" #include "btree_update_interior.h" @@ -1132,7 +1133,11 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter, break; } case BCH_EXTENT_OVERLAP_MIDDLE: { - BKEY_PADDED(k) split; + struct bkey_on_stack split; + + bkey_on_stack_init(&split); + bkey_on_stack_realloc(&split, c, k.k->u64s); + /* * The insert key falls 'in the middle' of k * The insert key splits k in 3: @@ -1147,18 +1152,19 @@ 
extent_squash(struct bch_fs *c, struct btree_iter *iter, * modify k _before_ doing the insert (which will move * what k points to) */ - bkey_reassemble(&split.k, k.s_c); - split.k.k.needs_whiteout |= bkey_written(l->b, _k); + bkey_reassemble(split.k, k.s_c); + split.k->k.needs_whiteout |= bkey_written(l->b, _k); - bch2_cut_back(bkey_start_pos(&insert->k), &split.k.k); - BUG_ON(bkey_deleted(&split.k.k)); + bch2_cut_back(bkey_start_pos(&insert->k), &split.k->k); + BUG_ON(bkey_deleted(&split.k->k)); __bch2_cut_front(insert->k.p, k); BUG_ON(bkey_deleted(k.k)); extent_save(l->b, _k, k.k); bch2_btree_iter_fix_key_modified(iter, l->b, _k); - extent_bset_insert(c, iter, &split.k); + extent_bset_insert(c, iter, split.k); + bkey_on_stack_exit(&split, c); break; } } diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 657559c2db14..478630fdf643 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" @@ -691,6 +692,18 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) } } +static bool extent_partial_reads_expensive(struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; + + bkey_for_each_crc(k.k, ptrs, crc, i) + if (crc.csum_type || crc.compression_type) + return true; + return false; +} + static void readpage_bio_extend(struct readpages_iter *iter, struct bio *bio, unsigned sectors_this_extent, @@ -744,15 +757,17 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, struct readpages_iter *readpages_iter) { struct bch_fs *c = trans->c; + struct bkey_on_stack sk; int flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE; int ret = 0; rbio->c = c; rbio->start_time = local_clock(); + + bkey_on_stack_init(&sk); retry: while (1) { - BKEY_PADDED(k) tmp; struct bkey_s_c k; unsigned 
bytes, sectors, offset_into_extent; @@ -764,15 +779,16 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, if (ret) break; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(trans, - &offset_into_extent, &tmp.k); + &offset_into_extent, sk.k); if (ret) break; @@ -780,22 +796,9 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, bch2_trans_unlock(trans); - if (readpages_iter) { - bool want_full_extent = false; - - if (bkey_extent_is_data(k.k)) { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *i; - struct extent_ptr_decoded p; - - bkey_for_each_ptr_decode(k.k, ptrs, p, i) - want_full_extent |= ((p.crc.csum_type != 0) | - (p.crc.compression_type != 0)); - } - - readpage_bio_extend(readpages_iter, &rbio->bio, - sectors, want_full_extent); - } + if (readpages_iter) + readpage_bio_extend(readpages_iter, &rbio->bio, sectors, + extent_partial_reads_expensive(k)); bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; swap(rbio->bio.bi_iter.bi_size, bytes); @@ -809,7 +812,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, bch2_read_extent(c, rbio, k, offset_into_extent, flags); if (flags & BCH_READ_LAST_FRAGMENT) - return; + break; swap(rbio->bio.bi_iter.bi_size, bytes); bio_advance(&rbio->bio, bytes); @@ -818,8 +821,12 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, if (ret == -EINTR) goto retry; - bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); - bio_endio(&rbio->bio); + if (ret) { + bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); + bio_endio(&rbio->bio); + } + + bkey_on_stack_exit(&sk, c); } void bch2_readahead(struct readahead_control *ractl) @@ -2353,6 +2360,7 @@ static 
long bchfs_fcollapse_finsert(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; + struct bkey_on_stack copy; struct btree_trans trans; struct btree_iter *src, *dst, *del = NULL; loff_t shift, new_size; @@ -2362,6 +2370,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; + bkey_on_stack_init(&copy); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); /* @@ -2430,7 +2439,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, while (1) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); - BKEY_PADDED(k) copy; struct bkey_i delete; struct bkey_s_c k; struct bpos next_pos; @@ -2455,34 +2463,35 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) break; reassemble: - bkey_reassemble(&copy.k, k); + bkey_on_stack_realloc(&copy, c, k.k->u64s); + bkey_reassemble(copy.k, k); if (insert && bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) { - bch2_cut_front(move_pos, &copy.k); - bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k.k)); + bch2_cut_front(move_pos, copy.k); + bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k->k)); } - copy.k.k.p.offset += shift >> 9; - bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k.k)); + copy.k->k.p.offset += shift >> 9; + bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k->k)); - ret = bch2_extent_atomic_end(dst, &copy.k, &atomic_end); + ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end); if (ret) goto bkey_err; - if (bkey_cmp(atomic_end, copy.k.k.p)) { + if (bkey_cmp(atomic_end, copy.k->k.p)) { if (insert) { move_pos = atomic_end; move_pos.offset -= shift >> 9; goto reassemble; } else { - bch2_cut_back(atomic_end, &copy.k.k); + bch2_cut_back(atomic_end, &copy.k->k); } } bkey_init(&delete.k); delete.k.p = src->pos; - bch2_key_resize(&delete.k, copy.k.k.size); + bch2_key_resize(&delete.k, copy.k->k.size); next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; @@ -2495,12 +2504,12 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, * by the triggers machinery: */ if (insert && - bkey_cmp(bkey_start_pos(&copy.k.k), delete.k.p) < 0) { - bch2_cut_back(bkey_start_pos(&copy.k.k), &delete.k); + bkey_cmp(bkey_start_pos(&copy.k->k), delete.k.p) < 0) { + bch2_cut_back(bkey_start_pos(&copy.k->k), &delete.k); } else if (!insert && - bkey_cmp(copy.k.k.p, + bkey_cmp(copy.k->k.p, bkey_start_pos(&delete.k)) > 0) { - bch2_cut_front(copy.k.k.p, &delete); + bch2_cut_front(copy.k->k.p, &delete); del = bch2_trans_copy_iter(&trans, src); BUG_ON(IS_ERR_OR_NULL(del)); @@ -2509,10 +2518,10 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bkey_start_pos(&delete.k)); } - bch2_trans_update(&trans, dst, &copy.k); + bch2_trans_update(&trans, dst, copy.k); bch2_trans_update(&trans, del ?: src, &delete); - if (copy.k.k.size == k.k->size) { + if (copy.k->k.size == k.k->size) { /* * If we're moving the entire extent, we can skip * running triggers: @@ -2521,10 +2530,10 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, } else { /* We might end up splitting compressed extents: */ unsigned nr_ptrs = - bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)); + bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(copy.k)); ret = bch2_disk_reservation_get(c, &disk_res, - copy.k.k.size, nr_ptrs, + copy.k->k.size, nr_ptrs, BCH_DISK_RESERVATION_NOFAIL); BUG_ON(ret); } @@ -2559,6 +2568,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, } err: bch2_trans_exit(&trans); + bkey_on_stack_exit(&copy, c); bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b241164f6f7e..e8cdae3c114b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "acl.h" +#include "bkey_on_stack.h" #include "btree_update.h" #include "buckets.h" #include "chardev.h" @@ -875,7 +876,7 @@ static int 
bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - BKEY_PADDED(k) cur, prev; + struct bkey_on_stack cur, prev; struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; @@ -888,6 +889,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; + bkey_on_stack_init(&cur); + bkey_on_stack_init(&prev); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -902,15 +905,17 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, continue; } - bkey_reassemble(&cur.k, k); - k = bkey_i_to_s_c(&cur.k); + bkey_on_stack_realloc(&cur, c, k.k->u64s); + bkey_on_stack_realloc(&prev, c, k.k->u64s); + bkey_reassemble(cur.k, k); + k = bkey_i_to_s_c(cur.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &cur.k); + &offset_into_extent, cur.k); if (ret) break; @@ -920,19 +925,19 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_cut_front(POS(k.k->p.inode, bkey_start_offset(k.k) + offset_into_extent), - &cur.k); - bch2_key_resize(&cur.k.k, sectors); - cur.k.k.p = iter->pos; - cur.k.k.p.offset += cur.k.k.size; + cur.k); + bch2_key_resize(&cur.k->k, sectors); + cur.k->k.p = iter->pos; + cur.k->k.p.offset += cur.k->k.size; if (have_extent) { ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(&prev.k), 0); + bkey_i_to_s_c(prev.k), 0); if (ret) break; } - bkey_copy(&prev.k, &cur.k); + bkey_copy(prev.k, cur.k); have_extent = true; if (k.k->type == KEY_TYPE_reflink_v) @@ -945,10 +950,12 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, goto retry; if (!ret && have_extent) - ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k), + ret = 
bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); ret = bch2_trans_exit(&trans) ?: ret; + bkey_on_stack_exit(&cur, c); + bkey_on_stack_exit(&prev, c); return ret < 0 ? ret : 0; } diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index f53eee7accc8..4fe61705ae75 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -8,6 +8,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "bset.h" #include "btree_update.h" #include "buckets.h" @@ -394,12 +395,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, int bch2_write_index_default(struct bch_write_op *op) { struct bch_fs *c = op->c; + struct bkey_on_stack sk; struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); struct btree_trans trans; struct btree_iter *iter; int ret; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -407,13 +410,14 @@ int bch2_write_index_default(struct bch_write_op *op) BTREE_ITER_SLOTS|BTREE_ITER_INTENT); do { - BKEY_PADDED(k) tmp; + k = bch2_keylist_front(keys); - bkey_copy(&tmp.k, bch2_keylist_front(keys)); + bkey_on_stack_realloc(&sk, c, k->k.u64s); + bkey_copy(sk.k, k); bch2_trans_begin_updates(&trans); - ret = bch2_extent_update(&trans, iter, &tmp.k, + ret = bch2_extent_update(&trans, iter, sk.k, &op->res, op_journal_seq(op), op->new_i_size, &op->i_sectors_delta); if (ret == -EINTR) @@ -421,13 +425,14 @@ int bch2_write_index_default(struct bch_write_op *op) if (ret) break; - if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0) - bch2_cut_front(iter->pos, bch2_keylist_front(keys)); + if (bkey_cmp(iter->pos, k->k.p) < 0) + bch2_cut_front(iter->pos, k); else bch2_keylist_pop_front(keys); } while (!bch2_keylist_empty(keys)); bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return ret; } @@ -1463,13 +1468,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio { 
struct btree_trans trans; struct btree_iter *iter; - BKEY_PADDED(k) tmp; + struct bkey_on_stack sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -1481,11 +1487,12 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio if (bkey_err(k)) goto err; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); - if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k), + if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, rbio->pos.offset - rbio->pick.crc.offset)) { @@ -1502,6 +1509,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio out: bch2_rbio_done(rbio); bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return; err: rbio->bio.bi_status = BLK_STS_IOERR; @@ -1514,12 +1522,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, { struct btree_trans trans; struct btree_iter *iter; + struct bkey_on_stack sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1527,18 +1537,18 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS, k, ret) { - BKEY_PADDED(k) tmp; unsigned bytes, sectors, offset_into_extent; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &tmp.k); + 
&offset_into_extent, sk.k); if (ret) break; @@ -1577,6 +1587,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, rbio->bio.bi_status = BLK_STS_IOERR; out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); bch2_rbio_done(rbio); } @@ -1633,7 +1644,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - BKEY_PADDED(k) new; + struct bkey_on_stack new; struct bch_extent_crc_unpacked new_crc; u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; int ret; @@ -1641,6 +1652,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) if (rbio->pick.crc.compression_type) return; + bkey_on_stack_init(&new); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1651,8 +1663,9 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) if (IS_ERR_OR_NULL(k.k)) goto out; - bkey_reassemble(&new.k, k); - k = bkey_i_to_s_c(&new.k); + bkey_on_stack_realloc(&new, c, k.k->u64s); + bkey_reassemble(new.k, k); + k = bkey_i_to_s_c(new.k); if (bversion_cmp(k.k->version, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) @@ -1671,10 +1684,10 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) goto out; } - if (!bch2_bkey_narrow_crcs(&new.k, new_crc)) + if (!bch2_bkey_narrow_crcs(new.k, new_crc)) goto out; - bch2_trans_update(&trans, iter, &new.k); + bch2_trans_update(&trans, iter, new.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| @@ -1683,6 +1696,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) goto retry; out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&new, c); } /* Inner part that may run in process context */ @@ -2114,6 +2128,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) { struct btree_trans trans; struct btree_iter *iter; + struct bkey_on_stack sk; struct bkey_s_c k; unsigned flags = BCH_READ_RETRY_IF_STALE| 
BCH_READ_MAY_PROMOTE| @@ -2127,6 +2142,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) rbio->c = c; rbio->start_time = local_clock(); + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -2135,7 +2151,6 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) POS(inode, rbio->bio.bi_iter.bi_sector), BTREE_ITER_SLOTS); while (1) { - BKEY_PADDED(k) tmp; unsigned bytes, sectors, offset_into_extent; bch2_btree_iter_set_pos(iter, @@ -2146,15 +2161,16 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) if (ret) goto err; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); - offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); + ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &tmp.k); + &offset_into_extent, sk.k); if (ret) goto err; @@ -2186,6 +2202,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) } out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return; err: if (ret == -EINTR) diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index de8522f754e2..4dacbd637d02 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -4,6 +4,7 @@ */ #include "bcachefs.h" +#include "bkey_on_stack.h" #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" @@ -40,9 +41,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - BKEY_PADDED(key) tmp; + struct bkey_on_stack sk; int ret = 0; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, @@ -58,9 +60,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags continue; } - 
bkey_reassemble(&tmp.key, k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); - ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key), + ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), dev_idx, flags, false); if (ret) break; @@ -70,11 +73,11 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags * will do the appropriate thing with it (turning it into a * KEY_TYPE_error key, or just a discard if it was a cached extent) */ - bch2_extent_normalize(c, bkey_i_to_s(&tmp.key)); + bch2_extent_normalize(c, bkey_i_to_s(sk.k)); - bch2_btree_iter_set_pos(iter, bkey_start_pos(&tmp.key.k)); + bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); - bch2_trans_update(&trans, iter, &tmp.key); + bch2_trans_update(&trans, iter, sk.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| @@ -92,6 +95,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags } ret = bch2_trans_exit(&trans) ?: ret; + bkey_on_stack_exit(&sk, c); BUG_ON(ret == -EINTR); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index c5d3375882d7..dbe35d16e7dd 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "btree_gc.h" #include "btree_update.h" #include "btree_update_interior.h" @@ -489,7 +490,7 @@ static int __bch2_move_data(struct bch_fs *c, { bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - BKEY_PADDED(k) tmp; + struct bkey_on_stack sk; struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; @@ -498,6 +499,7 @@ static int __bch2_move_data(struct bch_fs *c, u64 delay, cur_inum = U64_MAX; int ret = 0, ret2; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); stats->data_type = BCH_DATA_USER; @@ -577,8 +579,9 @@ static int __bch2_move_data(struct bch_fs *c, } /* unlock before doing IO: */ - bkey_reassemble(&tmp.k, k); - k = 
bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k, @@ -605,6 +608,7 @@ static int __bch2_move_data(struct bch_fs *c, } out: ret = bch2_trans_exit(&trans) ?: ret; + bkey_on_stack_exit(&sk, c); return ret; } diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 6e71c5e8f9a2..6d21086c3254 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_on_stack.h" #include "btree_update.h" #include "extents.h" #include "inode.h" @@ -160,7 +161,8 @@ s64 bch2_remap_range(struct bch_fs *c, struct btree_trans trans; struct btree_iter *dst_iter, *src_iter; struct bkey_s_c src_k; - BKEY_PADDED(k) new_dst, new_src; + BKEY_PADDED(k) new_dst; + struct bkey_on_stack new_src; struct bpos dst_end = dst_start, src_end = src_start; struct bpos dst_want, src_want; u64 src_done, dst_done; @@ -183,6 +185,7 @@ s64 bch2_remap_range(struct bch_fs *c, dst_end.offset += remap_sectors; src_end.offset += remap_sectors; + bkey_on_stack_init(&new_src); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, @@ -222,14 +225,15 @@ s64 bch2_remap_range(struct bch_fs *c, break; if (src_k.k->type == KEY_TYPE_extent) { - bkey_reassemble(&new_src.k, src_k); - src_k = bkey_i_to_s_c(&new_src.k); + bkey_on_stack_realloc(&new_src, c, src_k.k->u64s); + bkey_reassemble(new_src.k, src_k); + src_k = bkey_i_to_s_c(new_src.k); - bch2_cut_front(src_iter->pos, &new_src.k); - bch2_cut_back(src_end, &new_src.k.k); + bch2_cut_front(src_iter->pos, new_src.k); + bch2_cut_back(src_end, &new_src.k->k); ret = bch2_make_extent_indirect(&trans, src_iter, - bkey_i_to_extent(&new_src.k)); + bkey_i_to_extent(new_src.k)); if (ret) goto btree_err; @@ -299,6 +303,7 @@ s64 bch2_remap_range(struct bch_fs *c, } while 
(ret2 == -EINTR); ret = bch2_trans_exit(&trans) ?: ret; + bkey_on_stack_exit(&new_src, c); percpu_ref_put(&c->writes); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 14e2f6828cc6..8c7b56a95f4b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -475,6 +475,7 @@ static void bch2_fs_free(struct bch_fs *c) free_percpu(c->usage[0]); kfree(c->usage_base); free_percpu(c->pcpu); + mempool_exit(&c->large_bkey_pool); mempool_exit(&c->btree_bounce_pool); bioset_exit(&c->btree_bio); mempool_exit(&c->btree_interior_update_pool); @@ -729,6 +730,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) !(c->online_reserved = alloc_percpu(u64)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || + mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || bch2_io_clock_init(&c->io_clock[READ]) || bch2_io_clock_init(&c->io_clock[WRITE]) || bch2_fs_journal_init(&c->journal) ||