bcachefs: Persist stripe blocks_used

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2018-11-25 20:53:51 -05:00 committed by Kent Overstreet
parent ed1646ca74
commit 61c8d7c8eb
7 changed files with 222 additions and 96 deletions

View file

@ -561,6 +561,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
dst_iter.pos, ##__VA_ARGS__, \
dst->_f, src->_f); \
dst->_f = src->_f; \
dst->dirty = true; \
}
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \
@ -591,16 +592,18 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
(src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
BUG_ON(src_iter.pos != dst_iter.pos);
copy_stripe_field(alive, "alive");
copy_stripe_field(sectors, "sectors");
copy_stripe_field(algorithm, "algorithm");
copy_stripe_field(nr_blocks, "nr_blocks");
copy_stripe_field(nr_redundant, "nr_redundant");
copy_stripe_field(blocks_nonempty.counter,
copy_stripe_field(blocks_nonempty,
"blocks_nonempty");
for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++)
copy_stripe_field(block_sectors[i].counter,
copy_stripe_field(block_sectors[i],
"block_sectors[%u]", i);
if (dst->alive)

View file

@ -605,9 +605,14 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
int blocks_nonempty_delta;
s64 parity_sectors;
BUG_ON(!sectors);
m = genradix_ptr(&c->stripes[gc], p.idx);
spin_lock(&c->ec_stripes_heap_lock);
if (!m || !m->alive) {
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
(u64) p.idx);
return -1;
@ -623,19 +628,21 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
parity_sectors = -parity_sectors;
sectors += parity_sectors;
new = atomic_add_return(sectors, &m->block_sectors[p.block]);
old = new - sectors;
old = m->block_sectors[p.block];
m->block_sectors[p.block] += sectors;
new = m->block_sectors[p.block];
blocks_nonempty_delta = (int) !!new - (int) !!old;
if (!blocks_nonempty_delta)
return 0;
if (blocks_nonempty_delta) {
m->blocks_nonempty += blocks_nonempty_delta;
atomic_add(blocks_nonempty_delta, &m->blocks_nonempty);
if (!gc)
bch2_stripes_heap_update(c, m, p.idx);
}
BUG_ON(atomic_read(&m->blocks_nonempty) < 0);
m->dirty = true;
if (!gc)
bch2_stripes_heap_update(c, m, p.idx);
spin_unlock(&c->ec_stripes_heap_lock);
update_replicas(c, fs_usage, &m->r.e, sectors);
@ -721,8 +728,6 @@ static void bucket_set_stripe(struct bch_fs *c,
new.journal_seq = journal_seq;
}
}));
BUG_ON(old.stripe == enabled);
}
}
@ -737,22 +742,19 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
unsigned i;
spin_lock(&c->ec_stripes_heap_lock);
if (!m || (!inserting && !m->alive)) {
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
idx);
return -1;
}
if (inserting && m->alive) {
bch_err_ratelimited(c, "error marking stripe %zu: already exists",
idx);
return -1;
}
if (m->alive)
bch2_stripes_heap_del(c, m, idx);
BUG_ON(atomic_read(&m->blocks_nonempty));
for (i = 0; i < EC_STRIPE_MAX; i++)
BUG_ON(atomic_read(&m->block_sectors[i]));
memset(m, 0, sizeof(*m));
if (inserting) {
m->sectors = le16_to_cpu(s.v->sectors);
@ -768,7 +770,6 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
for (i = 0; i < s.v->nr_blocks; i++)
m->r.e.devs[i] = s.v->ptrs[i].dev;
}
/*
* XXX: account for stripes somehow here
@ -777,15 +778,23 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
#endif
if (!gc) {
if (inserting)
/* gc recalculates these fields: */
if (!(flags & BCH_BUCKET_MARK_GC)) {
for (i = 0; i < s.v->nr_blocks; i++) {
m->block_sectors[i] =
stripe_blockcount_get(s.v, i);
m->blocks_nonempty += !!m->block_sectors[i];
}
}
if (!gc)
bch2_stripes_heap_insert(c, m, idx);
else
bch2_stripes_heap_del(c, m, idx);
} else {
m->alive = inserting;
m->alive = true;
}
spin_unlock(&c->ec_stripes_heap_lock);
bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
return 0;
}

View file

@ -12,6 +12,7 @@
#include "ec.h"
#include "error.h"
#include "io.h"
#include "journal_io.h"
#include "keylist.h"
#include "super-io.h"
#include "util.h"
@ -99,40 +100,6 @@ struct ec_bio {
/* Stripes btree keys: */
static unsigned stripe_csums_per_device(const struct bch_stripe *s)
{
return DIV_ROUND_UP(le16_to_cpu(s->sectors),
1 << s->csum_granularity_bits);
}
static unsigned stripe_csum_offset(const struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
unsigned csum_bytes = bch_crc_bytes[s->csum_type];
return sizeof(struct bch_stripe) +
sizeof(struct bch_extent_ptr) * s->nr_blocks +
(dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes;
}
static unsigned stripe_blockcount_offset(const struct bch_stripe *s,
unsigned idx)
{
return stripe_csum_offset(s, s->nr_blocks, 0) +
sizeof(16) * idx;
}
/*
 * Size of the stripe value @s in u64 units: the offset just past the last
 * block counter, rounded up to a whole number of u64s.
 */
static unsigned stripe_val_u64s(const struct bch_stripe *s)
{
	unsigned val_bytes = stripe_blockcount_offset(s, s->nr_blocks);

	return DIV_ROUND_UP(val_bytes, sizeof(u64));
}
/*
 * Pointer to checksum number @csum_idx of device @dev inside the stripe
 * value @s.
 */
static void *stripe_csum(struct bch_stripe *s, unsigned dev, unsigned csum_idx)
{
	unsigned offset = stripe_csum_offset(s, dev, csum_idx);

	return (void *) s + offset;
}
const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
@ -165,8 +132,9 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
1U << s->csum_granularity_bits);
for (i = 0; i < s->nr_blocks; i++)
pr_buf(out, " %u:%llu", s->ptrs[i].dev,
(u64) s->ptrs[i].offset);
pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev,
(u64) s->ptrs[i].offset,
stripe_blockcount_get(s, i));
}
static int ptr_matches_stripe(struct bch_fs *c,
@ -610,29 +578,15 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
BUG_ON(h->data[m->heap_idx].idx != idx);
}
/* Current value of the atomic nonempty-block counter of stripe @m. */
static inline unsigned stripe_entry_blocks(struct stripe *m)
{
	int nonempty = atomic_read(&m->blocks_nonempty);

	return nonempty;
}
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
bool queue_delete;
size_t i;
spin_lock(&c->ec_stripes_heap_lock);
if (!m->alive) {
spin_unlock(&c->ec_stripes_heap_lock);
return;
}
heap_verify_backpointer(c, idx);
h->data[m->heap_idx].blocks_nonempty =
stripe_entry_blocks(m);
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,
@ -642,44 +596,35 @@ void bch2_stripes_heap_update(struct bch_fs *c,
heap_verify_backpointer(c, idx);
queue_delete = stripe_idx_to_delete(c) >= 0;
spin_unlock(&c->ec_stripes_heap_lock);
if (queue_delete)
if (stripe_idx_to_delete(c) >= 0)
schedule_work(&c->ec_stripe_delete_work);
}
/*
 * Remove stripe @m (stripe index @idx) from c->ec_stripes_heap.
 *
 * Checks the heap backpointer for @idx, clears m->alive, then deletes the
 * heap entry at m->heap_idx.
 *
 * NOTE(review): bch2_mark_stripe in this change takes
 * c->ec_stripes_heap_lock itself before manipulating the heap; the
 * spin_lock/spin_unlock pair below may be the pre-change side of the
 * diff - confirm which locking convention is current.
 */
void bch2_stripes_heap_del(struct bch_fs *c,
struct stripe *m, size_t idx)
{
spin_lock(&c->ec_stripes_heap_lock);
/* sanity-check that the heap entry for @idx points back at it */
heap_verify_backpointer(c, idx);
m->alive = false;
heap_del(&c->ec_stripes_heap, m->heap_idx,
ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
spin_unlock(&c->ec_stripes_heap_lock);
}
/*
 * Add stripe @m (stripe index @idx) to c->ec_stripes_heap and mark it
 * alive.
 *
 * The heap must not already be full (BUG_ON otherwise). The new entry
 * records @idx and the stripe's nonempty-block count.
 *
 * NOTE(review): two .blocks_nonempty initializers appear below; they look
 * like the old (stripe_entry_blocks()) and new (m->blocks_nonempty) sides
 * of the diff - confirm which one is current. The same question applies
 * to the spin_lock/spin_unlock pair.
 */
void bch2_stripes_heap_insert(struct bch_fs *c,
struct stripe *m, size_t idx)
{
spin_lock(&c->ec_stripes_heap_lock);
BUG_ON(heap_full(&c->ec_stripes_heap));
heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
.idx = idx,
.blocks_nonempty = stripe_entry_blocks(m),
.blocks_nonempty = m->blocks_nonempty,
}),
ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
m->alive = true;
/* sanity-check that the freshly added entry points back at @idx */
heap_verify_backpointer(c, idx);
spin_unlock(&c->ec_stripes_heap_lock);
}
/* stripe deletion */
@ -1218,6 +1163,116 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
mutex_unlock(&c->ec_new_stripe_lock);
}
/*
 * Rewrite the stripe key at POS(0, @idx) so that its per-block sector
 * counters match the in-memory stripe @m.
 *
 * @iter:    btree iterator, repositioned to the stripe's slot
 * @new_key: caller-provided scratch buffer the existing key is copied into
 * @flags:   extra BTREE_INSERT_* flags, OR-ed with BTREE_INSERT_NOFAIL
 *
 * Returns the iterator error if the lookup fails, -EIO if the slot does
 * not hold a stripe key, otherwise the result of the btree insert.
 *
 * m->dirty is cleared under ec_stripes_heap_lock, in the same critical
 * section that snapshots the counters, before the insert is issued.
 */
static int __bch2_stripe_write_key(struct bch_fs *c,
				   struct btree_iter *iter,
				   struct stripe *m,
				   size_t idx,
				   struct bkey_i_stripe *new_key,
				   unsigned flags)
{
	struct bkey_s_c existing;
	unsigned blk;
	int err;

	bch2_btree_iter_set_pos(iter, POS(0, idx));

	existing = bch2_btree_iter_peek_slot(iter);
	err = btree_iter_err(existing);
	if (err)
		return err;

	if (existing.k->type != KEY_TYPE_stripe)
		return -EIO;

	/* start from the key as it currently exists in the btree */
	bkey_reassemble(&new_key->k_i, existing);

	spin_lock(&c->ec_stripes_heap_lock);

	for (blk = 0; blk < new_key->v.nr_blocks; blk++) {
		stripe_blockcount_set(&new_key->v, blk,
				      m->block_sectors[blk]);
	}
	m->dirty = false;

	spin_unlock(&c->ec_stripes_heap_lock);

	return bch2_btree_insert_at(c, NULL, NULL,
				    BTREE_INSERT_NOFAIL|flags,
				    BTREE_INSERT_ENTRY(iter, &new_key->k_i));
}
/*
 * Write back every in-memory stripe in c->stripes[0] whose dirty flag is
 * set, updating the corresponding keys in the EC btree.
 *
 * @wrote: set to true once at least one stripe key has been written
 *         successfully; never cleared here.
 *
 * Returns 0 on success, -ENOMEM if the scratch key buffer cannot be
 * allocated, or the first error from __bch2_stripe_write_key(), at which
 * point the walk stops.
 */
int bch2_stripes_write(struct bch_fs *c, bool *wrote)
{
	struct btree_iter iter;
	struct genradix_iter giter;
	struct bkey_i_stripe *new_key;
	struct stripe *m;
	int ret = 0;

	/*
	 * Scratch buffer for the rewritten key.
	 * NOTE(review): 255 u64s is presumably the largest possible key -
	 * confirm against the bkey format.
	 */
	new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL);
	if (!new_key) {
		/* allocation failure is recoverable: report it, don't BUG */
		return -ENOMEM;
	}

	bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS_MIN,
			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

	genradix_for_each(&c->stripes[0], giter, m) {
		if (!m->dirty)
			continue;

		ret = __bch2_stripe_write_key(c, &iter, m, giter.pos,
					      new_key, BTREE_INSERT_NOCHECK_RW);
		if (ret)
			break;

		*wrote = true;
	}

	bch2_btree_iter_unlock(&iter);

	kfree(new_key);

	return ret;
}
static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
{
struct gc_pos pos = { 0 };
bch2_mark_key(c, k, true, 0, pos, NULL, 0, 0);
}
/*
 * Populate the in-memory stripe state at startup.
 *
 * After bch2_fs_ec_start() succeeds, every key in the EC btree is passed
 * to bch2_stripe_read_key(), followed by every EC-btree key found in the
 * journal entries on @journal_replay_list.
 *
 * Returns 0 on success, or the error from bch2_fs_ec_start() or from
 * unlocking the btree iterator.
 */
int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
{
	struct journal_replay *replay;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	ret = bch2_fs_ec_start(c);
	if (ret)
		return ret;

	/* pass 1: keys already present in the EC btree */
	for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN, 0, k) {
		bch2_stripe_read_key(c, k);
		bch2_btree_iter_cond_resched(&iter);
	}

	ret = bch2_btree_iter_unlock(&iter);
	if (ret)
		return ret;

	/* pass 2: EC keys from the journal entries on the replay list */
	list_for_each_entry(replay, journal_replay_list, list) {
		struct jset_entry *entry;
		struct bkey_i *jkey, *next;

		for_each_jset_key(jkey, next, entry, &replay->j) {
			if (entry->btree_id == BTREE_ID_EC) {
				bch2_stripe_read_key(c, bkey_i_to_s_c(jkey));
			}
		}
	}

	return 0;
}
int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
{
struct btree_iter iter;

View file

@ -14,6 +14,55 @@ void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
.val_to_text = bch2_stripe_to_text, \
}
static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
{
return DIV_ROUND_UP(le16_to_cpu(s->sectors),
1 << s->csum_granularity_bits);
}
static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
unsigned csum_bytes = bch_crc_bytes[s->csum_type];
return sizeof(struct bch_stripe) +
sizeof(struct bch_extent_ptr) * s->nr_blocks +
(dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes;
}
/*
 * Byte offset, from the start of the stripe value @s, of the 16-bit
 * sector counter for block @idx. The counter array begins where the
 * checksums end.
 */
static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s,
						unsigned idx)
{
	unsigned counters_start = stripe_csum_offset(s, s->nr_blocks, 0);

	return counters_start + sizeof(u16) * idx;
}
static inline unsigned stripe_blockcount_get(const struct bch_stripe *s,
unsigned idx)
{
return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx));
}
/*
 * Store @v as the sector counter for block @idx in the stripe value @s.
 * The counter is written as a little-endian 16-bit value.
 */
static inline void stripe_blockcount_set(struct bch_stripe *s,
					 unsigned idx, unsigned v)
{
	unsigned offset = stripe_blockcount_offset(s, idx);
	__le16 *count = (void *) s + offset;

	*count = cpu_to_le16(v);
}
/*
 * Size of the stripe value @s in u64 units: the offset just past the last
 * block counter, rounded up to a whole number of u64s.
 */
static inline unsigned stripe_val_u64s(const struct bch_stripe *s)
{
	unsigned val_bytes = stripe_blockcount_offset(s, s->nr_blocks);

	return DIV_ROUND_UP(val_bytes, sizeof(u64));
}
/*
 * Pointer to checksum number @csum_idx of device @dev inside the stripe
 * value @s.
 */
static inline void *stripe_csum(struct bch_stripe *s,
				unsigned dev, unsigned csum_idx)
{
	unsigned offset = stripe_csum_offset(s, dev, csum_idx);

	return (void *) s + offset;
}
struct bch_read_bio;
struct ec_stripe_buf {
@ -101,6 +150,9 @@ void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
void bch2_ec_flush_new_stripes(struct bch_fs *);
int bch2_stripes_read(struct bch_fs *, struct list_head *);
int bch2_stripes_write(struct bch_fs *, bool *);
int bch2_ec_mem_alloc(struct bch_fs *, bool);
int bch2_fs_ec_start(struct bch_fs *);

View file

@ -20,9 +20,10 @@ struct stripe {
u8 nr_blocks;
u8 nr_redundant;
u8 alive;
atomic_t blocks_nonempty;
atomic_t block_sectors[EC_STRIPE_MAX];
unsigned alive:1;
unsigned dirty:1;
u8 blocks_nonempty;
u16 block_sectors[EC_STRIPE_MAX];
struct bch_replicas_padded r;
};

View file

@ -215,12 +215,12 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
err = "cannot allocate memory";
ret = bch2_fs_ec_start(c);
ret = bch2_stripes_read(c, &journal);
if (ret)
goto err;
pr_info("stripes_read done");
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
bch_verbose(c, "starting mark and sweep:");
err = "error in recovery";

View file

@ -198,6 +198,12 @@ static void __bch2_fs_read_only(struct bch_fs *c)
break;
}
ret = bch2_stripes_write(c, &wrote);
if (ret) {
bch2_fs_inconsistent(c, "error writing out stripes");
break;
}
for_each_member_device(ca, c, i)
bch2_dev_allocator_quiesce(c, ca);