btrfs: remove root usage from can_overcommit

can_overcommit's use of the root to determine the allocation profile
is the only use of a root in the call graph below reserve_metadata_bytes.

It turns out that we only need to know whether the allocation is for
the chunk root or not -- and we can pass that around as a bool instead.

This allows us to pull root usage out of the reservation path all the
way up to reserve_metadata_bytes itself, which uses it only to compare
against fs_info->chunk_root to set the bool.  In turn, this eliminates
a bunch of races where we use a particular root too early in the mount
process.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Jeff Mahoney 2017-05-17 11:38:36 -04:00 committed by David Sterba
parent 1b86826d12
commit c1c4919b11

View file

@ -97,10 +97,11 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int delalloc); u64 num_bytes, int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes); u64 num_bytes);
static int __reserve_metadata_bytes(struct btrfs_root *root, static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, struct btrfs_space_info *space_info,
u64 orig_bytes, u64 orig_bytes,
enum btrfs_reserve_flush_enum flush); enum btrfs_reserve_flush_enum flush,
bool system_chunk);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, struct btrfs_space_info *space_info,
u64 num_bytes); u64 num_bytes);
@ -4628,11 +4629,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
return ret; return ret;
} }
static int can_overcommit(struct btrfs_root *root, static int can_overcommit(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 bytes, struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush) enum btrfs_reserve_flush_enum flush,
bool system_chunk)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 profile; u64 profile;
u64 space_size; u64 space_size;
@ -4643,7 +4644,11 @@ static int can_overcommit(struct btrfs_root *root,
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
return 0; return 0;
profile = get_alloc_profile_by_root(root, 0); if (system_chunk)
profile = btrfs_system_alloc_profile(fs_info);
else
profile = btrfs_metadata_alloc_profile(fs_info);
used = btrfs_space_info_used(space_info, false); used = btrfs_space_info_used(space_info, false);
/* /*
@ -4728,10 +4733,9 @@ static inline int calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
/* /*
* shrink metadata reservation for delalloc * shrink metadata reservation for delalloc
*/ */
static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
bool wait_ordered) u64 orig, bool wait_ordered)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info; struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
@ -4788,7 +4792,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
else else
flush = BTRFS_RESERVE_NO_FLUSH; flush = BTRFS_RESERVE_NO_FLUSH;
spin_lock(&space_info->lock); spin_lock(&space_info->lock);
if (can_overcommit(root, space_info, orig, flush)) { if (can_overcommit(fs_info, space_info, orig, flush, false)) {
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
break; break;
} }
@ -4898,7 +4902,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
break; break;
case FLUSH_DELALLOC: case FLUSH_DELALLOC:
case FLUSH_DELALLOC_WAIT: case FLUSH_DELALLOC_WAIT:
shrink_delalloc(root, num_bytes * 2, orig_bytes, shrink_delalloc(fs_info, num_bytes * 2, orig_bytes,
state == FLUSH_DELALLOC_WAIT); state == FLUSH_DELALLOC_WAIT);
break; break;
case ALLOC_CHUNK: case ALLOC_CHUNK:
@ -4929,8 +4933,9 @@ static int flush_space(struct btrfs_fs_info *fs_info,
} }
static inline u64 static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info) struct btrfs_space_info *space_info,
bool system_chunk)
{ {
struct reserve_ticket *ticket; struct reserve_ticket *ticket;
u64 used; u64 used;
@ -4945,14 +4950,15 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
return to_reclaim; return to_reclaim;
to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
if (can_overcommit(root, space_info, to_reclaim, if (can_overcommit(fs_info, space_info, to_reclaim,
BTRFS_RESERVE_FLUSH_ALL)) BTRFS_RESERVE_FLUSH_ALL, system_chunk))
return 0; return 0;
used = space_info->bytes_used + space_info->bytes_reserved + used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly + space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use; space_info->bytes_may_use;
if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL)) if (can_overcommit(fs_info, space_info, SZ_1M,
BTRFS_RESERVE_FLUSH_ALL, system_chunk))
expected = div_factor_fine(space_info->total_bytes, 95); expected = div_factor_fine(space_info->total_bytes, 95);
else else
expected = div_factor_fine(space_info->total_bytes, 90); expected = div_factor_fine(space_info->total_bytes, 90);
@ -4966,17 +4972,18 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
return to_reclaim; return to_reclaim;
} }
static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
struct btrfs_root *root, u64 used) struct btrfs_space_info *space_info,
u64 used, bool system_chunk)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
u64 thresh = div_factor_fine(space_info->total_bytes, 98); u64 thresh = div_factor_fine(space_info->total_bytes, 98);
/* If we're just plain full then async reclaim just slows us down. */ /* If we're just plain full then async reclaim just slows us down. */
if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0; return 0;
if (!btrfs_calc_reclaim_metadata_size(root, space_info)) if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
system_chunk))
return 0; return 0;
return (used >= thresh && !btrfs_fs_closing(fs_info) && return (used >= thresh && !btrfs_fs_closing(fs_info) &&
@ -5013,8 +5020,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&space_info->lock); spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
space_info); false);
if (!to_reclaim) { if (!to_reclaim) {
space_info->flush = 0; space_info->flush = 0;
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
@ -5036,8 +5043,9 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
return; return;
} }
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
space_info); space_info,
false);
ticket = list_first_entry(&space_info->tickets, ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list); struct reserve_ticket, list);
if (last_tickets_id == space_info->tickets_id) { if (last_tickets_id == space_info->tickets_id) {
@ -5075,8 +5083,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
int flush_state = FLUSH_DELAYED_ITEMS_NR; int flush_state = FLUSH_DELAYED_ITEMS_NR;
spin_lock(&space_info->lock); spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root, to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
space_info); false);
if (!to_reclaim) { if (!to_reclaim) {
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
return; return;
@ -5155,12 +5163,12 @@ static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
* regain reservations will be made and this will fail if there is not enough * regain reservations will be made and this will fail if there is not enough
* space already. * space already.
*/ */
static int __reserve_metadata_bytes(struct btrfs_root *root, static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, struct btrfs_space_info *space_info,
u64 orig_bytes, u64 orig_bytes,
enum btrfs_reserve_flush_enum flush) enum btrfs_reserve_flush_enum flush,
bool system_chunk)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
struct reserve_ticket ticket; struct reserve_ticket ticket;
u64 used; u64 used;
int ret = 0; int ret = 0;
@ -5182,7 +5190,8 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
trace_btrfs_space_reservation(fs_info, "space_info", trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, orig_bytes, 1); space_info->flags, orig_bytes, 1);
ret = 0; ret = 0;
} else if (can_overcommit(root, space_info, orig_bytes, flush)) { } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
system_chunk)) {
space_info->bytes_may_use += orig_bytes; space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(fs_info, "space_info", trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, orig_bytes, 1); space_info->flags, orig_bytes, 1);
@ -5209,7 +5218,7 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
orig_bytes, flush, orig_bytes, flush,
"enospc"); "enospc");
queue_work(system_unbound_wq, queue_work(system_unbound_wq,
&root->fs_info->async_reclaim_work); &fs_info->async_reclaim_work);
} }
} else { } else {
list_add_tail(&ticket.list, list_add_tail(&ticket.list,
@ -5223,7 +5232,8 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
* the async reclaim as we will panic. * the async reclaim as we will panic.
*/ */
if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
need_do_async_reclaim(space_info, root, used) && need_do_async_reclaim(fs_info, space_info,
used, system_chunk) &&
!work_busy(&fs_info->async_reclaim_work)) { !work_busy(&fs_info->async_reclaim_work)) {
trace_btrfs_trigger_flush(fs_info, space_info->flags, trace_btrfs_trigger_flush(fs_info, space_info->flags,
orig_bytes, flush, "preempt"); orig_bytes, flush, "preempt");
@ -5281,9 +5291,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
int ret; int ret;
bool system_chunk = (root == fs_info->chunk_root);
ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes, ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
flush); orig_bytes, flush, system_chunk);
if (ret == -ENOSPC && if (ret == -ENOSPC &&
unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
if (block_rsv != global_rsv && if (block_rsv != global_rsv &&
@ -5406,8 +5417,7 @@ static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
* adding the ticket space would be a double count. * adding the ticket space would be a double count.
*/ */
if (check_overcommit && if (check_overcommit &&
!can_overcommit(fs_info->extent_root, space_info, 0, !can_overcommit(fs_info, space_info, 0, flush, false))
flush))
break; break;
if (num_bytes >= ticket->bytes) { if (num_bytes >= ticket->bytes) {
list_del_init(&ticket->list); list_del_init(&ticket->list);