diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6ba15c45e779..ba71a84fd1ee 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1246,6 +1246,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 	struct btrfs_block_group *block_group;
 	struct btrfs_space_info *space_info;
 	struct btrfs_trans_handle *trans;
+	const bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC);
 	int ret = 0;
 
 	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
@@ -1275,6 +1276,22 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 
 		/* Don't want to race with allocators so take the groups_sem */
 		down_write(&space_info->groups_sem);
+
+		/*
+		 * Async discard moves the final block group discard to be prior
+		 * to the unused_bgs code path. Therefore, if it's not fully
+		 * trimmed, punt it back to the async discard lists.
+		 */
+		if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
+		    !btrfs_is_free_space_trimmed(block_group)) {
+			trace_btrfs_skip_unused_block_group(block_group);
+			up_write(&space_info->groups_sem);
+			/* Requeue if we failed because of async discard */
+			btrfs_discard_queue_work(&fs_info->discard_ctl,
+						 block_group);
+			goto next;
+		}
+
 		spin_lock(&block_group->lock);
 		if (block_group->reserved || block_group->pinned ||
 		    block_group->used || block_group->ro ||
@@ -1378,6 +1395,16 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		spin_unlock(&block_group->lock);
 		spin_unlock(&space_info->lock);
 
+		/*
+		 * The normal path here is an unused block group is passed in,
+		 * then trimming is handled in the transaction commit path.
+		 * Async discard interposes before this to do the trimming
+		 * before coming down the unused block group path as trimming
+		 * will no longer be done later in the transaction commit path.
+		 */
+		if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
+			goto flip_async;
+
 		/* DISCARD can flip during remount */
 		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
 
@@ -1422,6 +1449,13 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		spin_lock(&fs_info->unused_bgs_lock);
 	}
 	spin_unlock(&fs_info->unused_bgs_lock);
+	return;
+
+flip_async:
+	btrfs_end_transaction(trans);
+	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+	btrfs_put_block_group(block_group);
+	btrfs_discard_punt_unused_bgs_list(fs_info);
 }
 
 void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
@@ -1626,6 +1660,8 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 	cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
 	set_free_space_tree_thresholds(cache);
 
+	cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
+
 	atomic_set(&cache->count, 1);
 	spin_lock_init(&cache->lock);
 	init_rwsem(&cache->data_rwsem);
@@ -1792,7 +1828,10 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 			inc_block_group_ro(cache, 1);
 	} else if (cache->used == 0) {
 		ASSERT(list_empty(&cache->bg_list));
-		btrfs_mark_bg_unused(cache);
+		if (btrfs_test_opt(info, DISCARD_ASYNC))
+			btrfs_discard_queue_work(&info->discard_ctl, cache);
+		else
+			btrfs_mark_bg_unused(cache);
 	}
 	return 0;
 error:
@@ -2755,8 +2794,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 		 * dirty list to avoid races between cleaner kthread and space
 		 * cache writeout.
 		 */
-		if (!alloc && old_val == 0)
-			btrfs_mark_bg_unused(cache);
+		if (!alloc && old_val == 0) {
+			if (!btrfs_test_opt(info, DISCARD_ASYNC))
+				btrfs_mark_bg_unused(cache);
+		}
 
 		btrfs_put_block_group(cache);
 		total -= num_bytes;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f7b429277089..d15a4aa721aa 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -443,9 +443,14 @@ struct btrfs_full_stripe_locks_tree {
 /* Discard control. */
 /*
  * Async discard uses multiple lists to differentiate the discard filter
- * parameters.
+ * parameters. Index 0 is for completely free block groups where we need to
+ * ensure the entire block group is trimmed without being lossy. Indices
+ * afterwards represent monotonically decreasing discard filter sizes to
+ * prioritize what should be discarded next.
  */
-#define BTRFS_NR_DISCARD_LISTS		1
+#define BTRFS_NR_DISCARD_LISTS		2
+#define BTRFS_DISCARD_INDEX_UNUSED	0
+#define BTRFS_DISCARD_INDEX_START	1
 
 struct btrfs_discard_ctl {
 	struct workqueue_struct *discard_workers;
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 5924e757471b..0f1c6d01aab0 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -13,6 +13,7 @@
 
 /* This is an initial delay to give some chance for block reuse */
 #define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
+#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)
 
 static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
 					  struct btrfs_block_group *block_group)
@@ -30,9 +31,13 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
 		return;
 	}
 
-	if (list_empty(&block_group->discard_list))
+	if (list_empty(&block_group->discard_list) ||
+	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
+		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
+			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
 		block_group->discard_eligible_time = (ktime_get_ns() +
 						      BTRFS_DISCARD_DELAY);
+	}
 
 	list_move_tail(&block_group->discard_list,
 		       get_discard_list(discard_ctl, block_group));
@@ -40,6 +45,27 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
 	spin_unlock(&discard_ctl->lock);
 }
 
+static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
+				       struct btrfs_block_group *block_group)
+{
+	spin_lock(&discard_ctl->lock);
+
+	if (!btrfs_run_discard_work(discard_ctl)) {
+		spin_unlock(&discard_ctl->lock);
+		return;
+	}
+
+	list_del_init(&block_group->discard_list);
+
+	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
+	block_group->discard_eligible_time = (ktime_get_ns() +
+					      BTRFS_DISCARD_UNUSED_DELAY);
+	list_add_tail(&block_group->discard_list,
+		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
+
+	spin_unlock(&discard_ctl->lock);
+}
+
 static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
 				     struct btrfs_block_group *block_group)
 {
@@ -154,7 +180,10 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
 	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 		return;
 
-	add_to_discard_list(discard_ctl, block_group);
+	if (block_group->used == 0)
+		add_to_discard_unused_list(discard_ctl, block_group);
+	else
+		add_to_discard_list(discard_ctl, block_group);
 
 	if (!delayed_work_pending(&discard_ctl->work))
 		btrfs_discard_schedule_work(discard_ctl, false);
@@ -198,6 +227,29 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 	spin_unlock(&discard_ctl->lock);
 }
 
+/**
+ * btrfs_finish_discard_pass - determine next step of a block_group
+ * @discard_ctl: discard control
+ * @block_group: block_group of interest
+ *
+ * This determines the next step for a block group after it's finished going
+ * through a pass on a discard list. If it is unused and fully trimmed, we can
+ * mark it unused and send it to the unused_bgs path. Otherwise, pass it onto
+ * the appropriate filter list or let it fall off.
+ */
+static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
+				      struct btrfs_block_group *block_group)
+{
+	remove_from_discard_list(discard_ctl, block_group);
+
+	if (block_group->used == 0) {
+		if (btrfs_is_free_space_trimmed(block_group))
+			btrfs_mark_bg_unused(block_group);
+		else
+			add_to_discard_unused_list(discard_ctl, block_group);
+	}
+}
+
 /**
  * btrfs_discard_workfn - discard work function
  * @work: work
@@ -219,7 +271,7 @@ static void btrfs_discard_workfn(struct work_struct *work)
 	btrfs_trim_block_group(block_group, &trimmed, block_group->start,
 			       btrfs_block_group_end(block_group), 0);
-	remove_from_discard_list(discard_ctl, block_group);
+	btrfs_finish_discard_pass(discard_ctl, block_group);
 
 	btrfs_discard_schedule_work(discard_ctl, false);
 }
 
@@ -239,6 +291,60 @@ bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
 		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
 }
 
+/**
+ * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
+ * @fs_info: fs_info of interest
+ *
+ * The unused_bgs list needs to be punted to the discard lists because the
+ * order of operations is changed. In the normal synchronous discard path, the
+ * block groups are trimmed via a single large trim in transaction commit. This
+ * is ultimately what we are trying to avoid with asynchronous discard. Thus,
+ * it must be done before going down the unused_bgs path.
+ */
+void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_block_group *block_group, *next;
+
+	spin_lock(&fs_info->unused_bgs_lock);
+	/* We enabled async discard, so punt all to the queue */
+	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
+				 bg_list) {
+		list_del_init(&block_group->bg_list);
+		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
+	}
+	spin_unlock(&fs_info->unused_bgs_lock);
+}
+
+/**
+ * btrfs_discard_purge_list - purge discard lists
+ * @discard_ctl: discard control
+ *
+ * If we are disabling async discard, we may have intercepted block groups that
+ * are completely free and ready for the unused_bgs path. As discarding will
+ * now happen in transaction commit or not at all, we can safely mark the
+ * corresponding block groups as unused and they will be sent on their merry
+ * way to the unused_bgs list.
+ */
+static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
+{
+	struct btrfs_block_group *block_group, *next;
+	int i;
+
+	spin_lock(&discard_ctl->lock);
+	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
+		list_for_each_entry_safe(block_group, next,
+					 &discard_ctl->discard_list[i],
+					 discard_list) {
+			list_del_init(&block_group->discard_list);
+			spin_unlock(&discard_ctl->lock);
+			if (block_group->used == 0)
+				btrfs_mark_bg_unused(block_group);
+			spin_lock(&discard_ctl->lock);
+		}
+	}
+	spin_unlock(&discard_ctl->lock);
+}
+
 void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
 {
 	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
@@ -246,6 +352,8 @@ void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
 		return;
 	}
 
+	btrfs_discard_punt_unused_bgs_list(fs_info);
+
 	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 }
 
@@ -270,4 +378,5 @@ void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
 {
 	btrfs_discard_stop(fs_info);
 	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
+	btrfs_discard_purge_list(&fs_info->discard_ctl);
 }
diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h
index f3775e84d35a..3c5a04f8714f 100644
--- a/fs/btrfs/discard.h
+++ b/fs/btrfs/discard.h
@@ -7,6 +7,7 @@ struct btrfs_fs_info;
 struct btrfs_discard_ctl;
 struct btrfs_block_group;
 
+/* Work operations */
 void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
 			       struct btrfs_block_group *block_group);
 void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
@@ -15,6 +16,8 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 				 bool override);
 bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);
 
+/* Setup/cleanup operations */
+void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);
 void btrfs_discard_resume(struct btrfs_fs_info *fs_info);
 void btrfs_discard_stop(struct btrfs_fs_info *fs_info);
 void btrfs_discard_init(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index fdc5401f3877..3c2796bb6498 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2707,6 +2707,37 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
 
 }
 
+/**
+ * btrfs_is_free_space_trimmed - see if everything is trimmed
+ * @block_group: block_group of interest
+ *
+ * Walk @block_group's free space rb_tree to determine if everything is trimmed.
+ */
+bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
+{
+	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+	struct btrfs_free_space *info;
+	struct rb_node *node;
+	bool ret = true;
+
+	spin_lock(&ctl->tree_lock);
+	node = rb_first(&ctl->free_space_offset);
+
+	while (node) {
+		info = rb_entry(node, struct btrfs_free_space, offset_index);
+
+		if (!btrfs_free_space_trimmed(info)) {
+			ret = false;
+			break;
+		}
+
+		node = rb_next(node);
+	}
+
+	spin_unlock(&ctl->tree_lock);
+	return ret;
+}
+
 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
 			       u64 offset, u64 bytes, u64 empty_size,
 			       u64 *max_extent_size)
@@ -2793,6 +2824,8 @@ int btrfs_return_cluster_to_free_space(
 	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
 	spin_unlock(&ctl->tree_lock);
 
+	btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
+
 	/* finally drop our ref */
 	btrfs_put_block_group(block_group);
 	return ret;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 6a85a5d16343..f799eb491410 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -119,6 +119,7 @@ int btrfs_remove_free_space(struct btrfs_block_group *block_group,
 			    u64 bytenr, u64 size);
 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group);
+bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
 			       u64 offset, u64 bytes, u64 empty_size,
 			       u64 *max_extent_size);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 21de630b0730..22cf69e6e5bc 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -8,6 +8,7 @@
 #include <linux/sched/mm.h>
 #include <crypto/hash.h>
 #include "ctree.h"
+#include "discard.h"
 #include "volumes.h"
 #include "disk-io.h"
 #include "ordered-data.h"
@@ -3659,7 +3660,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		if (!cache->removed && !cache->ro && cache->reserved == 0 &&
 		    cache->used == 0) {
 			spin_unlock(&cache->lock);
-			btrfs_mark_bg_unused(cache);
+			if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
+				btrfs_discard_queue_work(&fs_info->discard_ctl,
+							 cache);
+			else
+				btrfs_mark_bg_unused(cache);
 		} else {
 			spin_unlock(&cache->lock);
 		}
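
The net effect on an empty block group is easiest to see end to end: btrfs_discard_queue_work() now routes it to the dedicated unused discard list, and btrfs_finish_discard_pass() only hands it to the unused_bgs deletion path once btrfs_is_free_space_trimmed() reports every free space extent as trimmed. The standalone C sketch below models just that decision flow; it is illustrative userspace code, not part of the patch, and its names (bg_destination, model_bg, queue_destination, after_discard_pass) are invented for the example.

/*
 * Illustrative userspace model of the empty block group flow added by this
 * patch.  Only the branch structure mirrors btrfs_discard_queue_work() and
 * the empty-group half of btrfs_finish_discard_pass(); all names here are
 * made up for the sketch.
 */
#include <stdbool.h>
#include <stdio.h>

enum bg_destination {
	DEST_UNUSED_BGS,	/* deletion path via btrfs_mark_bg_unused() */
	DEST_DISCARD_UNUSED,	/* discard_list[BTRFS_DISCARD_INDEX_UNUSED] */
	DEST_DISCARD_FILTER,	/* discard_list[BTRFS_DISCARD_INDEX_START] */
};

struct model_bg {
	unsigned long long used;	/* bytes allocated in the block group */
	bool fully_trimmed;		/* btrfs_is_free_space_trimmed() result */
};

/*
 * Mirrors btrfs_discard_queue_work() with async discard enabled: a fully
 * empty block group goes to the dedicated "unused" list rather than the
 * regular filter list.  (With async discard off, callers fall back to
 * btrfs_mark_bg_unused() directly.)
 */
static enum bg_destination queue_destination(const struct model_bg *bg)
{
	return bg->used == 0 ? DEST_DISCARD_UNUSED : DEST_DISCARD_FILTER;
}

/*
 * Mirrors the empty-group handling in btrfs_finish_discard_pass(): only once
 * every free space extent is marked trimmed does the group move on to the
 * unused_bgs deletion path; otherwise it is requeued for another pass.
 */
static enum bg_destination after_discard_pass(const struct model_bg *bg)
{
	return bg->fully_trimmed ? DEST_UNUSED_BGS : DEST_DISCARD_UNUSED;
}

int main(void)
{
	struct model_bg bg = { .used = 0, .fully_trimmed = false };

	printf("empty bg queued to list %d\n", queue_destination(&bg));
	printf("after an untrimmed pass: %d\n", after_discard_pass(&bg));

	bg.fully_trimmed = true;
	printf("after a fully trimmed pass: %d\n", after_discard_pass(&bg));
	return 0;
}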