diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 19833b4af630..93270e20a8e7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1149,12 +1149,6 @@ struct btrfs_fs_info { struct mutex unused_bg_unpin_mutex; struct mutex delete_unused_bgs_mutex; - /* - * Chunks that can't be freed yet (under a trim/discard operation) - * and will be latter freed. Protected by fs_info->chunk_mutex. - */ - struct list_head pinned_chunks; - /* Cached block sizes */ u32 nodesize; u32 sectorsize; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8c10702a3f83..0b2b75a7efbd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2774,8 +2774,6 @@ int open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->async_submit_wait); init_waitqueue_head(&fs_info->delayed_iputs_wait); - INIT_LIST_HEAD(&fs_info->pinned_chunks); - /* Usable values until the real ones are cached from the superblock */ fs_info->nodesize = 4096; fs_info->sectorsize = 4096; @@ -4050,15 +4048,6 @@ void close_ctree(struct btrfs_fs_info *fs_info) btrfs_free_stripe_hash_table(fs_info); btrfs_free_ref_cache(fs_info); - - while (!list_empty(&fs_info->pinned_chunks)) { - struct extent_map *em; - - em = list_first_entry(&fs_info->pinned_chunks, - struct extent_map, list); - list_del_init(&em->list); - free_extent_map(em); - } } int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c5f9e8359c6f..a9f504e7be33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10946,10 +10946,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, memcpy(&key, &block_group->key, sizeof(key)); mutex_lock(&fs_info->chunk_mutex); - if (!list_empty(&em->list)) { - /* We're in the transaction->pending_chunks list. */ - free_extent_map(em); - } spin_lock(&block_group->lock); block_group->removed = 1; /* @@ -10976,25 +10972,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * the transaction commit has completed. */ remove_em = (atomic_read(&block_group->trimming) == 0); - /* - * Make sure a trimmer task always sees the em in the pinned_chunks list - * if it sees block_group->removed == 1 (needs to lock block_group->lock - * before checking block_group->removed). - */ - if (!remove_em) { - /* - * Our em might be in trans->transaction->pending_chunks which - * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks), - * and so is the fs_info->pinned_chunks list. - * - * So at this point we must be holding the chunk_mutex to avoid - * any races with chunk allocation (more specifically at - * volumes.c:contains_pending_extent()), to ensure it always - * sees the em, either in the pending_chunks list or in the - * pinned_chunks list. - */ - list_move_tail(&em->list, &fs_info->pinned_chunks); - } spin_unlock(&block_group->lock); if (remove_em) { @@ -11002,11 +10979,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, em_tree = &fs_info->mapping_tree.map_tree; write_lock(&em_tree->lock); - /* - * The em might be in the pending_chunks list, so make sure the - * chunk mutex is locked, since remove_extent_mapping() will - * delete us from that list. - */ remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); /* once for the tree */ diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 928f729c55ba..5a79a656dfa6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -4,6 +4,7 @@ #include #include #include "ctree.h" +#include "volumes.h" #include "extent_map.h" #include "compression.h" @@ -337,6 +338,37 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, try_merge_map(tree, em); } +static void extent_map_device_set_bits(struct extent_map *em, unsigned bits) +{ + struct map_lookup *map = em->map_lookup; + u64 stripe_size = em->orig_block_len; + int i; + + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_bio_stripe *stripe = &map->stripes[i]; + struct btrfs_device *device = stripe->dev; + + set_extent_bits_nowait(&device->alloc_state, stripe->physical, + stripe->physical + stripe_size - 1, bits); + } +} + +static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits) +{ + struct map_lookup *map = em->map_lookup; + u64 stripe_size = em->orig_block_len; + int i; + + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_bio_stripe *stripe = &map->stripes[i]; + struct btrfs_device *device = stripe->dev; + + __clear_extent_bit(&device->alloc_state, stripe->physical, + stripe->physical + stripe_size - 1, bits, + 0, 0, NULL, GFP_NOWAIT, NULL); + } +} + /** * add_extent_mapping - add new extent map to the extent tree * @tree: tree to insert new map in @@ -357,6 +389,8 @@ int add_extent_mapping(struct extent_map_tree *tree, goto out; setup_extent_mapping(tree, em, modified); + if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) + extent_map_device_set_bits(em, CHUNK_ALLOCATED); out: return ret; } @@ -438,6 +472,8 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) rb_erase_cached(&em->rb_node, &tree->map); if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) list_del_init(&em->list); + if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) + extent_map_device_clear_bits(em, CHUNK_ALLOCATED); RB_CLEAR_NODE(&em->rb_node); } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 74aa552f4793..207fb50dcc7a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3366,10 +3366,6 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) em = lookup_extent_mapping(em_tree, block_group->key.objectid, 1); BUG_ON(!em); /* logic error, can't happen */ - /* - * remove_extent_mapping() will delete us from the pinned_chunks - * list, which is protected by the chunk mutex. - */ remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); mutex_unlock(&fs_info->chunk_mutex); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b32769998bbb..e5404326fc55 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -50,14 +50,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) btrfs_err(transaction->fs_info, "pending csums is %llu", transaction->delayed_refs.pending_csums); - while (!list_empty(&transaction->pending_chunks)) { - struct extent_map *em; - - em = list_first_entry(&transaction->pending_chunks, - struct extent_map, list); - list_del_init(&em->list); - free_extent_map(em); - } /* * If any block groups are found in ->deleted_bgs then it's * because the transaction was aborted and a commit did not @@ -235,7 +227,6 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, spin_lock_init(&cur_trans->delayed_refs.lock); INIT_LIST_HEAD(&cur_trans->pending_snapshots); - INIT_LIST_HEAD(&cur_trans->pending_chunks); INIT_LIST_HEAD(&cur_trans->dev_update_list); INIT_LIST_HEAD(&cur_trans->switch_commits); INIT_LIST_HEAD(&cur_trans->dirty_bgs); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 2bd76f681520..4419a4a0294b 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -51,7 +51,6 @@ struct btrfs_transaction { wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; - struct list_head pending_chunks; struct list_head dev_update_list; struct list_head switch_commits; struct list_head dirty_bgs; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2bde9e9c188e..2e5e48d8dd2f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -336,6 +336,7 @@ void btrfs_free_device(struct btrfs_device *device) { WARN_ON(!list_empty(&device->post_commit_list)); rcu_string_free(device->name); + extent_io_tree_release(&device->alloc_state); bio_put(device->flush_bio); kfree(device); } @@ -412,6 +413,7 @@ static struct btrfs_device *__alloc_device(void) btrfs_device_data_ordered_init(dev); INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL); return dev; } @@ -1499,58 +1501,30 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, return device; } -static int contains_pending_extent(struct btrfs_transaction *transaction, - struct btrfs_device *device, - u64 *start, u64 len) +/* + * Try to find a chunk that intersects [start, start + len] range and when one + * such is found, record the end of it in *start + */ +#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) +static bool contains_pending_extent(struct btrfs_device *device, u64 *start, + u64 len) { - struct btrfs_fs_info *fs_info = device->fs_info; - struct extent_map *em; - struct list_head *search_list = &fs_info->pinned_chunks; - int ret = 0; - u64 physical_start = *start; + u64 physical_start, physical_end; - if (transaction) - search_list = &transaction->pending_chunks; -again: - list_for_each_entry(em, search_list, list) { - struct map_lookup *map; - int i; + lockdep_assert_held(&device->fs_info->chunk_mutex); - map = em->map_lookup; - for (i = 0; i < map->num_stripes; i++) { - u64 end; + if (!find_first_extent_bit(&device->alloc_state, *start, + &physical_start, &physical_end, + CHUNK_ALLOCATED, NULL)) { - if (map->stripes[i].dev != device) - continue; - if (map->stripes[i].physical >= physical_start + len || - map->stripes[i].physical + em->orig_block_len <= - physical_start) - continue; - /* - * Make sure that while processing the pinned list we do - * not override our *start with a lower value, because - * we can have pinned chunks that fall within this - * device hole and that have lower physical addresses - * than the pending chunks we processed before. If we - * do not take this special care we can end up getting - * 2 pending chunks that start at the same physical - * device offsets because the end offset of a pinned - * chunk can be equal to the start offset of some - * pending chunk. - */ - end = map->stripes[i].physical + em->orig_block_len; - if (end > *start) { - *start = end; - ret = 1; - } + if (in_range(physical_start, *start, len) || + in_range(*start, physical_start, + physical_end - physical_start)) { + *start = physical_end + 1; + return true; } } - if (search_list != &fs_info->pinned_chunks) { - search_list = &fs_info->pinned_chunks; - goto again; - } - - return ret; + return false; } @@ -1661,15 +1635,12 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction, * Have to check before we set max_hole_start, otherwise * we could end up sending back this offset anyway. */ - if (contains_pending_extent(transaction, device, - &search_start, + if (contains_pending_extent(device, &search_start, hole_size)) { - if (key.offset >= search_start) { + if (key.offset >= search_start) hole_size = key.offset - search_start; - } else { - WARN_ON_ONCE(1); + else hole_size = 0; - } } if (hole_size > max_hole_size) { @@ -1710,8 +1681,7 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction, if (search_end > search_start) { hole_size = search_end - search_start; - if (contains_pending_extent(transaction, device, &search_start, - hole_size)) { + if (contains_pending_extent(device, &search_start, hole_size)) { btrfs_release_path(path); goto again; } @@ -4756,7 +4726,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) * in-memory chunks are synced to disk so that the loop below sees them * and relocates them accordingly. */ - if (contains_pending_extent(trans->transaction, device, &start, diff)) { + if (contains_pending_extent(device, &start, diff)) { mutex_unlock(&fs_info->chunk_mutex); ret = btrfs_commit_transaction(trans); if (ret) @@ -5189,9 +5159,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, free_extent_map(em); goto error; } - - list_add_tail(&em->list, &trans->transaction->pending_chunks); - refcount_inc(&em->refs); write_unlock(&em_tree->lock); ret = btrfs_make_block_group(trans, 0, type, start, chunk_size); @@ -5224,8 +5191,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, free_extent_map(em); /* One for the tree reference */ free_extent_map(em); - /* One for the pending_chunks list reference */ - free_extent_map(em); error: kfree(devices_info); return ret; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d3658a4e65db..6c466ac27c2e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -133,6 +133,8 @@ struct btrfs_device { /* Counter to record the change of device stats */ atomic_t dev_stats_ccnt; atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; + + struct extent_io_tree alloc_state; }; /*