Merge branch 'for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This has Jeff Mahoney's long standing trim patch that fixes corners
  where trims were missing.  Omar has some raid5/6 fixes, especially for
  using scrub and device replace when devices are missing.

  Zhao Lie continues cleaning and fixing things, this series fixes some
  really hard to hit corners in xfstests.  I had to pull it last merge
  window due to some deadlocks, but those are now resolved.

  I added support for Tejun's new blkio controllers.  It seems to work
  well for single devices, we'll expand to multi-device as well"

* 'for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (47 commits)
  btrfs: fix compile when block cgroups are not enabled
  Btrfs: fix file read corruption after extent cloning and fsync
  Btrfs: check if previous transaction aborted to avoid fs corruption
  btrfs: use __GFP_NOFAIL in alloc_btrfs_bio
  btrfs: Prevent from early transaction abort
  btrfs: Remove unused arguments in tree-log.c
  btrfs: Remove useless condition in start_log_trans()
  Btrfs: add support for blkio controllers
  Btrfs: remove unused mutex from struct 'btrfs_fs_info'
  Btrfs: fix parity scrub of RAID 5/6 with missing device
  Btrfs: fix device replace of a missing RAID 5/6 device
  Btrfs: add RAID 5/6 BTRFS_RBIO_REBUILD_MISSING operation
  Btrfs: count devices correctly in readahead during RAID 5/6 replace
  Btrfs: remove misleading handling of missing device scrub
  btrfs: fix clone / extent-same deadlocks
  Btrfs: fix defrag to merge tail file extent
  Btrfs: fix warning in backref walking
  btrfs: Add WARN_ON() for double lock in btrfs_tree_lock()
  btrfs: Remove root argument in extent_data_ref_count()
  btrfs: Fix wrong comment of btrfs_alloc_tree_block()
  ...
This commit is contained in:
Linus Torvalds 2015-09-05 15:14:43 -07:00
commit 22365979ab
21 changed files with 1135 additions and 339 deletions

View file

@ -206,10 +206,33 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
return -ENOMEM;
ref->root_id = root_id;
if (key)
if (key) {
ref->key_for_search = *key;
else
/*
* We can often find data backrefs with an offset that is too
* large (>= LLONG_MAX, maximum allowed file offset) due to
* underflows when subtracting a file's offset with the data
* offset of its corresponding extent data item. This can
* happen for example in the clone ioctl.
* So if we detect such case we set the search key's offset to
* zero to make sure we will find the matching file extent item
* at add_all_parents(), otherwise we will miss it because the
* offset taken form the backref is much larger then the offset
* of the file extent item. This can make us scan a very large
* number of file extent items, but at least it will not make
* us miss any.
* This is an ugly workaround for a behaviour that should have
* never existed, but it does and a fix for the clone ioctl
* would touch a lot of places, cause backwards incompatibility
* and would not fix the problem for extents cloned with older
* kernels.
*/
if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
ref->key_for_search.offset >= LLONG_MAX)
ref->key_for_search.offset = 0;
} else {
memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
}
ref->inode_list = NULL;
ref->level = level;
@ -632,7 +655,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_delayed_tree_ref *ref;
ref = btrfs_delayed_node_to_tree_ref(node);
ret = __add_prelim_ref(prefs, ref->root, NULL,
ret = __add_prelim_ref(prefs, 0, NULL,
ref->level + 1, ref->parent,
node->bytenr,
node->ref_mod * sgn, GFP_ATOMIC);
@ -664,11 +687,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_delayed_data_ref *ref;
ref = btrfs_delayed_node_to_data_ref(node);
key.objectid = ref->objectid;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = ref->offset;
ret = __add_prelim_ref(prefs, ref->root, &key, 0,
ret = __add_prelim_ref(prefs, 0, NULL, 0,
ref->parent, node->bytenr,
node->ref_mod * sgn, GFP_ATOMIC);
break;

View file

@ -1159,8 +1159,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret)
if (ret) {
btrfs_abort_transaction(trans, root, ret);
return ret;
}
}
if (buf == root->node) {

View file

@ -1300,7 +1300,7 @@ struct btrfs_block_group_cache {
/* for raid56, this is a full stripe, without parity */
unsigned long full_stripe_len;
unsigned int ro:1;
unsigned int ro;
unsigned int iref:1;
unsigned int has_caching_ctl:1;
unsigned int removed:1;
@ -1518,12 +1518,6 @@ struct btrfs_fs_info {
*/
struct mutex ordered_operations_mutex;
/*
* Same as ordered_operations_mutex except this is for ordered extents
* and not the operations.
*/
struct mutex ordered_extent_flush_mutex;
struct rw_semaphore commit_root_sem;
struct rw_semaphore cleanup_work_sem;
@ -3437,6 +3431,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start,
struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
@ -3495,9 +3491,9 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_set_block_group_ro(struct btrfs_root *root,
int btrfs_inc_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
void btrfs_set_block_group_rw(struct btrfs_root *root,
void btrfs_dec_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
@ -4073,6 +4069,7 @@ __cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
const char *btrfs_decode_error(int errno);
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
@ -4185,8 +4182,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow);
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);

View file

@ -1730,6 +1730,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
bdi->congested_fn = btrfs_congested_fn;
bdi->congested_data = info;
bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
return 0;
}
@ -2613,7 +2614,6 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->ordered_extent_flush_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
@ -2955,8 +2955,9 @@ int open_ctree(struct super_block *sb,
if (fs_info->fs_devices->missing_devices >
fs_info->num_tolerated_disk_barrier_failures &&
!(sb->s_flags & MS_RDONLY)) {
printk(KERN_WARNING "BTRFS: "
"too many missing devices, writeable mount is not allowed\n");
pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n",
fs_info->fs_devices->missing_devices,
fs_info->num_tolerated_disk_barrier_failures);
goto fail_sysfs;
}
@ -3763,6 +3764,15 @@ void close_ctree(struct btrfs_root *root)
cancel_work_sync(&fs_info->async_reclaim_work);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
/*
* If the cleaner thread is stopped and there are
* block groups queued for removal, the deletion will be
* skipped when we quit the cleaner thread.
*/
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_delete_unused_bgs(root->fs_info);
mutex_unlock(&root->fs_info->cleaner_mutex);
ret = btrfs_commit_super(root);
if (ret)
btrfs_err(fs_info, "commit super ret %d", ret);

View file

@ -1316,8 +1316,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
return ret;
}
static noinline u32 extent_data_ref_count(struct btrfs_root *root,
struct btrfs_path *path,
static noinline u32 extent_data_ref_count(struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref)
{
struct btrfs_key key;
@ -1883,10 +1882,77 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
return ret;
}
static int btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
u64 *discarded_bytes)
{
return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
int j, ret = 0;
u64 bytes_left, end;
u64 aligned_start = ALIGN(start, 1 << 9);
if (WARN_ON(start != aligned_start)) {
len -= aligned_start - start;
len = round_down(len, 1 << 9);
start = aligned_start;
}
*discarded_bytes = 0;
if (!len)
return 0;
end = start + len;
bytes_left = len;
/* Skip any superblocks on this device. */
for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
u64 sb_start = btrfs_sb_offset(j);
u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
u64 size = sb_start - start;
if (!in_range(sb_start, start, bytes_left) &&
!in_range(sb_end, start, bytes_left) &&
!in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
continue;
/*
* Superblock spans beginning of range. Adjust start and
* try again.
*/
if (sb_start <= start) {
start += sb_end - start;
if (start > end) {
bytes_left = 0;
break;
}
bytes_left = end - start;
continue;
}
if (size) {
ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
GFP_NOFS, 0);
if (!ret)
*discarded_bytes += size;
else if (ret != -EOPNOTSUPP)
return ret;
}
start = sb_end;
if (start > end) {
bytes_left = 0;
break;
}
bytes_left = end - start;
}
if (bytes_left) {
ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
GFP_NOFS, 0);
if (!ret)
*discarded_bytes += bytes_left;
}
return ret;
}
int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
@ -1907,14 +1973,16 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
u64 bytes;
if (!stripe->dev->can_discard)
continue;
ret = btrfs_issue_discard(stripe->dev->bdev,
stripe->physical,
stripe->length);
stripe->length,
&bytes);
if (!ret)
discarded_bytes += stripe->length;
discarded_bytes += bytes;
else if (ret != -EOPNOTSUPP)
break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
@ -6062,20 +6130,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *block_group, *tmp;
struct list_head *deleted_bgs;
struct extent_io_tree *unpin;
u64 start;
u64 end;
int ret;
if (trans->aborted)
return 0;
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
unpin = &fs_info->freed_extents[1];
else
unpin = &fs_info->freed_extents[0];
while (1) {
while (!trans->aborted) {
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY, NULL);
@ -6094,6 +6161,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
cond_resched();
}
/*
* Transaction is finished. We don't need the lock anymore. We
* do need to clean up the block groups in case of a transaction
* abort.
*/
deleted_bgs = &trans->transaction->deleted_bgs;
list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
u64 trimmed = 0;
ret = -EROFS;
if (!trans->aborted)
ret = btrfs_discard_extent(root,
block_group->key.objectid,
block_group->key.offset,
&trimmed);
list_del_init(&block_group->bg_list);
btrfs_put_block_group_trimming(block_group);
btrfs_put_block_group(block_group);
if (ret) {
const char *errstr = btrfs_decode_error(ret);
btrfs_warn(fs_info,
"Discard failed while removing blockgroup: errno=%d %s\n",
ret, errstr);
}
}
return 0;
}
@ -6349,7 +6444,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
} else {
if (found_extent) {
BUG_ON(is_data && refs_to_drop !=
extent_data_ref_count(root, path, iref));
extent_data_ref_count(path, iref));
if (iref) {
BUG_ON(path->slots[0] != extent_slot);
} else {
@ -7567,9 +7662,6 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
/*
* finds a free extent and does all the dirty work required for allocation
* returns the key for the extent through ins, and a tree buffer for
* the first block of the extent through buf.
*
* returns the tree buffer or an ERR_PTR on error.
*/
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
@ -8723,14 +8815,13 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
return flags;
}
static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
u64 min_allocable_bytes;
int ret = -ENOSPC;
/*
* We need some metadata space and system metadata space for
* allocating chunks in some corner cases until we force to set
@ -8747,6 +8838,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
spin_lock(&cache->lock);
if (cache->ro) {
cache->ro++;
ret = 0;
goto out;
}
@ -8758,7 +8850,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
min_allocable_bytes <= sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
cache->ro = 1;
cache->ro++;
list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
ret = 0;
}
@ -8768,7 +8860,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
return ret;
}
int btrfs_set_block_group_ro(struct btrfs_root *root,
int btrfs_inc_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
@ -8776,8 +8868,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
u64 alloc_flags;
int ret;
BUG_ON(cache->ro);
again:
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
@ -8820,7 +8910,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
goto out;
}
ret = set_block_group_ro(cache, 0);
ret = inc_block_group_ro(cache, 0);
if (!ret)
goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@ -8828,7 +8918,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
ret = set_block_group_ro(cache, 0);
ret = inc_block_group_ro(cache, 0);
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(root, cache->flags);
@ -8891,7 +8981,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
return free_bytes;
}
void btrfs_set_block_group_rw(struct btrfs_root *root,
void btrfs_dec_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
struct btrfs_space_info *sinfo = cache->space_info;
@ -8901,11 +8991,13 @@ void btrfs_set_block_group_rw(struct btrfs_root *root,
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
sinfo->bytes_readonly -= num_bytes;
cache->ro = 0;
list_del_init(&cache->ro_list);
if (!--cache->ro) {
num_bytes = cache->key.offset - cache->reserved -
cache->pinned - cache->bytes_super -
btrfs_block_group_used(&cache->item);
sinfo->bytes_readonly -= num_bytes;
list_del_init(&cache->ro_list);
}
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
}
@ -9421,7 +9513,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid)) {
set_block_group_ro(cache, 1);
inc_block_group_ro(cache, 1);
} else if (btrfs_block_group_used(&cache->item) == 0) {
spin_lock(&info->unused_bgs_lock);
/* Should always be true but just in case. */
@ -9449,11 +9541,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
list_for_each_entry(cache,
&space_info->block_groups[BTRFS_RAID_RAID0],
list)
set_block_group_ro(cache, 1);
inc_block_group_ro(cache, 1);
list_for_each_entry(cache,
&space_info->block_groups[BTRFS_RAID_SINGLE],
list)
set_block_group_ro(cache, 1);
inc_block_group_ro(cache, 1);
}
init_global_block_rsv(info);
@ -9834,6 +9926,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* currently running transaction might finish and a new one start,
* allowing for new block groups to be created that can reuse the same
* physical device locations unless we take this special care.
*
* There may also be an implicit trim operation if the file system
* is mounted with -odiscard. The same protections must remain
* in place until the extents have been discarded completely when
* the transaction commit has completed.
*/
remove_em = (atomic_read(&block_group->trimming) == 0);
/*
@ -9908,6 +10005,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
u64 start, end;
int trimming;
block_group = list_first_entry(&fs_info->unused_bgs,
struct btrfs_block_group_cache,
@ -9941,7 +10039,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_unlock(&block_group->lock);
/* We don't want to force the issue, only flip if it's ok. */
ret = set_block_group_ro(block_group, 0);
ret = inc_block_group_ro(block_group, 0);
up_write(&space_info->groups_sem);
if (ret < 0) {
ret = 0;
@ -9955,7 +10053,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* 1 for btrfs_orphan_reserve_metadata() */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
btrfs_set_block_group_rw(root, block_group);
btrfs_dec_block_group_ro(root, block_group);
ret = PTR_ERR(trans);
goto next;
}
@ -9982,14 +10080,14 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
EXTENT_DIRTY, GFP_NOFS);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_set_block_group_rw(root, block_group);
btrfs_dec_block_group_ro(root, block_group);
goto end_trans;
}
ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
EXTENT_DIRTY, GFP_NOFS);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_set_block_group_rw(root, block_group);
btrfs_dec_block_group_ro(root, block_group);
goto end_trans;
}
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
@ -10007,12 +10105,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
/* DISCARD can flip during remount */
trimming = btrfs_test_opt(root, DISCARD);
/* Implicit trim during transaction commit. */
if (trimming)
btrfs_get_block_group_trimming(block_group);
/*
* Btrfs_remove_chunk will abort the transaction if things go
* horribly wrong.
*/
ret = btrfs_remove_chunk(trans, root,
block_group->key.objectid);
if (ret) {
if (trimming)
btrfs_put_block_group_trimming(block_group);
goto end_trans;
}
/*
* If we're not mounted with -odiscard, we can just forget
* about this block group. Otherwise we'll need to wait
* until transaction commit to do the actual discard.
*/
if (trimming) {
WARN_ON(!list_empty(&block_group->bg_list));
spin_lock(&trans->transaction->deleted_bgs_lock);
list_move(&block_group->bg_list,
&trans->transaction->deleted_bgs);
spin_unlock(&trans->transaction->deleted_bgs_lock);
btrfs_get_block_group(block_group);
}
end_trans:
btrfs_end_transaction(trans, root);
next:
@ -10066,10 +10191,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
return unpin_extent_range(root, start, end, false);
}
/*
* It used to be that old block groups would be left around forever.
* Iterating over them would be enough to trim unused space. Since we
* now automatically remove them, we also need to iterate over unallocated
* space.
*
* We don't want a transaction for this since the discard may take a
* substantial amount of time. We don't require that a transaction be
* running, but we do need to take a running transaction into account
* to ensure that we're not discarding chunks that were released in
* the current transaction.
*
* Holding the chunks lock will prevent other threads from allocating
* or releasing chunks, but it won't prevent a running transaction
* from committing and releasing the memory that the pending chunks
* list head uses. For that, we need to take a reference to the
* transaction.
*/
static int btrfs_trim_free_extents(struct btrfs_device *device,
u64 minlen, u64 *trimmed)
{
u64 start = 0, len = 0;
int ret;
*trimmed = 0;
/* Not writeable = nothing to do. */
if (!device->writeable)
return 0;
/* No free space = nothing to do. */
if (device->total_bytes <= device->bytes_used)
return 0;
ret = 0;
while (1) {
struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
struct btrfs_transaction *trans;
u64 bytes;
ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
if (ret)
return ret;
down_read(&fs_info->commit_root_sem);
spin_lock(&fs_info->trans_lock);
trans = fs_info->running_transaction;
if (trans)
atomic_inc(&trans->use_count);
spin_unlock(&fs_info->trans_lock);
ret = find_free_dev_extent_start(trans, device, minlen, start,
&start, &len);
if (trans)
btrfs_put_transaction(trans);
if (ret) {
up_read(&fs_info->commit_root_sem);
mutex_unlock(&fs_info->chunk_mutex);
if (ret == -ENOSPC)
ret = 0;
break;
}
ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
up_read(&fs_info->commit_root_sem);
mutex_unlock(&fs_info->chunk_mutex);
if (ret)
break;
start += len;
*trimmed += bytes;
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
cond_resched();
}
return ret;
}
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache = NULL;
struct btrfs_device *device;
struct list_head *devices;
u64 group_trimmed;
u64 start;
u64 end;
@ -10124,6 +10338,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
cache = next_block_group(fs_info->tree_root, cache);
}
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
devices = &root->fs_info->fs_devices->alloc_list;
list_for_each_entry(device, devices, dev_alloc_list) {
ret = btrfs_trim_free_extents(device, range->minlen,
&group_trimmed);
if (ret)
break;
trimmed += group_trimmed;
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
range->len = trimmed;
return ret;
}

View file

@ -2723,6 +2723,12 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
btrfs_bio->csum = NULL;
btrfs_bio->csum_allocated = NULL;
btrfs_bio->end_io = NULL;
#ifdef CONFIG_BLK_CGROUP
/* FIXME, put this into bio_clone_bioset */
if (bio->bi_css)
bio_associate_blkcg(new, bio->bi_css);
#endif
}
return new;
}
@ -2783,6 +2789,7 @@ static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
}
static int submit_extent_page(int rw, struct extent_io_tree *tree,
struct writeback_control *wbc,
struct page *page, sector_t sector,
size_t size, unsigned long offset,
struct block_device *bdev,
@ -2817,6 +2824,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
}
bio = NULL;
} else {
if (wbc)
wbc_account_io(wbc, page, page_size);
return 0;
}
}
@ -2829,6 +2838,10 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
if (wbc) {
wbc_init_bio(wbc, bio);
wbc_account_io(wbc, page, page_size);
}
if (bio_ret)
*bio_ret = bio;
@ -3039,7 +3052,7 @@ static int __do_readpage(struct extent_io_tree *tree,
}
pnr -= page->index;
ret = submit_extent_page(rw, tree, page,
ret = submit_extent_page(rw, tree, NULL, page,
sector, disk_io_size, pg_offset,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
@ -3434,7 +3447,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
page->index, cur, end);
}
ret = submit_extent_page(write_flags, tree, page,
ret = submit_extent_page(write_flags, tree, wbc, page,
sector, iosize, pg_offset,
bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
@ -3738,7 +3751,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
ret = submit_extent_page(rw, tree, p, offset >> 9,
ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
-1, end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags);
@ -4603,9 +4616,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
{
struct extent_buffer *eb = NULL;
eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS);
if (eb == NULL)
return NULL;
eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
eb->start = start;
eb->len = len;
eb->fs_info = fs_info;
@ -4863,7 +4874,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
return NULL;
for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, GFP_NOFS);
p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
if (!p)
goto free_eb;

View file

@ -3272,35 +3272,23 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
return ret;
}
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
{
int ret;
atomic_inc(&cache->trimming);
}
*trimmed = 0;
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
bool cleanup;
spin_lock(&block_group->lock);
if (block_group->removed) {
spin_unlock(&block_group->lock);
return 0;
}
atomic_inc(&block_group->trimming);
cleanup = (atomic_dec_and_test(&block_group->trimming) &&
block_group->removed);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
if (ret)
goto out;
ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
out:
spin_lock(&block_group->lock);
if (atomic_dec_and_test(&block_group->trimming) &&
block_group->removed) {
struct extent_map_tree *em_tree;
struct extent_map *em;
spin_unlock(&block_group->lock);
if (cleanup) {
lock_chunks(block_group->fs_info->chunk_root);
em_tree = &block_group->fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock);
@ -3324,10 +3312,31 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
* this block group have left 1 entry each one. Free them.
*/
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
} else {
spin_unlock(&block_group->lock);
}
}
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
int ret;
*trimmed = 0;
spin_lock(&block_group->lock);
if (block_group->removed) {
spin_unlock(&block_group->lock);
return 0;
}
btrfs_get_block_group_trimming(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
if (ret)
goto out;
ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
out:
btrfs_put_block_group_trimming(block_group);
return ret;
}

View file

@ -3658,6 +3658,35 @@ static void btrfs_read_locked_inode(struct inode *inode)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
/*
* We don't persist the id of the transaction where an unlink operation
* against the inode was last made. So here we assume the inode might
* have been evicted, and therefore the exact value of last_unlink_trans
* lost, and set it to last_trans to avoid metadata inconsistencies
* between the inode and its parent if the inode is fsync'ed and the log
* replayed. For example, in the scenario:
*
* touch mydir/foo
* ln mydir/foo mydir/bar
* sync
* unlink mydir/bar
* echo 2 > /proc/sys/vm/drop_caches # evicts inode
* xfs_io -c fsync mydir/foo
* <power failure>
* mount fs, triggers fsync log replay
*
* We must make sure that when we fsync our inode foo we also log its
* parent inode, otherwise after log replay the parent still has the
* dentry with the "bar" name but our inode foo has a link count of 1
* and doesn't have an inode ref with the name "bar" anymore.
*
* Setting last_unlink_trans to last_trans is a pessimistic approach,
* but it guarantees correctness at the expense of ocassional full
* transaction commits on fsync if our inode is a directory, or if our
* inode is not a directory, logging its parent unnecessarily.
*/
BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
path->slots[0]++;
if (inode->i_nlink != 1 ||
path->slots[0] >= btrfs_header_nritems(leaf))
@ -7958,7 +7987,11 @@ static void btrfs_end_dio_bio(struct bio *bio)
static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
u64 first_sector, gfp_t gfp_flags)
{
return btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
struct bio *bio;
bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
if (bio)
bio_associate_current(bio);
return bio;
}
static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,

View file

@ -1030,6 +1030,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
struct extent_map *em;
int ret = 1;
bool next_mergeable = true;
bool prev_mergeable = true;
/*
* make sure that once we start defragging an extent, we keep on
@ -1050,13 +1051,16 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
goto out;
}
if (!*defrag_end)
prev_mergeable = false;
next_mergeable = defrag_check_next_extent(inode, em);
/*
* we hit a real extent, if it is big or the next extent is not a
* real extent, don't bother defragging it
*/
if (!compress && (*last_len == 0 || *last_len >= thresh) &&
(em->len >= thresh || !next_mergeable))
(em->len >= thresh || (!next_mergeable && !prev_mergeable)))
ret = 0;
out:
/*
@ -1933,6 +1937,7 @@ static noinline int copy_to_sk(struct btrfs_root *root,
u64 found_transid;
struct extent_buffer *leaf;
struct btrfs_ioctl_search_header sh;
struct btrfs_key test;
unsigned long item_off;
unsigned long item_len;
int nritems;
@ -2016,12 +2021,17 @@ static noinline int copy_to_sk(struct btrfs_root *root,
}
advance_key:
ret = 0;
if (key->offset < (u64)-1 && key->offset < sk->max_offset)
test.objectid = sk->max_objectid;
test.type = sk->max_type;
test.offset = sk->max_offset;
if (btrfs_comp_cpu_keys(key, &test) >= 0)
ret = 1;
else if (key->offset < (u64)-1)
key->offset++;
else if (key->type < (u8)-1 && key->type < sk->max_type) {
else if (key->type < (u8)-1) {
key->offset = 0;
key->type++;
} else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
} else if (key->objectid < (u64)-1) {
key->offset = 0;
key->type = 0;
key->objectid++;
@ -2842,8 +2852,7 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
swap(inode1, inode2);
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
if (inode1 != inode2)
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
}
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
@ -2861,8 +2870,7 @@ static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
swap(loff1, loff2);
}
lock_extent_range(inode1, loff1, len);
if (inode1 != inode2)
lock_extent_range(inode2, loff2, len);
lock_extent_range(inode2, loff2, len);
}
struct cmp_pages {
@ -3787,13 +3795,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
goto out_fput;
if (!same_inode) {
if (inode < src) {
mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
}
btrfs_double_inode_lock(src, inode);
} else {
mutex_lock(&src->i_mutex);
}
@ -3843,8 +3845,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
lock_extent_range(src, lock_start, lock_len);
} else {
lock_extent_range(src, off, len);
lock_extent_range(inode, destoff, len);
btrfs_double_extent_lock(src, off, inode, destoff, len);
}
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
@ -3855,9 +3856,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end);
} else {
unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
unlock_extent(&BTRFS_I(inode)->io_tree, destoff,
destoff + len - 1);
btrfs_double_extent_unlock(src, off, inode, destoff, len);
}
/*
* Truncate page cache pages so that future reads will see the cloned
@ -3866,17 +3865,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
truncate_inode_pages_range(&inode->i_data, destoff,
PAGE_CACHE_ALIGN(destoff + len) - 1);
out_unlock:
if (!same_inode) {
if (inode < src) {
mutex_unlock(&src->i_mutex);
mutex_unlock(&inode->i_mutex);
} else {
mutex_unlock(&inode->i_mutex);
mutex_unlock(&src->i_mutex);
}
} else {
if (!same_inode)
btrfs_double_inode_unlock(src, inode);
else
mutex_unlock(&src->i_mutex);
}
out_fput:
fdput(src_file);
out_drop_write:

View file

@ -241,6 +241,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
*/
void btrfs_tree_lock(struct extent_buffer *eb)
{
WARN_ON(eb->lock_owner == current->pid);
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);

View file

@ -61,9 +61,10 @@
#define RBIO_CACHE_SIZE 1024
enum btrfs_rbio_ops {
BTRFS_RBIO_WRITE = 0,
BTRFS_RBIO_READ_REBUILD = 1,
BTRFS_RBIO_PARITY_SCRUB = 2,
BTRFS_RBIO_WRITE,
BTRFS_RBIO_READ_REBUILD,
BTRFS_RBIO_PARITY_SCRUB,
BTRFS_RBIO_REBUILD_MISSING,
};
struct btrfs_raid_bio {
@ -602,6 +603,10 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
cur->operation == BTRFS_RBIO_PARITY_SCRUB)
return 0;
if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
cur->operation == BTRFS_RBIO_REBUILD_MISSING)
return 0;
return 1;
}
@ -793,7 +798,10 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
if (next->operation == BTRFS_RBIO_READ_REBUILD)
async_read_rebuild(next);
else if (next->operation == BTRFS_RBIO_WRITE) {
else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
steal_rbio(rbio, next);
async_read_rebuild(next);
} else if (next->operation == BTRFS_RBIO_WRITE) {
steal_rbio(rbio, next);
async_rmw_stripe(next);
} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
@ -1805,7 +1813,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
faila = rbio->faila;
failb = rbio->failb;
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
spin_lock_irq(&rbio->bio_list_lock);
set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
spin_unlock_irq(&rbio->bio_list_lock);
@ -1830,7 +1839,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
* if we're rebuilding a read, we have to use
* pages from the bio list
*/
if (rbio->operation == BTRFS_RBIO_READ_REBUILD &&
if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
(stripe == faila || stripe == failb)) {
page = page_in_rbio(rbio, stripe, pagenr, 0);
} else {
@ -1939,7 +1949,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
* if we're rebuilding a read, we have to use
* pages from the bio list
*/
if (rbio->operation == BTRFS_RBIO_READ_REBUILD &&
if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
(stripe == faila || stripe == failb)) {
page = page_in_rbio(rbio, stripe, pagenr, 0);
} else {
@ -1960,6 +1971,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
rbio_orig_end_io(rbio, err);
} else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
rbio_orig_end_io(rbio, err);
} else if (err == 0) {
rbio->faila = -1;
@ -2096,7 +2109,8 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
return 0;
cleanup:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD)
if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
rbio_orig_end_io(rbio, -EIO);
return -EIO;
}
@ -2227,8 +2241,9 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
return rbio;
}
void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
struct page *page, u64 logical)
/* Used for both parity scrub and missing. */
void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
u64 logical)
{
int stripe_offset;
int index;
@ -2662,3 +2677,55 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
if (!lock_stripe_add(rbio))
async_scrub_parity(rbio);
}
/* The following code is used for dev replace of a missing RAID 5/6 device. */
struct btrfs_raid_bio *
raid56_alloc_missing_rbio(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 length)
{
struct btrfs_raid_bio *rbio;
rbio = alloc_rbio(root, bbio, length);
if (IS_ERR(rbio))
return NULL;
rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
bio_list_add(&rbio->bio_list, bio);
/*
* This is a special bio which is used to hold the completion handler
* and make the scrub rbio is similar to the other types
*/
ASSERT(!bio->bi_iter.bi_size);
rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {
BUG();
kfree(rbio);
return NULL;
}
return rbio;
}
static void missing_raid56_work(struct btrfs_work *work)
{
struct btrfs_raid_bio *rbio;
rbio = container_of(work, struct btrfs_raid_bio, work);
__raid56_parity_recover(rbio);
}
static void async_missing_raid56(struct btrfs_raid_bio *rbio)
{
btrfs_init_work(&rbio->work, btrfs_rmw_helper,
missing_raid56_work, NULL, NULL);
btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
}
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
async_missing_raid56(rbio);
}

View file

@ -48,15 +48,21 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 stripe_len);
void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
u64 logical);
struct btrfs_raid_bio *
raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 stripe_len,
struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
struct page *page, u64 logical);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
struct btrfs_raid_bio *
raid56_alloc_missing_rbio(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 length);
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
#endif

View file

@ -328,6 +328,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct btrfs_device *prev_dev;
u32 blocksize;
u64 length;
int real_stripes;
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
@ -369,7 +370,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
goto error;
}
for (nzones = 0; nzones < bbio->num_stripes; ++nzones) {
real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
for (nzones = 0; nzones < real_stripes; ++nzones) {
struct reada_zone *zone;
dev = bbio->stripes[nzones].dev;

View file

@ -2523,8 +2523,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
* counted. return -ENOENT if the block is root of reloc tree.
*/
static noinline_for_stack
struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
struct backref_node *node)
struct btrfs_root *select_one_root(struct backref_node *node)
{
struct backref_node *next;
struct btrfs_root *root;
@ -2912,7 +2911,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
return 0;
BUG_ON(node->processed);
root = select_one_root(trans, node);
root = select_one_root(node);
if (root == ERR_PTR(-ENOENT)) {
update_processed_blocks(rc, node);
goto out;
@ -3755,8 +3754,7 @@ int add_data_references(struct reloc_control *rc,
* helper to find next unprocessed extent
*/
static noinline_for_stack
int find_next_extent(struct btrfs_trans_handle *trans,
struct reloc_control *rc, struct btrfs_path *path,
int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
struct btrfs_key *extent_key)
{
struct btrfs_key key;
@ -3951,7 +3949,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
continue;
}
ret = find_next_extent(trans, rc, path, &key);
ret = find_next_extent(rc, path, &key);
if (ret < 0)
err = ret;
if (ret != 0)
@ -3976,6 +3974,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
sizeof(struct btrfs_extent_item_v0));
ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
&path_change);
if (ret < 0) {
err = ret;
break;
}
if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
else
@ -4140,7 +4142,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans;
struct btrfs_root *root;
struct btrfs_key key;
u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
u64 objectid;
int err = 0;
root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
@ -4215,14 +4217,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
BUG_ON(!rc->block_group);
if (!rc->block_group->ro) {
ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
if (ret) {
err = ret;
goto out;
}
rw = 1;
ret = btrfs_inc_block_group_ro(extent_root, rc->block_group);
if (ret) {
err = ret;
goto out;
}
rw = 1;
path = btrfs_alloc_path();
if (!path) {
@ -4294,7 +4294,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
out:
if (err && rw)
btrfs_set_block_group_rw(extent_root, rc->block_group);
btrfs_dec_block_group_ro(extent_root, rc->block_group);
iput(rc->data_inode);
btrfs_put_block_group(rc->block_group);
kfree(rc);
@ -4594,8 +4594,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
* called before creating snapshot. it calculates metadata reservation
* requried for relocating tree blocks in the snapshot
*/
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve)
{
struct btrfs_root *root;

View file

@ -125,6 +125,7 @@ struct scrub_block {
/* It is for the data with checksum */
unsigned int data_corrected:1;
};
struct btrfs_work work;
};
/* Used for the chunks with parity stripe such RAID5/6 */
@ -332,11 +333,14 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
}
}
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
atomic_inc(&fs_info->scrubs_paused);
wake_up(&fs_info->scrub_pause_wait);
}
static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
mutex_lock(&fs_info->scrub_lock);
__scrub_blocked_if_needed(fs_info);
atomic_dec(&fs_info->scrubs_paused);
@ -345,6 +349,12 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
wake_up(&fs_info->scrub_pause_wait);
}
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
scrub_pause_on(fs_info);
scrub_pause_off(fs_info);
}
/*
* used for workers that require transaction commits (i.e., for the
* NOCOW case)
@ -2074,21 +2084,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
sbio = sctx->bios[sctx->curr];
sctx->curr = -1;
scrub_pending_bio_inc(sctx);
if (!sbio->bio->bi_bdev) {
/*
* this case should not happen. If btrfs_map_block() is
* wrong, it could happen for dev-replace operations on
* missing devices when no mirrors are available, but in
* this case it should already fail the mount.
* This case is handled correctly (but _very_ slowly).
*/
printk_ratelimited(KERN_WARNING
"BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
bio_io_error(sbio->bio);
} else {
btrfsic_submit_bio(READ, sbio->bio);
}
btrfsic_submit_bio(READ, sbio->bio);
}
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
@ -2165,6 +2161,134 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
return 0;
}
static void scrub_missing_raid56_end_io(struct bio *bio)
{
struct scrub_block *sblock = bio->bi_private;
struct btrfs_fs_info *fs_info = sblock->sctx->dev_root->fs_info;
if (bio->bi_error)
sblock->no_io_error_seen = 0;
btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
}
static void scrub_missing_raid56_worker(struct btrfs_work *work)
{
struct scrub_block *sblock = container_of(work, struct scrub_block, work);
struct scrub_ctx *sctx = sblock->sctx;
struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
unsigned int is_metadata;
unsigned int have_csum;
u8 *csum;
u64 generation;
u64 logical;
struct btrfs_device *dev;
is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA);
have_csum = sblock->pagev[0]->have_csum;
csum = sblock->pagev[0]->csum;
generation = sblock->pagev[0]->generation;
logical = sblock->pagev[0]->logical;
dev = sblock->pagev[0]->dev;
if (sblock->no_io_error_seen) {
scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
have_csum, csum, generation,
sctx->csum_size);
}
if (!sblock->no_io_error_seen) {
spin_lock(&sctx->stat_lock);
sctx->stat.read_errors++;
spin_unlock(&sctx->stat_lock);
printk_ratelimited_in_rcu(KERN_ERR
"BTRFS: I/O error rebulding logical %llu for dev %s\n",
logical, rcu_str_deref(dev->name));
} else if (sblock->header_error || sblock->checksum_error) {
spin_lock(&sctx->stat_lock);
sctx->stat.uncorrectable_errors++;
spin_unlock(&sctx->stat_lock);
printk_ratelimited_in_rcu(KERN_ERR
"BTRFS: failed to rebuild valid logical %llu for dev %s\n",
logical, rcu_str_deref(dev->name));
} else {
scrub_write_block_to_dev_replace(sblock);
}
scrub_block_put(sblock);
if (sctx->is_dev_replace &&
atomic_read(&sctx->wr_ctx.flush_all_writes)) {
mutex_lock(&sctx->wr_ctx.wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_ctx.wr_lock);
}
scrub_pending_bio_dec(sctx);
}
static void scrub_missing_raid56_pages(struct scrub_block *sblock)
{
struct scrub_ctx *sctx = sblock->sctx;
struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
u64 length = sblock->page_count * PAGE_SIZE;
u64 logical = sblock->pagev[0]->logical;
struct btrfs_bio *bbio;
struct bio *bio;
struct btrfs_raid_bio *rbio;
int ret;
int i;
ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
&bbio, 0, 1);
if (ret || !bbio || !bbio->raid_map)
goto bbio_out;
if (WARN_ON(!sctx->is_dev_replace ||
!(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
/*
* We shouldn't be scrubbing a missing device. Even for dev
* replace, we should only get here for RAID 5/6. We either
* managed to mount something with no mirrors remaining or
* there's a bug in scrub_remap_extent()/btrfs_map_block().
*/
goto bbio_out;
}
bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
goto bbio_out;
bio->bi_iter.bi_sector = logical >> 9;
bio->bi_private = sblock;
bio->bi_end_io = scrub_missing_raid56_end_io;
rbio = raid56_alloc_missing_rbio(sctx->dev_root, bio, bbio, length);
if (!rbio)
goto rbio_out;
for (i = 0; i < sblock->page_count; i++) {
struct scrub_page *spage = sblock->pagev[i];
raid56_add_scrub_pages(rbio, spage->page, spage->logical);
}
btrfs_init_work(&sblock->work, btrfs_scrub_helper,
scrub_missing_raid56_worker, NULL, NULL);
scrub_block_get(sblock);
scrub_pending_bio_inc(sctx);
raid56_submit_missing_rbio(rbio);
return;
rbio_out:
bio_put(bio);
bbio_out:
btrfs_put_bbio(bbio);
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
spin_unlock(&sctx->stat_lock);
}
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
u64 physical, struct btrfs_device *dev, u64 flags,
u64 gen, int mirror_num, u8 *csum, int force,
@ -2228,19 +2352,27 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
}
WARN_ON(sblock->page_count == 0);
for (index = 0; index < sblock->page_count; index++) {
struct scrub_page *spage = sblock->pagev[index];
int ret;
if (dev->missing) {
/*
* This case should only be hit for RAID 5/6 device replace. See
* the comment in scrub_missing_raid56_pages() for details.
*/
scrub_missing_raid56_pages(sblock);
} else {
for (index = 0; index < sblock->page_count; index++) {
struct scrub_page *spage = sblock->pagev[index];
int ret;
ret = scrub_add_page_to_rd_bio(sctx, spage);
if (ret) {
scrub_block_put(sblock);
return ret;
ret = scrub_add_page_to_rd_bio(sctx, spage);
if (ret) {
scrub_block_put(sblock);
return ret;
}
}
}
if (force)
scrub_submit(sctx);
if (force)
scrub_submit(sctx);
}
/* last one frees, either here or in bio completion for last page */
scrub_block_put(sblock);
@ -2551,6 +2683,11 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
u8 csum[BTRFS_CSUM_SIZE];
u32 blocksize;
if (dev->missing) {
scrub_parity_mark_sectors_error(sparity, logical, len);
return 0;
}
if (flags & BTRFS_EXTENT_FLAG_DATA) {
blocksize = sctx->sectorsize;
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
@ -2689,7 +2826,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
sparity->nsectors))
goto out;
length = sparity->logic_end - sparity->logic_start + 1;
length = sparity->logic_end - sparity->logic_start;
ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
sparity->logic_start,
&length, &bbio, 0, 1);
@ -2712,8 +2849,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
goto rbio_out;
list_for_each_entry(spage, &sparity->spages, list)
raid56_parity_add_scrub_pages(rbio, spage->page,
spage->logical);
raid56_add_scrub_pages(rbio, spage->page, spage->logical);
scrub_pending_bio_inc(sctx);
raid56_parity_submit_scrub_rbio(rbio);
@ -2761,6 +2897,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_root *csum_root = fs_info->csum_root;
struct btrfs_extent_item *extent;
struct btrfs_bio *bbio = NULL;
u64 flags;
int ret;
int slot;
@ -2770,6 +2907,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
u64 extent_logical;
u64 extent_physical;
u64 extent_len;
u64 mapped_length;
struct btrfs_device *extent_dev;
struct scrub_parity *sparity;
int nsectors;
@ -2843,6 +2981,10 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
}
btrfs_item_key_to_cpu(l, &key, slot);
if (key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY)
goto next;
if (key.type == BTRFS_METADATA_ITEM_KEY)
bytes = root->nodesize;
else
@ -2851,11 +2993,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
if (key.objectid + bytes <= logic_start)
goto next;
if (key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY)
goto next;
if (key.objectid > logic_end) {
if (key.objectid >= logic_end) {
stop_loop = 1;
break;
}
@ -2868,11 +3006,12 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
flags = btrfs_extent_flags(l, extent);
generation = btrfs_extent_generation(l, extent);
if (key.objectid < logic_start &&
(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
btrfs_err(fs_info,
"scrub: tree block %llu spanning stripes, ignored. logical=%llu",
key.objectid, logic_start);
if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
(key.objectid < logic_start ||
key.objectid + bytes >
logic_start + map->stripe_len)) {
btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
key.objectid, logic_start);
goto next;
}
again:
@ -2892,10 +3031,21 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
scrub_parity_mark_sectors_data(sparity, extent_logical,
extent_len);
scrub_remap_extent(fs_info, extent_logical,
extent_len, &extent_physical,
&extent_dev,
&extent_mirror_num);
mapped_length = extent_len;
ret = btrfs_map_block(fs_info, READ, extent_logical,
&mapped_length, &bbio, 0);
if (!ret) {
if (!bbio || mapped_length < extent_len)
ret = -EIO;
}
if (ret) {
btrfs_put_bbio(bbio);
goto out;
}
extent_physical = bbio->stripes[0].physical;
extent_mirror_num = bbio->mirror_num;
extent_dev = bbio->stripes[0].dev;
btrfs_put_bbio(bbio);
ret = btrfs_lookup_csums_range(csum_root,
extent_logical,
@ -2910,10 +3060,12 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
extent_dev, flags,
generation,
extent_mirror_num);
scrub_free_csums(sctx);
if (ret)
goto out;
scrub_free_csums(sctx);
if (extent_logical + extent_len <
key.objectid + bytes) {
logic_start += map->stripe_len;
@ -2942,7 +3094,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
out:
if (ret < 0)
scrub_parity_mark_sectors_error(sparity, logic_start,
logic_end - logic_start + 1);
logic_end - logic_start);
scrub_parity_put(sparity);
scrub_submit(sctx);
mutex_lock(&sctx->wr_ctx.wr_lock);
@ -3091,22 +3243,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
*/
ret = 0;
while (physical < physical_end) {
/* for raid56, we skip parity stripe */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ret = get_raid56_logic_offset(physical, num,
map, &logical, &stripe_logical);
logical += base;
if (ret) {
stripe_logical += base;
stripe_end = stripe_logical + increment - 1;
ret = scrub_raid56_parity(sctx, map, scrub_dev,
ppath, stripe_logical,
stripe_end);
if (ret)
goto out;
goto skip;
}
}
/*
* canceled?
*/
@ -3131,6 +3267,24 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
scrub_blocked_if_needed(fs_info);
}
/* for raid56, we skip parity stripe */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ret = get_raid56_logic_offset(physical, num, map,
&logical,
&stripe_logical);
logical += base;
if (ret) {
stripe_logical += base;
stripe_end = stripe_logical + increment;
ret = scrub_raid56_parity(sctx, map, scrub_dev,
ppath, stripe_logical,
stripe_end);
if (ret)
goto out;
goto skip;
}
}
if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
key.type = BTRFS_METADATA_ITEM_KEY;
else
@ -3175,6 +3329,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
}
btrfs_item_key_to_cpu(l, &key, slot);
if (key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY)
goto next;
if (key.type == BTRFS_METADATA_ITEM_KEY)
bytes = root->nodesize;
else
@ -3183,10 +3341,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
if (key.objectid + bytes <= logical)
goto next;
if (key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY)
goto next;
if (key.objectid >= logical + map->stripe_len) {
/* out of this device extent */
if (key.objectid >= logic_end)
@ -3199,8 +3353,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
flags = btrfs_extent_flags(l, extent);
generation = btrfs_extent_generation(l, extent);
if (key.objectid < logical &&
(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
(key.objectid < logical ||
key.objectid + bytes >
logical + map->stripe_len)) {
btrfs_err(fs_info,
"scrub: tree block %llu spanning "
"stripes, ignored. logical=%llu",
@ -3234,9 +3390,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
&extent_dev,
&extent_mirror_num);
ret = btrfs_lookup_csums_range(csum_root, logical,
logical + map->stripe_len - 1,
&sctx->csum_list, 1);
ret = btrfs_lookup_csums_range(csum_root,
extent_logical,
extent_logical +
extent_len - 1,
&sctx->csum_list, 1);
if (ret)
goto out;
@ -3244,10 +3402,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
extent_physical, extent_dev, flags,
generation, extent_mirror_num,
extent_logical - logical + physical);
scrub_free_csums(sctx);
if (ret)
goto out;
scrub_free_csums(sctx);
if (extent_logical + extent_len <
key.objectid + bytes) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
@ -3265,7 +3425,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
if (ret && physical < physical_end) {
stripe_logical += base;
stripe_end = stripe_logical +
increment - 1;
increment;
ret = scrub_raid56_parity(sctx,
map, scrub_dev, ppath,
stripe_logical,
@ -3374,7 +3534,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
u64 chunk_tree;
u64 chunk_objectid;
u64 chunk_offset;
int ret;
int ret = 0;
int slot;
struct extent_buffer *l;
struct btrfs_key key;
@ -3402,8 +3562,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (path->slots[0] >=
btrfs_header_nritems(path->nodes[0])) {
ret = btrfs_next_leaf(root, path);
if (ret)
if (ret < 0)
break;
if (ret > 0) {
ret = 0;
break;
}
} else {
ret = 0;
}
}
@ -3445,6 +3611,22 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!cache)
goto skip;
/*
* we need call btrfs_inc_block_group_ro() with scrubs_paused,
* to avoid deadlock caused by:
* btrfs_inc_block_group_ro()
* -> btrfs_wait_for_commit()
* -> btrfs_commit_transaction()
* -> btrfs_scrub_pause()
*/
scrub_pause_on(fs_info);
ret = btrfs_inc_block_group_ro(root, cache);
scrub_pause_off(fs_info);
if (ret) {
btrfs_put_block_group(cache);
break;
}
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
@ -3470,8 +3652,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
atomic_inc(&fs_info->scrubs_paused);
wake_up(&fs_info->scrub_pause_wait);
scrub_pause_on(fs_info);
/*
* must be called before we decrease @scrub_paused.
@ -3482,11 +3664,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
atomic_read(&sctx->workers_pending) == 0);
atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
mutex_lock(&fs_info->scrub_lock);
__scrub_blocked_if_needed(fs_info);
atomic_dec(&fs_info->scrubs_paused);
mutex_unlock(&fs_info->scrub_lock);
wake_up(&fs_info->scrub_pause_wait);
scrub_pause_off(fs_info);
btrfs_dec_block_group_ro(root, cache);
btrfs_put_block_group(cache);
if (ret)
@ -3510,11 +3690,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
btrfs_free_path(path);
/*
* ret can still be 1 from search_slot or next_leaf,
* that's not an error
*/
return ret < 0 ? ret : 0;
return ret;
}
static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,

View file

@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
static const char *btrfs_decode_error(int errno)
const char *btrfs_decode_error(int errno)
{
char *errstr = "unknown";
@ -1033,6 +1033,7 @@ static int btrfs_fill_super(struct super_block *sb,
sb->s_flags |= MS_POSIXACL;
#endif
sb->s_flags |= MS_I_VERSION;
sb->s_iflags |= SB_I_CGROUPWB;
err = open_ctree(sb, fs_devices, (char *)data);
if (err) {
printk(KERN_ERR "BTRFS: open_ctree failed\n");
@ -1650,6 +1651,17 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags |= MS_RDONLY;
/*
* Setting MS_RDONLY will put the cleaner thread to
* sleep at the next loop if it's already active.
* If it's already asleep, we'll leave unused block
* groups on disk until we're mounted read-write again
* unless we clean them up here.
*/
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_delete_unused_bgs(fs_info);
mutex_unlock(&root->fs_info->cleaner_mutex);
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
btrfs_pause_balance(fs_info);

View file

@ -258,6 +258,8 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
mutex_init(&cur_trans->cache_write_mutex);
cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
spin_lock_init(&cur_trans->deleted_bgs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping);
@ -1301,7 +1303,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
*/
btrfs_set_skip_qgroup(trans, objectid);
btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
btrfs_reloc_pre_snapshot(pending, &to_reserve);
if (to_reserve > 0) {
pending->error = btrfs_block_rsv_add(root,
@ -1893,8 +1895,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
spin_unlock(&root->fs_info->trans_lock);
wait_for_commit(root, prev_trans);
ret = prev_trans->aborted;
btrfs_put_transaction(prev_trans);
if (ret)
goto cleanup_transaction;
} else {
spin_unlock(&root->fs_info->trans_lock);
}

View file

@ -74,6 +74,8 @@ struct btrfs_transaction {
*/
struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
struct list_head deleted_bgs;
spinlock_t deleted_bgs_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
int dirty_bg_run;

View file

@ -140,55 +140,46 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
int index;
int ret;
int ret = 0;
mutex_lock(&root->log_mutex);
if (root->log_root) {
if (btrfs_need_log_full_commit(root->fs_info, trans)) {
ret = -EAGAIN;
goto out;
}
if (!root->log_start_pid) {
root->log_start_pid = current->pid;
clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
root->log_start_pid = current->pid;
} else if (root->log_start_pid != current->pid) {
set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
}
} else {
mutex_lock(&root->fs_info->tree_log_mutex);
if (!root->fs_info->log_root_tree)
ret = btrfs_init_log_root_tree(trans, root->fs_info);
mutex_unlock(&root->fs_info->tree_log_mutex);
if (ret)
goto out;
atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
if (ctx) {
index = root->log_transid % 2;
list_add_tail(&ctx->list, &root->log_ctxs[index]);
ctx->log_transid = root->log_transid;
}
mutex_unlock(&root->log_mutex);
return 0;
}
ret = 0;
mutex_lock(&root->fs_info->tree_log_mutex);
if (!root->fs_info->log_root_tree)
ret = btrfs_init_log_root_tree(trans, root->fs_info);
mutex_unlock(&root->fs_info->tree_log_mutex);
if (ret)
goto out;
if (!root->log_root) {
ret = btrfs_add_log_tree(trans, root);
if (ret)
goto out;
clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
root->log_start_pid = current->pid;
}
clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
root->log_start_pid = current->pid;
atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
if (ctx) {
index = root->log_transid % 2;
int index = root->log_transid % 2;
list_add_tail(&ctx->list, &root->log_ctxs[index]);
ctx->log_transid = root->log_transid;
}
out:
mutex_unlock(&root->log_mutex);
return ret;
@ -731,11 +722,65 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
&ordered_sums, 0);
if (ret)
goto out;
/*
* Now delete all existing cums in the csum root that
* cover our range. We do this because we can have an
* extent that is completely referenced by one file
* extent item and partially referenced by another
* file extent item (like after using the clone or
* extent_same ioctls). In this case if we end up doing
* the replay of the one that partially references the
* extent first, and we do not do the csum deletion
* below, we can get 2 csum items in the csum tree that
* overlap each other. For example, imagine our log has
* the two following file extent items:
*
* key (257 EXTENT_DATA 409600)
* extent data disk byte 12845056 nr 102400
* extent data offset 20480 nr 20480 ram 102400
*
* key (257 EXTENT_DATA 819200)
* extent data disk byte 12845056 nr 102400
* extent data offset 0 nr 102400 ram 102400
*
* Where the second one fully references the 100K extent
* that starts at disk byte 12845056, and the log tree
* has a single csum item that covers the entire range
* of the extent:
*
* key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
*
* After the first file extent item is replayed, the
* csum tree gets the following csum item:
*
* key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
*
* Which covers the 20K sub-range starting at offset 20K
* of our extent. Now when we replay the second file
* extent item, if we do not delete existing csum items
* that cover any of its blocks, we end up getting two
* csum items in our csum tree that overlap each other:
*
* key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
* key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
*
* Which is a problem, because after this anyone trying
* to lookup up for the checksum of any block of our
* extent starting at an offset of 40K or higher, will
* end up looking at the second csum item only, which
* does not contain the checksum for any block starting
* at offset 40K or higher of our extent.
*/
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums;
sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
list);
if (!ret)
ret = btrfs_del_csums(trans,
root->fs_info->csum_root,
sums->bytenr,
sums->len);
if (!ret)
ret = btrfs_csum_file_blocks(trans,
root->fs_info->csum_root,
@ -1549,9 +1594,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
*/
static noinline int insert_one_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 dirid, u64 index,
char *name, int name_len, u8 type,
char *name, int name_len,
struct btrfs_key *location)
{
struct inode *inode;
@ -1613,6 +1657,9 @@ static bool name_in_log_ref(struct btrfs_root *log_root,
* not exist in the FS, it is skipped. fsyncs on directories
* do not force down inodes inside that directory, just changes to the
* names or unlinks in a directory.
*
* Returns < 0 on error, 0 if the name wasn't replayed (dentry points to a
* non-existing inode) and 1 if the name was replayed.
*/
static noinline int replay_one_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@ -1631,6 +1678,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
int exists;
int ret = 0;
bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
bool name_added = false;
dir = read_one_inode(root, key->objectid);
if (!dir)
@ -1708,6 +1756,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
}
kfree(name);
iput(dir);
if (!ret && name_added)
ret = 1;
return ret;
insert:
@ -1719,10 +1769,12 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
goto out;
}
btrfs_release_path(path);
ret = insert_one_name(trans, root, path, key->objectid, key->offset,
name, name_len, log_type, &log_key);
ret = insert_one_name(trans, root, key->objectid, key->offset,
name, name_len, &log_key);
if (ret && ret != -ENOENT && ret != -EEXIST)
goto out;
if (!ret)
name_added = true;
update_size = false;
ret = 0;
goto out;
@ -1740,12 +1792,13 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
int ret;
int ret = 0;
u32 item_size = btrfs_item_size_nr(eb, slot);
struct btrfs_dir_item *di;
int name_len;
unsigned long ptr;
unsigned long ptr_end;
struct btrfs_path *fixup_path = NULL;
ptr = btrfs_item_ptr_offset(eb, slot);
ptr_end = ptr + item_size;
@ -1755,12 +1808,59 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
return -EIO;
name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key);
if (ret)
return ret;
if (ret < 0)
break;
ptr = (unsigned long)(di + 1);
ptr += name_len;
/*
* If this entry refers to a non-directory (directories can not
* have a link count > 1) and it was added in the transaction
* that was not committed, make sure we fixup the link count of
* the inode it the entry points to. Otherwise something like
* the following would result in a directory pointing to an
* inode with a wrong link that does not account for this dir
* entry:
*
* mkdir testdir
* touch testdir/foo
* touch testdir/bar
* sync
*
* ln testdir/bar testdir/bar_link
* ln testdir/foo testdir/foo_link
* xfs_io -c "fsync" testdir/bar
*
* <power failure>
*
* mount fs, log replay happens
*
* File foo would remain with a link count of 1 when it has two
* entries pointing to it in the directory testdir. This would
* make it impossible to ever delete the parent directory has
* it would result in stale dentries that can never be deleted.
*/
if (ret == 1 && btrfs_dir_type(eb, di) != BTRFS_FT_DIR) {
struct btrfs_key di_key;
if (!fixup_path) {
fixup_path = btrfs_alloc_path();
if (!fixup_path) {
ret = -ENOMEM;
break;
}
}
btrfs_dir_item_key_to_cpu(eb, di, &di_key);
ret = link_to_fixup_dir(trans, root, fixup_path,
di_key.objectid);
if (ret)
break;
}
ret = 0;
}
return 0;
btrfs_free_path(fixup_path);
return ret;
}
/*
@ -2535,8 +2635,7 @@ static int update_log_root(struct btrfs_trans_handle *trans,
return ret;
}
static void wait_log_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int transid)
static void wait_log_commit(struct btrfs_root *root, int transid)
{
DEFINE_WAIT(wait);
int index = transid % 2;
@ -2561,8 +2660,7 @@ static void wait_log_commit(struct btrfs_trans_handle *trans,
atomic_read(&root->log_commit[index]));
}
static void wait_for_writer(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
static void wait_for_writer(struct btrfs_root *root)
{
DEFINE_WAIT(wait);
@ -2642,7 +2740,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
index1 = log_transid % 2;
if (atomic_read(&root->log_commit[index1])) {
wait_log_commit(trans, root, log_transid);
wait_log_commit(root, log_transid);
mutex_unlock(&root->log_mutex);
return ctx->log_ret;
}
@ -2651,7 +2749,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* wait for previous tree log sync to complete */
if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
wait_log_commit(trans, root, log_transid - 1);
wait_log_commit(root, log_transid - 1);
while (1) {
int batch = atomic_read(&root->log_batch);
@ -2662,7 +2760,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
schedule_timeout_uninterruptible(1);
mutex_lock(&root->log_mutex);
}
wait_for_writer(trans, root);
wait_for_writer(root);
if (batch == atomic_read(&root->log_batch))
break;
}
@ -2759,7 +2857,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_wait_marked_extents(log, &log->dirty_log_pages,
mark);
btrfs_wait_logged_extents(trans, log, log_transid);
wait_log_commit(trans, log_root_tree,
wait_log_commit(log_root_tree,
root_log_ctx.log_transid);
mutex_unlock(&log_root_tree->log_mutex);
if (!ret)
@ -2770,11 +2868,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
atomic_set(&log_root_tree->log_commit[index2], 1);
if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
wait_log_commit(trans, log_root_tree,
wait_log_commit(log_root_tree,
root_log_ctx.log_transid - 1);
}
wait_for_writer(trans, log_root_tree);
wait_for_writer(log_root_tree);
/*
* now that we've moved on to the tree of log tree roots,
@ -4904,6 +5002,94 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
return ret;
}
static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_log_ctx *ctx)
{
int ret;
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_root *root = BTRFS_I(inode)->root;
const u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->skip_locking = 1;
path->search_commit_root = 1;
key.objectid = ino;
key.type = BTRFS_INODE_REF_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
while (true) {
struct extent_buffer *leaf = path->nodes[0];
int slot = path->slots[0];
u32 cur_offset = 0;
u32 item_size;
unsigned long ptr;
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
else if (ret > 0)
break;
continue;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
/* BTRFS_INODE_EXTREF_KEY is BTRFS_INODE_REF_KEY + 1 */
if (key.objectid != ino || key.type > BTRFS_INODE_EXTREF_KEY)
break;
item_size = btrfs_item_size_nr(leaf, slot);
ptr = btrfs_item_ptr_offset(leaf, slot);
while (cur_offset < item_size) {
struct btrfs_key inode_key;
struct inode *dir_inode;
inode_key.type = BTRFS_INODE_ITEM_KEY;
inode_key.offset = 0;
if (key.type == BTRFS_INODE_EXTREF_KEY) {
struct btrfs_inode_extref *extref;
extref = (struct btrfs_inode_extref *)
(ptr + cur_offset);
inode_key.objectid = btrfs_inode_extref_parent(
leaf, extref);
cur_offset += sizeof(*extref);
cur_offset += btrfs_inode_extref_name_len(leaf,
extref);
} else {
inode_key.objectid = key.offset;
cur_offset = item_size;
}
dir_inode = btrfs_iget(root->fs_info->sb, &inode_key,
root, NULL);
/* If parent inode was deleted, skip it. */
if (IS_ERR(dir_inode))
continue;
ret = btrfs_log_inode(trans, root, dir_inode,
LOG_INODE_ALL, 0, LLONG_MAX, ctx);
iput(dir_inode);
if (ret)
goto out;
}
path->slots[0]++;
}
ret = 0;
out:
btrfs_free_path(path);
return ret;
}
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
@ -4923,9 +5109,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct dentry *old_parent = NULL;
int ret = 0;
u64 last_committed = root->fs_info->last_trans_committed;
const struct dentry * const first_parent = parent;
const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
last_committed);
bool log_dentries = false;
struct inode *orig_inode = inode;
@ -4986,6 +5169,53 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
log_dentries = true;
/*
* On unlink we must make sure all our current and old parent directores
* inodes are fully logged. This is to prevent leaving dangling
* directory index entries in directories that were our parents but are
* not anymore. Not doing this results in old parent directory being
* impossible to delete after log replay (rmdir will always fail with
* error -ENOTEMPTY).
*
* Example 1:
*
* mkdir testdir
* touch testdir/foo
* ln testdir/foo testdir/bar
* sync
* unlink testdir/bar
* xfs_io -c fsync testdir/foo
* <power failure>
* mount fs, triggers log replay
*
* If we don't log the parent directory (testdir), after log replay the
* directory still has an entry pointing to the file inode using the bar
* name, but a matching BTRFS_INODE_[REF|EXTREF]_KEY does not exist and
* the file inode has a link count of 1.
*
* Example 2:
*
* mkdir testdir
* touch foo
* ln foo testdir/foo2
* ln foo testdir/foo3
* sync
* unlink testdir/foo3
* xfs_io -c fsync foo
* <power failure>
* mount fs, triggers log replay
*
* Similar as the first example, after log replay the parent directory
* testdir still has an entry pointing to the inode file with name foo3
* but the file inode does not have a matching BTRFS_INODE_REF_KEY item
* and has a link count of 2.
*/
if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
ret = btrfs_log_all_parents(trans, orig_inode, ctx);
if (ret)
goto end_trans;
}
while (1) {
if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
break;
@ -4994,23 +5224,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (root != BTRFS_I(inode)->root)
break;
/*
* On unlink we must make sure our immediate parent directory
* inode is fully logged. This is to prevent leaving dangling
* directory index entries and a wrong directory inode's i_size.
* Not doing so can result in a directory being impossible to
* delete after log replay (rmdir will always fail with error
* -ENOTEMPTY).
*/
if (did_unlink && parent == first_parent)
inode_only = LOG_INODE_ALL;
else
inode_only = LOG_INODE_EXISTS;
if (BTRFS_I(inode)->generation >
root->fs_info->last_trans_committed ||
inode_only == LOG_INODE_ALL) {
ret = btrfs_log_inode(trans, root, inode, inode_only,
if (BTRFS_I(inode)->generation > last_committed) {
ret = btrfs_log_inode(trans, root, inode,
LOG_INODE_EXISTS,
0, LLONG_MAX, ctx);
if (ret)
goto end_trans;

View file

@ -1116,15 +1116,18 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
return ret;
}
static int contains_pending_extent(struct btrfs_trans_handle *trans,
static int contains_pending_extent(struct btrfs_transaction *transaction,
struct btrfs_device *device,
u64 *start, u64 len)
{
struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
struct extent_map *em;
struct list_head *search_list = &trans->transaction->pending_chunks;
struct list_head *search_list = &fs_info->pinned_chunks;
int ret = 0;
u64 physical_start = *start;
if (transaction)
search_list = &transaction->pending_chunks;
again:
list_for_each_entry(em, search_list, list) {
struct map_lookup *map;
@ -1159,8 +1162,8 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
}
}
}
if (search_list == &trans->transaction->pending_chunks) {
search_list = &trans->root->fs_info->pinned_chunks;
if (search_list != &fs_info->pinned_chunks) {
search_list = &fs_info->pinned_chunks;
goto again;
}
@ -1169,12 +1172,13 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
/*
* find_free_dev_extent - find free space in the specified device
* @device: the device which we search the free space in
* @num_bytes: the size of the free space that we need
* @start: store the start of the free space.
* @len: the size of the free space. that we find, or the size of the max
* free space if we don't find suitable free space
* find_free_dev_extent_start - find free space in the specified device
* @device: the device which we search the free space in
* @num_bytes: the size of the free space that we need
* @search_start: the position from which to begin the search
* @start: store the start of the free space.
* @len: the size of the free space. that we find, or the size
* of the max free space if we don't find suitable free space
*
* this uses a pretty simple search, the expectation is that it is
* called very infrequently and that a given device has a small number
@ -1188,9 +1192,9 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
* But if we don't find suitable free space, it is used to store the size of
* the max free space.
*/
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *len)
int find_free_dev_extent_start(struct btrfs_transaction *transaction,
struct btrfs_device *device, u64 num_bytes,
u64 search_start, u64 *start, u64 *len)
{
struct btrfs_key key;
struct btrfs_root *root = device->dev_root;
@ -1200,19 +1204,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
u64 max_hole_start;
u64 max_hole_size;
u64 extent_end;
u64 search_start;
u64 search_end = device->total_bytes;
int ret;
int slot;
struct extent_buffer *l;
/* FIXME use last free of some kind */
/* we don't want to overwrite the superblock on the drive,
* so we make sure to start at an offset of at least 1MB
*/
search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@ -1273,7 +1269,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
* Have to check before we set max_hole_start, otherwise
* we could end up sending back this offset anyway.
*/
if (contains_pending_extent(trans, device,
if (contains_pending_extent(transaction, device,
&search_start,
hole_size)) {
if (key.offset >= search_start) {
@ -1322,7 +1318,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
if (search_end > search_start) {
hole_size = search_end - search_start;
if (contains_pending_extent(trans, device, &search_start,
if (contains_pending_extent(transaction, device, &search_start,
hole_size)) {
btrfs_release_path(path);
goto again;
@ -1348,6 +1344,24 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
return ret;
}
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *len)
{
struct btrfs_root *root = device->dev_root;
u64 search_start;
/* FIXME use last free of some kind */
/*
* we don't want to overwrite the superblock on the drive,
* so we make sure to start at an offset of at least 1MB
*/
search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
return find_free_dev_extent_start(trans->transaction, device,
num_bytes, search_start, start, len);
}
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device,
u64 start, u64 *dev_extent_len)
@ -2755,9 +2769,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
return ret;
}
static int btrfs_relocate_chunk(struct btrfs_root *root,
u64 chunk_objectid,
u64 chunk_offset)
static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
{
struct btrfs_root *extent_root;
struct btrfs_trans_handle *trans;
@ -2785,7 +2797,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
return -ENOSPC;
/* step one, relocate all the extents inside this chunk */
btrfs_scrub_pause(root);
ret = btrfs_relocate_block_group(extent_root, chunk_offset);
btrfs_scrub_continue(root);
if (ret)
return ret;
@ -2855,7 +2869,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_relocate_chunk(chunk_root,
found_key.objectid,
found_key.offset);
if (ret == -ENOSPC)
failed++;
@ -3375,7 +3388,6 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
}
ret = btrfs_relocate_chunk(chunk_root,
found_key.objectid,
found_key.offset);
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
if (ret && ret != -ENOSPC)
@ -4077,7 +4089,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
struct btrfs_dev_extent *dev_extent = NULL;
struct btrfs_path *path;
u64 length;
u64 chunk_objectid;
u64 chunk_offset;
int ret;
int slot;
@ -4154,11 +4165,10 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
break;
}
chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
btrfs_release_path(path);
ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
ret = btrfs_relocate_chunk(root, chunk_offset);
mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (ret && ret != -ENOSPC)
goto done;
@ -4200,7 +4210,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
u64 start = new_size;
u64 len = old_size - new_size;
if (contains_pending_extent(trans, device, &start, len)) {
if (contains_pending_extent(trans->transaction, device,
&start, len)) {
unlock_chunks(root);
checked_pending_chunks = true;
failed = 0;
@ -5071,9 +5082,7 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
* and the stripes
*/
sizeof(u64) * (total_stripes),
GFP_NOFS);
if (!bbio)
return NULL;
GFP_NOFS|__GFP_NOFAIL);
atomic_set(&bbio->error, 0);
atomic_set(&bbio->refs, 1);

View file

@ -453,6 +453,9 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent_start(struct btrfs_transaction *transaction,
struct btrfs_device *device, u64 num_bytes,
u64 search_start, u64 *start, u64 *max_avail);
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);