btrfs: use bios instead of buffer_heads from super block writeout

Similar to the superblock read path, change the write path to using bios
and pages instead of buffer_heads. This allows us to skip over the
buffer_head code, for writing the superblock to disk.

This is based on a patch originally authored by Nikolay Borisov.

Co-developed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Johannes Thumshirn 2020-02-14 00:24:33 +09:00 committed by David Sterba
parent 8f32380d3f
commit 314b6dd0ee

View file

@ -7,7 +7,6 @@
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/slab.h>
@ -3395,25 +3394,34 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
}
ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
static void btrfs_end_super_write(struct bio *bio)
{
if (uptodate) {
set_buffer_uptodate(bh);
} else {
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;
struct btrfs_device *device = bio->bi_private;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
struct page *page;
btrfs_warn_rl_in_rcu(device->fs_info,
"lost page write due to IO error on %s",
rcu_str_deref(device->name));
/* note, we don't set_buffer_write_io_error because we have
* our own ways of dealing with the IO errors
*/
clear_buffer_uptodate(bh);
btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
bio_for_each_segment_all(bvec, bio, iter_all) {
page = bvec->bv_page;
if (bio->bi_status) {
btrfs_warn_rl_in_rcu(device->fs_info,
"lost page write due to IO error on %s (%d)",
rcu_str_deref(device->name),
blk_status_to_errno(bio->bi_status));
ClearPageUptodate(page);
SetPageError(page);
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_WRITE_ERRS);
} else {
SetPageUptodate(page);
}
put_page(page);
unlock_page(page);
}
unlock_buffer(bh);
put_bh(bh);
bio_put(bio);
}
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
@ -3473,25 +3481,23 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
/*
* Write superblock @sb to the @device. Do not wait for completion, all the
* buffer heads we write are pinned.
* pages we use for writing are locked.
*
* Write @max_mirrors copies of the superblock, where 0 means default that fit
* the expected device size at commit time. Note that max_mirrors must be
* same for write and wait phases.
*
* Return number of errors when buffer head is not found or submission fails.
* Return number of errors when page is not found or submission fails.
*/
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct buffer_head *bh;
int i;
int ret;
int errors = 0;
u64 bytenr;
int op_flags;
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
@ -3499,6 +3505,10 @@ static int write_dev_supers(struct btrfs_device *device,
shash->tfm = fs_info->csum_shash;
for (i = 0; i < max_mirrors; i++) {
struct page *page;
struct bio *bio;
struct btrfs_super_block *disk_super;
bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
@ -3511,37 +3521,45 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
crypto_shash_final(shash, sb->csum);
/* One reference for us, and we leave it for the caller */
bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
BTRFS_SUPER_INFO_SIZE);
if (!bh) {
page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
GFP_NOFS);
if (!page) {
btrfs_err(device->fs_info,
"couldn't get super buffer head for bytenr %llu",
"couldn't get super block page for bytenr %llu",
bytenr);
errors++;
continue;
}
memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
/* Bump the refcount for wait_dev_supers() */
get_page(page);
/* one reference for submit_bh */
get_bh(bh);
set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
bh->b_private = device;
disk_super = page_address(page);
memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
/*
* we fua the first super. The others we allow
* to go down lazy.
* Directly use bios here instead of relying on the page cache
* to do I/O, so we don't lose the ability to do integrity
* checking.
*/
op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
bio = bio_alloc(GFP_NOFS, 1);
bio_set_dev(bio, device->bdev);
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
bio->bi_private = device;
bio->bi_end_io = btrfs_end_super_write;
__bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
offset_in_page(bytenr));
/*
* We FUA only the first super block. The others we allow to
* go down lazy and there's a short window where the on-disk
* copies might still contain the older version.
*/
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO;
if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
op_flags |= REQ_FUA;
ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
if (ret)
errors++;
bio->bi_opf |= REQ_FUA;
btrfsic_submit_bio(bio);
}
return errors < i ? 0 : -1;
}
@ -3550,12 +3568,11 @@ static int write_dev_supers(struct btrfs_device *device,
* Wait for write completion of superblocks done by write_dev_supers,
* @max_mirrors same for write and wait phases.
*
* Return number of errors when buffer head is not found or not marked up to
* Return number of errors when page is not found or not marked up to
* date.
*/
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{
struct buffer_head *bh;
int i;
int errors = 0;
bool primary_failed = false;
@ -3565,32 +3582,34 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
for (i = 0; i < max_mirrors; i++) {
struct page *page;
bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
break;
bh = __find_get_block(device->bdev,
bytenr / BTRFS_BDEV_BLOCKSIZE,
BTRFS_SUPER_INFO_SIZE);
if (!bh) {
page = find_get_page(device->bdev->bd_inode->i_mapping,
bytenr >> PAGE_SHIFT);
if (!page) {
errors++;
if (i == 0)
primary_failed = true;
continue;
}
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* Page is submitted locked and unlocked once the IO completes */
wait_on_page_locked(page);
if (PageError(page)) {
errors++;
if (i == 0)
primary_failed = true;
}
/* drop our reference */
brelse(bh);
/* Drop our reference */
put_page(page);
/* drop the reference from the writing run */
brelse(bh);
/* Drop the reference from the writing run */
put_page(page);
}
/* log error, force error return */