btrfs: add framework to handle device flush error as a volume

This adds comments to the flush error handling code and introduces a
framework for handling these errors at the volume level, while aiming to
keep the existing logic unchanged.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Anand Jain 2017-05-06 07:17:54 +08:00 committed by David Sterba
parent 6b349dfe80
commit 401b41e5a8
2 changed files with 54 additions and 4 deletions

View file

@ -3509,6 +3509,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
if (wait) {
bio = device->flush_bio;
if (!bio)
/*
* This means the allocation has failed with ENOMEM. We still
* return 0 here, as it's not a device error.
*/
return 0;
wait_for_completion(&device->flush_wait);
@ -3548,6 +3552,32 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
return 0;
}
/*
 * Walk every device of @fsdevs and decide whether the recorded flush
 * results are still within what the filesystem tolerates.
 *
 * Two counters are kept:
 *  - submit failures: devices without a bdev, or whose last_flush_error
 *    is -ENOMEM
 *  - device failures: devices without a bdev, or whose last_flush_error
 *    is any other non-zero value
 *
 * Returns -EIO when either counter exceeds
 * fs_info->num_tolerated_disk_barrier_failures, 0 otherwise.
 */
static int check_barrier_error(struct btrfs_fs_devices *fsdevs)
{
	struct btrfs_device *dev;
	int nr_submit_failed = 0;
	int nr_dev_failed = 0;
	int max_failures;

	list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
		int err;

		/* A device with no bdev counts against both limits. */
		if (!dev->bdev) {
			nr_submit_failed++;
			nr_dev_failed++;
			continue;
		}

		err = dev->last_flush_error;
		if (err == -ENOMEM)
			nr_submit_failed++;
		else if (err)
			nr_dev_failed++;
	}

	max_failures = fsdevs->fs_info->num_tolerated_disk_barrier_failures;
	if (nr_submit_failed <= max_failures && nr_dev_failed <= max_failures)
		return 0;

	return -EIO;
}
/*
* send an empty flush down to each device in parallel,
* then wait for them
@ -3575,6 +3605,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
ret = write_dev_flush(dev, 0);
if (ret)
errors_send++;
dev->last_flush_error = ret;
}
/* wait for all the barriers */
@ -3589,12 +3620,30 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
continue;
ret = write_dev_flush(dev, 1);
if (ret)
if (ret) {
dev->last_flush_error = ret;
errors_wait++;
}
}
/*
* Try hard in case of flush. Let's say, in RAID1 we have
* the following situation:
*  dev1: EIO dev2: ENOMEM
* This is not a fatal error, as we hope to recover from
* ENOMEM in the next attempt to flush.
* But the following are considered fatal:
*  dev1: ENOMEM dev2: ENOMEM
*  dev1: bdev == NULL dev2: ENOMEM
*/
if (errors_send || errors_wait) {
/*
* At some point we need the status of all disks
* to arrive at the volume status. So error checking
* is being pushed to a separate loop.
*/
return check_barrier_error(info->fs_devices);
}
if (errors_send > info->num_tolerated_disk_barrier_failures ||
errors_wait > info->num_tolerated_disk_barrier_failures)
return -EIO;
return 0;
}

View file

@ -74,6 +74,7 @@ struct btrfs_device {
int missing;
int can_discard;
int is_tgtdev_for_dev_replace;
int last_flush_error;
#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
seqcount_t data_seqcount;