linux-stable/fs/btrfs/zoned.c

284 lines
6.8 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <linux/slab.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "volumes.h"
#include "zoned.h"
#include "rcu-string.h"
/* Maximum number of zones to report per blkdev_report_zones() call */
#define BTRFS_REPORT_NR_ZONES 4096
static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
{
struct blk_zone *zones = data;
memcpy(&zones[idx], zone, sizeof(*zone));
return 0;
}
static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
struct blk_zone *zones, unsigned int *nr_zones)
{
int ret;
if (!*nr_zones)
return 0;
ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
copy_zone_info_cb, zones);
if (ret < 0) {
btrfs_err_in_rcu(device->fs_info,
"zoned: failed to read zone %llu on %s (devid %llu)",
pos, rcu_str_deref(device->name),
device->devid);
return ret;
}
*nr_zones = ret;
if (!ret)
return -EIO;
return 0;
}
int btrfs_get_dev_zone_info(struct btrfs_device *device)
{
struct btrfs_zoned_device_info *zone_info = NULL;
struct block_device *bdev = device->bdev;
struct request_queue *queue = bdev_get_queue(bdev);
sector_t nr_sectors;
sector_t sector = 0;
struct blk_zone *zones = NULL;
unsigned int i, nreported = 0, nr_zones;
unsigned int zone_sectors;
int ret;
if (!bdev_is_zoned(bdev))
return 0;
if (device->zone_info)
return 0;
zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL);
if (!zone_info)
return -ENOMEM;
nr_sectors = bdev->bd_part->nr_sects;
zone_sectors = bdev_zone_sectors(bdev);
/* Check if it's power of 2 (see is_power_of_2) */
ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
zone_info->zone_size = zone_sectors << SECTOR_SHIFT;
zone_info->zone_size_shift = ilog2(zone_info->zone_size);
zone_info->max_zone_append_size =
(u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT;
zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
if (!zone_info->seq_zones) {
ret = -ENOMEM;
goto out;
}
zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
if (!zone_info->empty_zones) {
ret = -ENOMEM;
goto out;
}
zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
if (!zones) {
ret = -ENOMEM;
goto out;
}
/* Get zones type */
while (sector < nr_sectors) {
nr_zones = BTRFS_REPORT_NR_ZONES;
ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
&nr_zones);
if (ret)
goto out;
for (i = 0; i < nr_zones; i++) {
if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
__set_bit(nreported, zone_info->seq_zones);
if (zones[i].cond == BLK_ZONE_COND_EMPTY)
__set_bit(nreported, zone_info->empty_zones);
nreported++;
}
sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
}
if (nreported != zone_info->nr_zones) {
btrfs_err_in_rcu(device->fs_info,
"inconsistent number of zones on %s (%u/%u)",
rcu_str_deref(device->name), nreported,
zone_info->nr_zones);
ret = -EIO;
goto out;
}
kfree(zones);
device->zone_info = zone_info;
/* device->fs_info is not safe to use for printing messages */
btrfs_info_in_rcu(NULL,
"host-%s zoned block device %s, %u zones of %llu bytes",
bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : "aware",
rcu_str_deref(device->name), zone_info->nr_zones,
zone_info->zone_size);
return 0;
out:
kfree(zones);
bitmap_free(zone_info->empty_zones);
bitmap_free(zone_info->seq_zones);
kfree(zone_info);
return ret;
}
void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
{
struct btrfs_zoned_device_info *zone_info = device->zone_info;
if (!zone_info)
return;
bitmap_free(zone_info->seq_zones);
bitmap_free(zone_info->empty_zones);
kfree(zone_info);
device->zone_info = NULL;
}
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
{
unsigned int nr_zones = 1;
int ret;
ret = btrfs_get_dev_zones(device, pos, zone, &nr_zones);
if (ret != 0 || !nr_zones)
return ret ? ret : -EIO;
return 0;
}
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
u64 zoned_devices = 0;
u64 nr_devices = 0;
u64 zone_size = 0;
u64 max_zone_append_size = 0;
const bool incompat_zoned = btrfs_is_zoned(fs_info);
int ret = 0;
/* Count zoned devices */
list_for_each_entry(device, &fs_devices->devices, dev_list) {
enum blk_zoned_model model;
if (!device->bdev)
continue;
model = bdev_zoned_model(device->bdev);
if (model == BLK_ZONED_HM ||
(model == BLK_ZONED_HA && incompat_zoned)) {
struct btrfs_zoned_device_info *zone_info;
zone_info = device->zone_info;
zoned_devices++;
if (!zone_size) {
zone_size = zone_info->zone_size;
} else if (zone_info->zone_size != zone_size) {
btrfs_err(fs_info,
"zoned: unequal block device zone sizes: have %llu found %llu",
device->zone_info->zone_size,
zone_size);
ret = -EINVAL;
goto out;
}
if (!max_zone_append_size ||
(zone_info->max_zone_append_size &&
zone_info->max_zone_append_size < max_zone_append_size))
max_zone_append_size =
zone_info->max_zone_append_size;
}
nr_devices++;
}
if (!zoned_devices && !incompat_zoned)
goto out;
if (!zoned_devices && incompat_zoned) {
/* No zoned block device found on ZONED filesystem */
btrfs_err(fs_info,
"zoned: no zoned devices found on a zoned filesystem");
ret = -EINVAL;
goto out;
}
if (zoned_devices && !incompat_zoned) {
btrfs_err(fs_info,
"zoned: mode not enabled but zoned device found");
ret = -EINVAL;
goto out;
}
if (zoned_devices != nr_devices) {
btrfs_err(fs_info,
"zoned: cannot mix zoned and regular devices");
ret = -EINVAL;
goto out;
}
/*
* stripe_size is always aligned to BTRFS_STRIPE_LEN in
* __btrfs_alloc_chunk(). Since we want stripe_len == zone_size,
* check the alignment here.
*/
if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
btrfs_err(fs_info,
"zoned: zone size %llu not aligned to stripe %u",
zone_size, BTRFS_STRIPE_LEN);
ret = -EINVAL;
goto out;
}
fs_info->zone_size = zone_size;
fs_info->max_zone_append_size = max_zone_append_size;
btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size);
out:
return ret;
}
btrfs: disallow space_cache in ZONED mode As updates to the space cache v1 are in-place, the space cache cannot be located over sequential zones and there is no guarantees that the device will have enough conventional zones to store this cache. Resolve this problem by disabling completely the space cache v1. This does not introduce any problems with sequential block groups: all the free space is located after the allocation pointer and no free space before the pointer. There is no need to have such cache. Note: we can technically use free-space-tree (space cache v2) on ZONED mode. But, since ZONED mode now always allocates extents in a block group sequentially regardless of underlying device zone type, it's no use to enable and maintain the tree. For the same reason, NODATACOW is also disabled. In summary, ZONED will disable: | Disabled features | Reason | |-------------------+-----------------------------------------------------| | RAID/DUP | Cannot handle two zone append writes to different | | | zones | |-------------------+-----------------------------------------------------| | space_cache (v1) | In-place updating | | NODATACOW | In-place updating | |-------------------+-----------------------------------------------------| | fallocate | Reserved extent will be a write hole | |-------------------+-----------------------------------------------------| | MIXED_BG | Allocated metadata region will be write holes for | | | data writes | Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2020-11-10 11:26:10 +00:00
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
{
if (!btrfs_is_zoned(info))
return 0;
/*
* Space cache writing is not COWed. Disable that to avoid write errors
* in sequential zones.
*/
if (btrfs_test_opt(info, SPACE_CACHE)) {
btrfs_err(info, "zoned: space cache v1 is not supported");
return -EINVAL;
}
if (btrfs_test_opt(info, NODATACOW)) {
btrfs_err(info, "zoned: NODATACOW not supported");
return -EINVAL;
}
btrfs: disallow space_cache in ZONED mode As updates to the space cache v1 are in-place, the space cache cannot be located over sequential zones and there is no guarantees that the device will have enough conventional zones to store this cache. Resolve this problem by disabling completely the space cache v1. This does not introduce any problems with sequential block groups: all the free space is located after the allocation pointer and no free space before the pointer. There is no need to have such cache. Note: we can technically use free-space-tree (space cache v2) on ZONED mode. But, since ZONED mode now always allocates extents in a block group sequentially regardless of underlying device zone type, it's no use to enable and maintain the tree. For the same reason, NODATACOW is also disabled. In summary, ZONED will disable: | Disabled features | Reason | |-------------------+-----------------------------------------------------| | RAID/DUP | Cannot handle two zone append writes to different | | | zones | |-------------------+-----------------------------------------------------| | space_cache (v1) | In-place updating | | NODATACOW | In-place updating | |-------------------+-----------------------------------------------------| | fallocate | Reserved extent will be a write hole | |-------------------+-----------------------------------------------------| | MIXED_BG | Allocated metadata region will be write holes for | | | data writes | Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2020-11-10 11:26:10 +00:00
return 0;
}