for-5.5/disk-revalidate-20191122

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl3YA5sQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgplFxEACM7CwrWsullPX6b3j62NW6VepU5JQdzwVW
 S+bmLpb8Z2I4wzEnaVuWAY5hEhGaS9NFtQLdBG0W0YOzH7sweNmL38dZfCE4+oFj
 ZwytpXQQhAQUwkgANJCpNfzDymHduPsTz7RYqRr1plmhna1KC/dnhuMwg8lVOBf5
 myWjqcCHxxoQn6KFqcX9/Azz29ZrgzV28lOnZdiw9yoTjraBmS/ymx4woaa3pc2v
 UNw0Cgx53vHENJzEL9FNSxc0ENZq/bQhpDolnc2AlPGy9+vPg4afMitJb60KTT7r
 HpDcLGkYAIKLrfk8DUmFW8lZhWsxTchXvK2+zwQV7nXMcdUgGN/G3HTIdvWEHFv8
 oGbPB8cfdA2vNC9QAybwWEum/S0H/GfYsBVplNCUCdFXE7yj1cbKD5dPfCyIvmPz
 BjgMae5vH/KoH+vNdZ8NL5oFz2eFC3rLxa/Ss78pcEoBdiiV3WQHPv9MBmn/OQ/v
 CeUAM7omyWpbv3lcByNzIOkeeO3m6Ne28EpEMc2pzLnDPu2btvSyetdO488DE+7O
 MNfApZULVX91W7jWnhM5GR+1SJTdEXZnoxnFV+J/j4deog5vUR7Dt1VkujpUILfL
 7jMl3erF6C53wNrc465z8iLRp1ZM+aTpwatXXRfucNXeomExKK9zF+/+O1ACckUB
 jWDCR9NTcw==
 =e5Lx
 -----END PGP SIGNATURE-----

Merge tag 'for-5.5/disk-revalidate-20191122' of git://git.kernel.dk/linux-block

Pull disk revalidation updates from Jens Axboe:
 "This continues the work that Jan Kara started to thoroughly cleanup
  and consolidate how we handle rescans and revalidations"

* tag 'for-5.5/disk-revalidate-20191122' of git://git.kernel.dk/linux-block:
  block: move clearing bd_invalidated into check_disk_size_change
  block: remove (__)blkdev_reread_part as an exported API
  block: fix bdev_disk_changed for non-partitioned devices
  block: move rescan_partitions to fs/block_dev.c
  block: merge invalidate_partitions into rescan_partitions
  block: refactor rescan_partitions
This commit is contained in:
Linus Torvalds 2019-11-25 11:37:01 -08:00
commit 7e5192b93c
7 changed files with 135 additions and 156 deletions

View File

@ -155,48 +155,21 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
}
}
/*
* This is an exported API for the block driver, and will not
* acquire bd_mutex. This API should be used in case that
* caller has held bd_mutex already.
*/
int __blkdev_reread_part(struct block_device *bdev)
static int blkdev_reread_part(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
int ret;
if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
lockdep_assert_held(&bdev->bd_mutex);
return rescan_partitions(disk, bdev);
}
EXPORT_SYMBOL(__blkdev_reread_part);
/*
* This is an exported API for the block driver, and will
* try to acquire bd_mutex. If bd_mutex has been held already
* in current context, please call __blkdev_reread_part().
*
* Make sure the held locks in current context aren't required
* in open()/close() handler and I/O path for avoiding ABBA deadlock:
* - bd_mutex is held before calling block driver's open/close
* handler
* - reading partition table may submit I/O to the block device
*/
int blkdev_reread_part(struct block_device *bdev)
{
int res;
mutex_lock(&bdev->bd_mutex);
res = __blkdev_reread_part(bdev);
ret = bdev_disk_changed(bdev, false);
mutex_unlock(&bdev->bd_mutex);
return res;
return ret;
}
EXPORT_SYMBOL(blkdev_reread_part);
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
unsigned long arg, unsigned long flags)

View File

@ -442,12 +442,14 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
}
}
static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
{
struct disk_part_iter piter;
struct hd_struct *part;
int res;
if (!disk_part_scan_enabled(disk))
return 0;
if (bdev->bd_part_count || bdev->bd_super)
return -EBUSY;
res = invalidate_partition(disk, 0);
@ -462,148 +464,124 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
return 0;
}
int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
struct parsed_partitions *state, int p)
{
struct parsed_partitions *state = NULL;
sector_t size = state->parts[p].size;
sector_t from = state->parts[p].from;
struct hd_struct *part;
int p, highest, res;
rescan:
if (state && !IS_ERR(state)) {
free_partitions(state);
state = NULL;
if (!size)
return true;
if (from >= get_capacity(disk)) {
printk(KERN_WARNING
"%s: p%d start %llu is beyond EOD, ",
disk->disk_name, p, (unsigned long long) from);
if (disk_unlock_native_capacity(disk))
return false;
return true;
}
res = drop_partitions(disk, bdev);
if (res)
return res;
if (from + size > get_capacity(disk)) {
printk(KERN_WARNING
"%s: p%d size %llu extends beyond EOD, ",
disk->disk_name, p, (unsigned long long) size);
if (disk->fops->revalidate_disk)
disk->fops->revalidate_disk(disk);
check_disk_size_change(disk, bdev, true);
bdev->bd_invalidated = 0;
if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
if (disk_unlock_native_capacity(disk))
return false;
/*
* We can not ignore partitions of broken tables created by for
* example camera firmware, but we limit them to the end of the
* disk to avoid creating invalid block devices.
*/
size = get_capacity(disk) - from;
}
part = add_partition(disk, p, from, size, state->parts[p].flags,
&state->parts[p].info);
if (IS_ERR(part)) {
printk(KERN_ERR " %s: p%d could not be added: %ld\n",
disk->disk_name, p, -PTR_ERR(part));
return true;
}
#ifdef CONFIG_BLK_DEV_MD
if (state->parts[p].flags & ADDPART_FLAG_RAID)
md_autodetect_dev(part_to_dev(part)->devt);
#endif
return true;
}
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
{
struct parsed_partitions *state;
int ret = -EAGAIN, p, highest;
if (!disk_part_scan_enabled(disk))
return 0;
state = check_partition(disk, bdev);
if (!state)
return 0;
if (IS_ERR(state)) {
/*
* I/O error reading the partition table. If any
* partition code tried to read beyond EOD, retry
* after unlocking native capacity.
* I/O error reading the partition table. If we tried to read
* beyond EOD, retry after unlocking the native capacity.
*/
if (PTR_ERR(state) == -ENOSPC) {
printk(KERN_WARNING "%s: partition table beyond EOD, ",
disk->disk_name);
if (disk_unlock_native_capacity(disk))
goto rescan;
return -EAGAIN;
}
return -EIO;
}
/* Partitions are not supported on zoned block devices */
/*
* Partitions are not supported on zoned block devices.
*/
if (bdev_is_zoned(bdev)) {
pr_warn("%s: ignoring partition table on zoned block device\n",
disk->disk_name);
goto out;
ret = 0;
goto out_free_state;
}
/*
* If any partition code tried to read beyond EOD, try
* unlocking native capacity even if partition table is
* successfully read as we could be missing some partitions.
* If we read beyond EOD, try unlocking native capacity even if the
* partition table was successfully read as we could be missing some
* partitions.
*/
if (state->access_beyond_eod) {
printk(KERN_WARNING
"%s: partition table partially beyond EOD, ",
disk->disk_name);
if (disk_unlock_native_capacity(disk))
goto rescan;
goto out_free_state;
}
/* tell userspace that the media / partition table may have changed */
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
/* Detect the highest partition number and preallocate
* disk->part_tbl. This is an optimization and not strictly
* necessary.
/*
* Detect the highest partition number and preallocate disk->part_tbl.
* This is an optimization and not strictly necessary.
*/
for (p = 1, highest = 0; p < state->limit; p++)
if (state->parts[p].size)
highest = p;
disk_expand_part_tbl(disk, highest);
/* add partitions */
for (p = 1; p < state->limit; p++) {
sector_t size, from;
for (p = 1; p < state->limit; p++)
if (!blk_add_partition(disk, bdev, state, p))
goto out_free_state;
size = state->parts[p].size;
if (!size)
continue;
from = state->parts[p].from;
if (from >= get_capacity(disk)) {
printk(KERN_WARNING
"%s: p%d start %llu is beyond EOD, ",
disk->disk_name, p, (unsigned long long) from);
if (disk_unlock_native_capacity(disk))
goto rescan;
continue;
}
if (from + size > get_capacity(disk)) {
printk(KERN_WARNING
"%s: p%d size %llu extends beyond EOD, ",
disk->disk_name, p, (unsigned long long) size);
if (disk_unlock_native_capacity(disk)) {
/* free state and restart */
goto rescan;
} else {
/*
* we can not ignore partitions of broken tables
* created by for example camera firmware, but
* we limit them to the end of the disk to avoid
* creating invalid block devices
*/
size = get_capacity(disk) - from;
}
}
part = add_partition(disk, p, from, size,
state->parts[p].flags,
&state->parts[p].info);
if (IS_ERR(part)) {
printk(KERN_ERR " %s: p%d could not be added: %ld\n",
disk->disk_name, p, -PTR_ERR(part));
continue;
}
#ifdef CONFIG_BLK_DEV_MD
if (state->parts[p].flags & ADDPART_FLAG_RAID)
md_autodetect_dev(part_to_dev(part)->devt);
#endif
}
out:
ret = 0;
out_free_state:
free_partitions(state);
return 0;
}
int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
{
int res;
if (!bdev->bd_invalidated)
return 0;
res = drop_partitions(disk, bdev);
if (res)
return res;
set_capacity(disk, 0);
check_disk_size_change(disk, bdev, false);
bdev->bd_invalidated = 0;
/* tell userspace that the media / partition table may have changed */
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
return 0;
return ret;
}
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)

View File

@ -640,7 +640,9 @@ static void loop_reread_partitions(struct loop_device *lo,
{
int rc;
rc = blkdev_reread_part(bdev);
mutex_lock(&bdev->bd_mutex);
rc = bdev_disk_changed(bdev, false);
mutex_unlock(&bdev->bd_mutex);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@ -1164,10 +1166,11 @@ out_unlock:
* must be at least one and it can only become zero when the
* current holder is released.
*/
if (release)
err = __blkdev_reread_part(bdev);
else
err = blkdev_reread_part(bdev);
if (!release)
mutex_lock(&bdev->bd_mutex);
err = bdev_disk_changed(bdev, false);
if (!release)
mutex_unlock(&bdev->bd_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo_number, err);

View File

@ -116,7 +116,9 @@ int dasd_scan_partitions(struct dasd_block *block)
return -ENODEV;
}
rc = blkdev_reread_part(bdev);
mutex_lock(&bdev->bd_mutex);
rc = bdev_disk_changed(bdev, false);
mutex_unlock(&bdev->bd_mutex);
if (rc)
DBF_DEV_EVENT(DBF_ERR, block->base,
"scan partitions error, rc %d", rc);

View File

@ -1416,8 +1416,8 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
* and adjusts it if it differs. When shrinking the bdev size, its all caches
* are freed.
*/
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
bool verbose)
static void check_disk_size_change(struct gendisk *disk,
struct block_device *bdev, bool verbose)
{
loff_t disk_size, bdev_size;
@ -1433,6 +1433,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
if (bdev_size > disk_size)
flush_disk(bdev, false);
}
bdev->bd_invalidated = 0;
}
/**
@ -1462,7 +1463,6 @@ int revalidate_disk(struct gendisk *disk)
mutex_lock(&bdev->bd_mutex);
check_disk_size_change(disk, bdev, ret == 0);
bdev->bd_invalidated = 0;
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
}
@ -1508,18 +1508,44 @@ EXPORT_SYMBOL(bd_set_size);
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
static void bdev_disk_changed(struct block_device *bdev, bool invalidate)
int bdev_disk_changed(struct block_device *bdev, bool invalidate)
{
if (disk_part_scan_enabled(bdev->bd_disk)) {
if (invalidate)
invalidate_partitions(bdev->bd_disk, bdev);
else
rescan_partitions(bdev->bd_disk, bdev);
struct gendisk *disk = bdev->bd_disk;
int ret;
lockdep_assert_held(&bdev->bd_mutex);
rescan:
ret = blk_drop_partitions(disk, bdev);
if (ret)
return ret;
if (invalidate)
set_capacity(disk, 0);
else if (disk->fops->revalidate_disk)
disk->fops->revalidate_disk(disk);
check_disk_size_change(disk, bdev, !invalidate);
if (get_capacity(disk)) {
ret = blk_add_partitions(disk, bdev);
if (ret == -EAGAIN)
goto rescan;
} else {
check_disk_size_change(bdev->bd_disk, bdev, !invalidate);
bdev->bd_invalidated = 0;
/*
* Tell userspace that the media / partition table may have
* changed.
*/
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
}
return ret;
}
/*
* Only exported for for loop and dasd for historic reasons. Don't use in new
* code!
*/
EXPORT_SYMBOL_GPL(bdev_disk_changed);
/*
* bd_mutex locking:

View File

@ -2630,8 +2630,6 @@ extern void bd_finish_claiming(struct block_device *bdev,
extern void bd_abort_claiming(struct block_device *bdev,
struct block_device *whole, void *holder);
extern void blkdev_put(struct block_device *bdev, fmode_t mode);
extern int __blkdev_reread_part(struct block_device *bdev);
extern int blkdev_reread_part(struct block_device *bdev);
#ifdef CONFIG_SYSFS
extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
@ -2701,8 +2699,6 @@ extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);
#ifdef CONFIG_BLOCK
extern void check_disk_size_change(struct gendisk *disk,
struct block_device *bdev, bool verbose);
extern int revalidate_disk(struct gendisk *);
extern int check_disk_change(struct block_device *);
extern int __invalidate_device(struct block_device *, bool);

View File

@ -621,9 +621,10 @@ extern void blk_invalidate_devt(dev_t devt);
extern dev_t blk_lookup_devt(const char *name, int partno);
extern char *disk_name (struct gendisk *hd, int partno, char *buf);
int bdev_disk_changed(struct block_device *bdev, bool invalidate);
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev);
extern int disk_expand_part_tbl(struct gendisk *disk, int target);
extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev);
extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
int partno, sector_t start,
sector_t len, int flags,