block: move bd_mutex to struct gendisk

Replace the per-block device bd_mutex with a per-gendisk open_mutex,
thus simplifying locking wherever we deal with partitions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Link: https://lore.kernel.org/r/20210525061301.2242282-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig 2021-05-25 08:12:56 +02:00 committed by Jens Axboe
parent 210a6d756f
commit a8698707a1
15 changed files with 68 additions and 76 deletions

View file

@ -480,7 +480,7 @@ prototypes::
locking rules:
======================= ===================
ops bd_mutex
ops open_mutex
======================= ===================
open: yes
release: yes

View file

@ -591,10 +591,10 @@ void del_gendisk(struct gendisk *disk)
blk_integrity_del(disk);
disk_del_events(disk);
mutex_lock(&disk->part0->bd_mutex);
mutex_lock(&disk->open_mutex);
disk->flags &= ~GENHD_FL_UP;
blk_drop_partitions(disk);
mutex_unlock(&disk->part0->bd_mutex);
mutex_unlock(&disk->open_mutex);
fsync_bdev(disk->part0);
__invalidate_device(disk->part0, true);
@ -1273,6 +1273,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
goto out_free_disk;
disk->node_id = node_id;
mutex_init(&disk->open_mutex);
xa_init(&disk->part_tbl);
if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
goto out_destroy_part_tbl;
@ -1525,7 +1526,7 @@ void disk_unblock_events(struct gendisk *disk)
* doesn't clear the events from @disk->ev.
*
* CONTEXT:
* If @mask is non-zero must be called with bdev->bd_mutex held.
* If @mask is non-zero must be called with disk->open_mutex held.
*/
void disk_flush_events(struct gendisk *disk, unsigned int mask)
{

View file

@ -283,7 +283,7 @@ struct device_type part_type = {
};
/*
* Must be called either with bd_mutex held, before a disk can be opened or
* Must be called either with open_mutex held, before a disk can be opened or
* after all disk users are gone.
*/
static void delete_partition(struct block_device *part)
@ -312,7 +312,7 @@ static ssize_t whole_disk_show(struct device *dev,
static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
/*
* Must be called either with bd_mutex held, before a disk can be opened or
* Must be called either with open_mutex held, before a disk can be opened or
* after all disk users are gone.
*/
static struct block_device *add_partition(struct gendisk *disk, int partno,
@ -453,15 +453,15 @@ int bdev_add_partition(struct block_device *bdev, int partno,
{
struct block_device *part;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
if (partition_overlaps(bdev->bd_disk, start, length, -1)) {
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
return -EBUSY;
}
part = add_partition(bdev->bd_disk, partno, start, length,
ADDPART_FLAG_NONE, NULL);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
return PTR_ERR_OR_ZERO(part);
}
@ -474,8 +474,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
if (!part)
return -ENXIO;
mutex_lock(&part->bd_mutex);
mutex_lock_nested(&bdev->bd_mutex, 1);
mutex_lock(&bdev->bd_disk->open_mutex);
ret = -EBUSY;
if (part->bd_openers)
@ -484,8 +483,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
delete_partition(part);
ret = 0;
out_unlock:
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&part->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
bdput(part);
return ret;
}
@ -500,8 +498,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
if (!part)
return -ENXIO;
mutex_lock(&part->bd_mutex);
mutex_lock_nested(&bdev->bd_mutex, 1);
mutex_lock(&bdev->bd_disk->open_mutex);
ret = -EINVAL;
if (start != part->bd_start_sect)
goto out_unlock;
@ -514,8 +511,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
ret = 0;
out_unlock:
mutex_unlock(&part->bd_mutex);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
bdput(part);
return ret;
}
@ -541,7 +537,7 @@ void blk_drop_partitions(struct gendisk *disk)
struct block_device *part;
unsigned long idx;
lockdep_assert_held(&disk->part0->bd_mutex);
lockdep_assert_held(&disk->open_mutex);
xa_for_each_start(&disk->part_tbl, idx, part, 1) {
if (!bdgrab(part))

View file

@ -652,9 +652,9 @@ static void loop_reread_partitions(struct loop_device *lo,
{
int rc;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
rc = bdev_disk_changed(bdev, false);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@ -747,7 +747,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
mutex_unlock(&lo->lo_mutex);
/*
* We must drop file reference outside of lo_mutex as dropping
* the file ref can take bd_mutex which creates circular locking
* the file ref can take open_mutex which creates circular locking
* dependency.
*/
fput(old_file);
@ -1260,7 +1260,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
mutex_unlock(&lo->lo_mutex);
if (partscan) {
/*
* bd_mutex has been held already in release path, so don't
* open_mutex has been held already in release path, so don't
* acquire it if this function is called in such case.
*
* If the reread partition isn't from release path, lo_refcnt
@ -1268,10 +1268,10 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
* current holder is released.
*/
if (!release)
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
err = bdev_disk_changed(bdev, false);
if (!release)
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo_number, err);
@ -1298,7 +1298,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
/*
* Need not hold lo_mutex to fput backing file. Calling fput holding
* lo_mutex triggers a circular lock dependency possibility warning as
* fput can take bd_mutex which is usually taken before lo_mutex.
* fput can take open_mutex which is usually taken before lo_mutex.
*/
if (filp)
fput(filp);

View file

@ -2163,7 +2163,7 @@ static void blkfront_closing(struct blkfront_info *info)
return;
}
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers) {
xenbus_dev_error(xbdev, -EBUSY,
@ -2174,7 +2174,7 @@ static void blkfront_closing(struct blkfront_info *info)
xenbus_frontend_closed(xbdev);
}
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
bdput(bdev);
}
@ -2531,7 +2531,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
* isn't closed yet, we let release take care of it.
*/
mutex_lock(&bdev->bd_mutex);
mutex_lock(&disk->open_mutex);
info = disk->private_data;
dev_warn(disk_to_dev(disk),
@ -2546,7 +2546,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
mutex_unlock(&blkfront_mutex);
}
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&disk->open_mutex);
bdput(bdev);
return 0;

View file

@ -1781,24 +1781,24 @@ static ssize_t reset_store(struct device *dev,
zram = dev_to_zram(dev);
bdev = zram->disk->part0;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
/* Do not reset an active device or claimed device */
if (bdev->bd_openers || zram->claim) {
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
return -EBUSY;
}
/* From now on, anyone can't open /dev/zram[0-9] */
zram->claim = true;
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
/* Make sure all the pending I/O are finished */
fsync_bdev(bdev);
zram_reset_device(zram);
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
zram->claim = false;
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
return len;
}
@ -1808,7 +1808,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
int ret = 0;
struct zram *zram;
WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
zram = bdev->bd_disk->private_data;
/* zram was claimed to reset so open request fails */
@ -1972,14 +1972,14 @@ static int zram_remove(struct zram *zram)
{
struct block_device *bdev = zram->disk->part0;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers || zram->claim) {
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
return -EBUSY;
}
zram->claim = true;
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
zram_debugfs_unregister(zram);

View file

@ -112,7 +112,7 @@ struct zram {
/*
* zram is claimed so open request will be failed
*/
bool claim; /* Protected by bdev->bd_mutex */
bool claim; /* Protected by disk->open_mutex */
struct file *backing_dev;
#ifdef CONFIG_ZRAM_WRITEBACK
spinlock_t wb_limit_lock;

View file

@ -395,10 +395,10 @@ struct mddev {
* that we are never stopping an array while it is open.
* 'reconfig_mutex' protects all other reconfiguration.
* These locks are separate due to conflicting interactions
* with bdev->bd_mutex.
* with disk->open_mutex.
* Lock ordering is:
* reconfig_mutex -> bd_mutex
* bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
* reconfig_mutex -> disk->open_mutex
* disk->open_mutex -> open_mutex: e.g. __blkdev_get -> md_open
*/
struct mutex open_mutex;
struct mutex reconfig_mutex;

View file

@ -109,9 +109,9 @@ int dasd_scan_partitions(struct dasd_block *block)
return -ENODEV;
}
mutex_lock(&bdev->bd_mutex);
mutex_lock(&block->gdp->open_mutex);
rc = bdev_disk_changed(bdev, false);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&block->gdp->open_mutex);
if (rc)
DBF_DEV_EVENT(DBF_ERR, block->base,
"scan partitions error, rc %d", rc);
@ -145,9 +145,9 @@ void dasd_destroy_partitions(struct dasd_block *block)
bdev = block->bdev;
block->bdev = NULL;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
bdev_disk_changed(bdev, true);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
blkdev_put(bdev, FMODE_READ);

View file

@ -1400,7 +1400,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
* In the latter case @inode and @filp carry an abridged amount
* of information as noted above.
*
* Locking: called with bdev->bd_mutex held.
* Locking: called with bdev->bd_disk->open_mutex held.
**/
static int sd_open(struct block_device *bdev, fmode_t mode)
{
@ -1476,7 +1476,7 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
* Note: may block (uninterruptible) if error recovery is underway
* on this disk.
*
* Locking: called with bdev->bd_mutex held.
* Locking: called with bdev->bd_disk->open_mutex held.
**/
static void sd_release(struct gendisk *disk, fmode_t mode)
{

View file

@ -895,7 +895,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
bdev = I_BDEV(inode);
mutex_init(&bdev->bd_mutex);
mutex_init(&bdev->bd_fsfreeze_mutex);
spin_lock_init(&bdev->bd_size_lock);
bdev->bd_disk = disk;
@ -1154,7 +1153,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
struct bd_holder_disk *holder;
int ret = 0;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
WARN_ON_ONCE(!bdev->bd_holder);
@ -1199,7 +1198,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
out_free:
kfree(holder);
out_unlock:
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
@ -1218,7 +1217,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
struct bd_holder_disk *holder;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
holder = bd_find_holder_disk(bdev, disk);
@ -1230,7 +1229,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
kfree(holder);
}
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
@ -1242,7 +1241,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
struct gendisk *disk = bdev->bd_disk;
int ret = 0;
lockdep_assert_held(&bdev->bd_mutex);
lockdep_assert_held(&disk->open_mutex);
if (!(disk->flags & GENHD_FL_UP))
return -ENXIO;
@ -1327,14 +1326,10 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
goto done;
whole = bdgrab(disk->part0);
mutex_lock_nested(&whole->bd_mutex, 1);
ret = blkdev_get_whole(whole, mode);
if (ret) {
mutex_unlock(&whole->bd_mutex);
if (ret)
goto out_put_whole;
}
whole->bd_part_count++;
mutex_unlock(&whole->bd_mutex);
ret = -ENXIO;
if (!bdev_nr_sectors(part))
@ -1437,7 +1432,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
disk_block_events(disk);
mutex_lock(&bdev->bd_mutex);
mutex_lock(&disk->open_mutex);
ret = -ENXIO;
if (!(disk->flags & GENHD_FL_UP))
goto abort_claiming;
@ -1463,7 +1458,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
unblock_events = false;
}
}
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&disk->open_mutex);
if (unblock_events)
disk_unblock_events(disk);
@ -1472,7 +1467,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
abort_claiming:
if (mode & FMODE_EXCL)
bd_abort_claiming(bdev, holder);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&disk->open_mutex);
disk_unblock_events(disk);
put_blkdev:
blkdev_put_no_open(bdev);
@ -1552,7 +1547,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part)
bdev->bd_part_count--;
@ -1567,7 +1561,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
if (!bdev_is_partition(bdev) && disk->fops->release)
disk->fops->release(disk, mode);
mutex_unlock(&bdev->bd_mutex);
if (victim) {
__blkdev_put(victim, mode, 1);
bdput(victim);
@ -1588,15 +1581,14 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
if (bdev->bd_openers == 1)
sync_blockdev(bdev);
mutex_lock(&bdev->bd_mutex);
mutex_lock(&disk->open_mutex);
if (mode & FMODE_EXCL) {
struct block_device *whole = bdev_whole(bdev);
bool bdev_free;
/*
* Release a claim on the device. The holder fields
* are protected with bdev_lock. bd_mutex is to
* are protected with bdev_lock. open_mutex is to
* synchronize disk_holder unlinking.
*/
spin_lock(&bdev_lock);
@ -1627,9 +1619,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* from userland - e.g. eject(1).
*/
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
mutex_unlock(&bdev->bd_mutex);
__blkdev_put(bdev, mode, 0);
mutex_unlock(&disk->open_mutex);
blkdev_put_no_open(bdev);
}
EXPORT_SYMBOL(blkdev_put);
@ -1936,10 +1929,10 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
old_inode = inode;
bdev = I_BDEV(inode);
mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers)
func(bdev, arg);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
spin_lock(&blockdev_superblock->s_inode_list_lock);
}

View file

@ -1247,7 +1247,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
lockdep_assert_held(&uuid_mutex);
/*
* The device_list_mutex cannot be taken here in case opening the
* underlying device takes further locks like bd_mutex.
* underlying device takes further locks like open_mutex.
*
* We also don't need the lock here as this is called during mount and
* exclusion is provided by uuid_mutex

View file

@ -1277,9 +1277,9 @@ int get_tree_bdev(struct fs_context *fc,
}
/*
* s_umount nests inside bd_mutex during
* s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
* bd_mutex and can't be called under s_umount. Drop
* open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/
@ -1352,9 +1352,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
}
/*
* s_umount nests inside bd_mutex during
* s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
* bd_mutex and can't be called under s_umount. Drop
* open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/

View file

@ -29,7 +29,6 @@ struct block_device {
int bd_openers;
struct inode * bd_inode; /* will die */
struct super_block * bd_super;
struct mutex bd_mutex; /* open/close mutex */
void * bd_claiming;
struct device bd_device;
void * bd_holder;

View file

@ -154,6 +154,9 @@ struct gendisk {
#define GD_NEED_PART_SCAN 0
#define GD_READ_ONLY 1
#define GD_QUEUE_REF 2
struct mutex open_mutex; /* open/close mutex */
struct kobject *slave_dir;
struct timer_rand_state *random;