linux-stable/include/linux/genhd.h
Matteo Croce cf17994855 block: add disk sequence number
Associating uevents with block devices in userspace is difficult and racy:
the uevent netlink socket is lossy, and on slow and overloaded systems
has a very high latency.
Block devices do not have exclusive owners in userspace, any process can
set one up (e.g. loop devices). Moreover, device names can be reused
(e.g. loop0 can be reused again and again). A userspace process setting
up a block device and watching for its events cannot thus reliably tell
whether an event relates to the device it just set up or another earlier
instance with the same name.

Being able to set a UUID on a loop device would solve the race conditions.
But it does not allow to derive orderings from uevents: if you see a
uevent with a UUID that does not match the device you are waiting for,
you cannot tell whether it's because the right uevent has not arrived yet,
or it was already sent and you missed it. So you cannot tell whether you
should wait for it or not.

Associating a unique, monotonically increasing sequential number to the
lifetime of each block device, which can be retrieved with an ioctl
immediately upon setting it up, allows to solve the race conditions with
uevents, and also allows userspace processes to know whether they should
wait for the uevent they need or if it was dropped and thus they should
move on.

Additionally, increment the disk sequence number when the media change,
i.e. on DISK_EVENT_MEDIA_CHANGE event.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Tested-by: Luca Boccassi <bluca@debian.org>
Link: https://lore.kernel.org/r/20210712230530.29323-2-mcroce@linux.microsoft.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2021-08-02 13:37:28 -06:00

347 lines
9.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_GENHD_H
#define _LINUX_GENHD_H
/*
* genhd.h Copyright (C) 1992 Drew Eckhardt
* Generic hard disk header file by
* Drew Eckhardt
*
* <drew@colorado.edu>
*/
#include <linux/types.h>
#include <linux/kdev_t.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/uuid.h>
#include <linux/blk_types.h>
#include <asm/local.h>
extern const struct device_type disk_type;
extern struct device_type part_type;
extern struct class block_class;
#define DISK_MAX_PARTS 256
#define DISK_NAME_LEN 32
#include <linux/major.h>
#include <linux/device.h>
#include <linux/smp.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>
#define PARTITION_META_INFO_VOLNAMELTH 64
/*
* Enough for the string representation of any kind of UUID plus NULL.
* EFI UUID is 36 characters. MSDOS UUID is 11 characters.
*/
#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1)
struct partition_meta_info {
char uuid[PARTITION_META_INFO_UUIDLTH];
u8 volname[PARTITION_META_INFO_VOLNAMELTH];
};
/**
* DOC: genhd capability flags
*
* ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device
* gives access to removable media.
* When set, the device remains present even when media is not
* inserted.
* Must not be set for devices which are removed entirely when the
* media is removed.
*
* ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style
* device.
* Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl.
*
* ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up",
* with a similar meaning to network interfaces.
*
* ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include
* partition information in ``/proc/partitions`` or in the output of
* printk_all_partitions().
* Used for the null block device and some MMC devices.
*
* ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended
* dynamic ``dev_t``, i.e. it wants extended device numbers
* (``BLOCK_EXT_MAJOR``).
* This affects the maximum number of partitions.
*
* ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the
* partition table, the device's capacity has been extended to its
* native capacity; i.e. the device has hidden capacity used by one
* of the partitions (this is a flag used so that native capacity is
* only ever unlocked once).
*
* ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is
* blocked whenever a writer holds an exclusive lock.
*
* ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled.
* Used for loop devices in their default settings and some MMC
* devices.
*
* ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it
* doesn't produce events, doesn't appear in sysfs, and doesn't have
* an associated ``bdev``.
* Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and
* ``GENHD_FL_NO_PART_SCAN``.
* Used for multipath devices.
*/
#define GENHD_FL_REMOVABLE 0x0001
/* 2 is unused (used to be GENHD_FL_DRIVERFS) */
/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
#define GENHD_FL_CD 0x0008
#define GENHD_FL_UP 0x0010
#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020
#define GENHD_FL_EXT_DEVT 0x0040
#define GENHD_FL_NATIVE_CAPACITY 0x0080
#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100
#define GENHD_FL_NO_PART_SCAN 0x0200
#define GENHD_FL_HIDDEN 0x0400
enum {
DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */
};
enum {
/* Poll even if events_poll_msecs is unset */
DISK_EVENT_FLAG_POLL = 1 << 0,
/* Forward events to udev */
DISK_EVENT_FLAG_UEVENT = 1 << 1,
};
struct disk_events;
struct badblocks;
struct blk_integrity {
const struct blk_integrity_profile *profile;
unsigned char flags;
unsigned char tuple_size;
unsigned char interval_exp;
unsigned char tag_size;
};
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().
*/
int major; /* major number of driver */
int first_minor;
int minors; /* maximum number of minors, =1 for
* disks that can't be partitioned. */
char disk_name[DISK_NAME_LEN]; /* name of major driver */
unsigned short events; /* supported events */
unsigned short event_flags; /* flags related to event processing */
struct xarray part_tbl;
struct block_device *part0;
const struct block_device_operations *fops;
struct request_queue *queue;
void *private_data;
int flags;
unsigned long state;
#define GD_NEED_PART_SCAN 0
#define GD_READ_ONLY 1
#define GD_QUEUE_REF 2
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
struct kobject *slave_dir;
struct timer_rand_state *random;
atomic_t sync_io; /* RAID */
struct disk_events *ev;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct kobject integrity_kobj;
#endif /* CONFIG_BLK_DEV_INTEGRITY */
#if IS_ENABLED(CONFIG_CDROM)
struct cdrom_device_info *cdi;
#endif
int node_id;
struct badblocks *bb;
struct lockdep_map lockdep_map;
u64 diskseq;
};
/*
* The gendisk is refcounted by the part0 block_device, and the bd_device
* therein is also used for device model presentation in sysfs.
*/
#define dev_to_disk(device) \
(dev_to_bdev(device)->bd_disk)
#define disk_to_dev(disk) \
(&((disk)->part0->bd_device))
#if IS_REACHABLE(CONFIG_CDROM)
#define disk_to_cdi(disk) ((disk)->cdi)
#else
#define disk_to_cdi(disk) NULL
#endif
static inline int disk_max_parts(struct gendisk *disk)
{
if (disk->flags & GENHD_FL_EXT_DEVT)
return DISK_MAX_PARTS;
return disk->minors;
}
static inline bool disk_part_scan_enabled(struct gendisk *disk)
{
return disk_max_parts(disk) > 1 &&
!(disk->flags & GENHD_FL_NO_PART_SCAN);
}
static inline dev_t disk_devt(struct gendisk *disk)
{
return MKDEV(disk->major, disk->first_minor);
}
void disk_uevent(struct gendisk *disk, enum kobject_action action);
/* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk,
const struct attribute_group **groups);
static inline void add_disk(struct gendisk *disk)
{
device_add_disk(NULL, disk, NULL);
}
extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk);
static inline void add_disk_no_queue_reg(struct gendisk *disk)
{
device_add_disk_no_queue_reg(NULL, disk);
}
extern void del_gendisk(struct gendisk *gp);
void set_disk_ro(struct gendisk *disk, bool read_only);
static inline int get_disk_ro(struct gendisk *disk)
{
return disk->part0->bd_read_only ||
test_bit(GD_READ_ONLY, &disk->state);
}
extern void disk_block_events(struct gendisk *disk);
extern void disk_unblock_events(struct gendisk *disk);
extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
/* drivers/char/random.c */
extern void add_disk_randomness(struct gendisk *disk) __latent_entropy;
extern void rand_initialize_disk(struct gendisk *disk);
static inline sector_t get_start_sect(struct block_device *bdev)
{
return bdev->bd_start_sect;
}
static inline sector_t bdev_nr_sectors(struct block_device *bdev)
{
return i_size_read(bdev->bd_inode) >> 9;
}
static inline sector_t get_capacity(struct gendisk *disk)
{
return bdev_nr_sectors(disk->part0);
}
int bdev_disk_changed(struct gendisk *disk, bool invalidate);
void blk_drop_partitions(struct gendisk *disk);
extern struct gendisk *__alloc_disk_node(int minors, int node_id);
extern void put_disk(struct gendisk *disk);
#define alloc_disk_node(minors, node_id) \
({ \
static struct lock_class_key __key; \
const char *__name; \
struct gendisk *__disk; \
\
__name = "(gendisk_completion)"#minors"("#node_id")"; \
\
__disk = __alloc_disk_node(minors, node_id); \
\
if (__disk) \
lockdep_init_map(&__disk->lockdep_map, __name, &__key, 0); \
\
__disk; \
})
#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
/**
* blk_alloc_disk - allocate a gendisk structure
* @node_id: numa node to allocate on
*
* Allocate and pre-initialize a gendisk structure for use with BIO based
* drivers.
*
* Context: can sleep
*/
#define blk_alloc_disk(node_id) \
({ \
struct gendisk *__disk = __blk_alloc_disk(node_id); \
static struct lock_class_key __key; \
\
if (__disk) \
lockdep_init_map(&__disk->lockdep_map, \
"(bio completion)", &__key, 0); \
__disk; \
})
struct gendisk *__blk_alloc_disk(int node);
void blk_cleanup_disk(struct gendisk *disk);
int __register_blkdev(unsigned int major, const char *name,
void (*probe)(dev_t devt));
#define register_blkdev(major, name) \
__register_blkdev(major, name, NULL)
void unregister_blkdev(unsigned int major, const char *name);
bool bdev_check_media_change(struct block_device *bdev);
int __invalidate_device(struct block_device *bdev, bool kill_dirty);
void set_capacity(struct gendisk *disk, sector_t size);
/* for drivers/char/raw.c: */
int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
#ifdef CONFIG_SYSFS
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
#else
static inline int bd_link_disk_holder(struct block_device *bdev,
struct gendisk *disk)
{
return 0;
}
static inline void bd_unlink_disk_holder(struct block_device *bdev,
struct gendisk *disk)
{
}
#endif /* CONFIG_SYSFS */
dev_t part_devt(struct gendisk *disk, u8 partno);
void inc_diskseq(struct gendisk *disk);
dev_t blk_lookup_devt(const char *name, int partno);
void blk_request_module(dev_t devt);
#ifdef CONFIG_BLOCK
void printk_all_partitions(void);
#else /* CONFIG_BLOCK */
static inline void printk_all_partitions(void)
{
}
#endif /* CONFIG_BLOCK */
#endif /* _LINUX_GENHD_H */