linux-stable/block/ioctl.c

828 lines
21 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <linux/capability.h>
#include <linux/compat.h>
#include <linux/blkdev.h>
#include <linux/export.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/gfp.h>
#include <linux/blkpg.h>
#include <linux/hdreg.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/blktrace_api.h>
#include <linux/pr.h>
#include <linux/uaccess.h>
static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition __user *upart, int op)
{
struct block_device *bdevp;
struct gendisk *disk;
struct hd_struct *part, *lpart;
struct blkpg_partition p;
struct disk_part_iter piter;
long long start, length;
int partno;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
return -EFAULT;
disk = bdev->bd_disk;
if (bdev != bdev->bd_contains)
return -EINVAL;
partno = p.pno;
if (partno <= 0)
return -EINVAL;
switch (op) {
case BLKPG_ADD_PARTITION:
start = p.start >> 9;
length = p.length >> 9;
/* check for fit in a hd_struct */
if (sizeof(sector_t) == sizeof(long) &&
sizeof(long long) > sizeof(long)) {
long pstart = start, plength = length;
if (pstart != start || plength != length
|| pstart < 0 || plength < 0 || partno > 65535)
return -EINVAL;
}
/* check if partition is aligned to blocksize */
if (p.start & (bdev_logical_block_size(bdev) - 1))
return -EINVAL;
mutex_lock(&bdev->bd_mutex);
/* overlap? */
disk_part_iter_init(&piter, disk,
DISK_PITER_INCL_EMPTY);
while ((part = disk_part_iter_next(&piter))) {
if (!(start + length <= part->start_sect ||
start >= part->start_sect + part->nr_sects)) {
disk_part_iter_exit(&piter);
mutex_unlock(&bdev->bd_mutex);
return -EBUSY;
}
}
disk_part_iter_exit(&piter);
/* all seems OK */
part = add_partition(disk, partno, start, length,
ADDPART_FLAG_NONE, NULL);
mutex_unlock(&bdev->bd_mutex);
return PTR_ERR_OR_ZERO(part);
case BLKPG_DEL_PARTITION:
part = disk_get_part(disk, partno);
if (!part)
return -ENXIO;
bdevp = bdget(part_devt(part));
disk_put_part(part);
if (!bdevp)
return -ENOMEM;
mutex_lock(&bdevp->bd_mutex);
if (bdevp->bd_openers) {
mutex_unlock(&bdevp->bd_mutex);
bdput(bdevp);
return -EBUSY;
}
/* all seems OK */
fsync_bdev(bdevp);
invalidate_bdev(bdevp);
mutex_lock_nested(&bdev->bd_mutex, 1);
delete_partition(disk, partno);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdevp->bd_mutex);
bdput(bdevp);
return 0;
case BLKPG_RESIZE_PARTITION:
start = p.start >> 9;
/* new length of partition in bytes */
length = p.length >> 9;
/* check for fit in a hd_struct */
if (sizeof(sector_t) == sizeof(long) &&
sizeof(long long) > sizeof(long)) {
long pstart = start, plength = length;
if (pstart != start || plength != length
|| pstart < 0 || plength < 0)
return -EINVAL;
}
part = disk_get_part(disk, partno);
if (!part)
return -ENXIO;
bdevp = bdget(part_devt(part));
if (!bdevp) {
disk_put_part(part);
return -ENOMEM;
}
mutex_lock(&bdevp->bd_mutex);
mutex_lock_nested(&bdev->bd_mutex, 1);
if (start != part->start_sect) {
mutex_unlock(&bdevp->bd_mutex);
mutex_unlock(&bdev->bd_mutex);
bdput(bdevp);
disk_put_part(part);
return -EINVAL;
}
/* overlap? */
disk_part_iter_init(&piter, disk,
DISK_PITER_INCL_EMPTY);
while ((lpart = disk_part_iter_next(&piter))) {
if (lpart->partno != partno &&
!(start + length <= lpart->start_sect ||
start >= lpart->start_sect + lpart->nr_sects)
) {
disk_part_iter_exit(&piter);
mutex_unlock(&bdevp->bd_mutex);
mutex_unlock(&bdev->bd_mutex);
bdput(bdevp);
disk_put_part(part);
return -EBUSY;
}
}
disk_part_iter_exit(&piter);
part_nr_sects_write(part, (sector_t)length);
i_size_write(bdevp->bd_inode, p.length);
mutex_unlock(&bdevp->bd_mutex);
mutex_unlock(&bdev->bd_mutex);
bdput(bdevp);
disk_put_part(part);
return 0;
default:
return -EINVAL;
}
}
static int blkpg_ioctl(struct block_device *bdev,
struct blkpg_ioctl_arg __user *arg)
{
struct blkpg_partition __user *udata;
int op;
if (get_user(op, &arg->op) || get_user(udata, &arg->data))
return -EFAULT;
return blkpg_do_ioctl(bdev, udata, op);
}
#ifdef CONFIG_COMPAT
struct compat_blkpg_ioctl_arg {
compat_int_t op;
compat_int_t flags;
compat_int_t datalen;
compat_caddr_t data;
};
static int compat_blkpg_ioctl(struct block_device *bdev,
struct compat_blkpg_ioctl_arg __user *arg)
{
compat_caddr_t udata;
int op;
if (get_user(op, &arg->op) || get_user(udata, &arg->data))
return -EFAULT;
return blkpg_do_ioctl(bdev, compat_ptr(udata), op);
}
#endif
static int blkdev_reread_part(struct block_device *bdev)
{
int ret;
if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
mutex_lock(&bdev->bd_mutex);
ret = bdev_disk_changed(bdev, false);
mutex_unlock(&bdev->bd_mutex);
return ret;
}
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
unsigned long arg, unsigned long flags)
{
uint64_t range[2];
uint64_t start, len;
struct request_queue *q = bdev_get_queue(bdev);
struct address_space *mapping = bdev->bd_inode->i_mapping;
if (!(mode & FMODE_WRITE))
return -EBADF;
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;
start = range[0];
len = range[1];
if (start & 511)
return -EINVAL;
if (len & 511)
return -EINVAL;
if (start + len > i_size_read(bdev->bd_inode))
return -EINVAL;
truncate_inode_pages_range(mapping, start, start + len - 1);
return blkdev_issue_discard(bdev, start >> 9, len >> 9,
GFP_KERNEL, flags);
}
static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
unsigned long arg)
{
uint64_t range[2];
block: invalidate the page cache when issuing BLKZEROOUT Patch series "fallocate for block devices", v11. This is a patchset to fix page cache coherency with BLKZEROOUT and implement fallocate for block devices. The first patch is a fix to the existing BLKZEROOUT ioctl to invalidate the page cache if the zeroing command to the underlying device succeeds. Without this patch we still have the pagecache coherence bug that's been in the kernel forever. The second patch changes the internal block device functions to reject attempts to discard or zeroout that are not aligned to the logical block size. Previously, we only checked that the start/len parameters were 512-byte aligned, which caused kernel BUG_ONs for unaligned IOs to 4k-LBA devices. The third patch creates an fallocate handler for block devices, wires up the FALLOC_FL_PUNCH_HOLE flag to zeroing-discard, and connects FALLOC_FL_ZERO_RANGE to write-same so that we can have a consistent fallocate interface between files and block devices. It also allows the combination of PUNCH_HOLE and NO_HIDE_STALE to invoke non-zeroing discard. Test cases for the new block device fallocate are now in xfstests as generic/349-351. This patch (of 3): Invalidate the page cache (as a regular O_DIRECT write would do) to avoid returning stale cache contents at a later time. Link: http://lkml.kernel.org/r/147518378313.22791.16649519283678515021.stgit@birch.djwong.org Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Mike Snitzer <snitzer@redhat.com> Cc: Brian Foster <bfoster@redhat.com> Cc: Jens Axboe <axboe@kernel.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-11 20:51:05 +00:00
struct address_space *mapping;
uint64_t start, end, len;
if (!(mode & FMODE_WRITE))
return -EBADF;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;
start = range[0];
len = range[1];
block: invalidate the page cache when issuing BLKZEROOUT Patch series "fallocate for block devices", v11. This is a patchset to fix page cache coherency with BLKZEROOUT and implement fallocate for block devices. The first patch is a fix to the existing BLKZEROOUT ioctl to invalidate the page cache if the zeroing command to the underlying device succeeds. Without this patch we still have the pagecache coherence bug that's been in the kernel forever. The second patch changes the internal block device functions to reject attempts to discard or zeroout that are not aligned to the logical block size. Previously, we only checked that the start/len parameters were 512-byte aligned, which caused kernel BUG_ONs for unaligned IOs to 4k-LBA devices. The third patch creates an fallocate handler for block devices, wires up the FALLOC_FL_PUNCH_HOLE flag to zeroing-discard, and connects FALLOC_FL_ZERO_RANGE to write-same so that we can have a consistent fallocate interface between files and block devices. It also allows the combination of PUNCH_HOLE and NO_HIDE_STALE to invoke non-zeroing discard. Test cases for the new block device fallocate are now in xfstests as generic/349-351. This patch (of 3): Invalidate the page cache (as a regular O_DIRECT write would do) to avoid returning stale cache contents at a later time. Link: http://lkml.kernel.org/r/147518378313.22791.16649519283678515021.stgit@birch.djwong.org Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Mike Snitzer <snitzer@redhat.com> Cc: Brian Foster <bfoster@redhat.com> Cc: Jens Axboe <axboe@kernel.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-11 20:51:05 +00:00
end = start + len - 1;
if (start & 511)
return -EINVAL;
if (len & 511)
return -EINVAL;
block: invalidate the page cache when issuing BLKZEROOUT Patch series "fallocate for block devices", v11. This is a patchset to fix page cache coherency with BLKZEROOUT and implement fallocate for block devices. The first patch is a fix to the existing BLKZEROOUT ioctl to invalidate the page cache if the zeroing command to the underlying device succeeds. Without this patch we still have the pagecache coherence bug that's been in the kernel forever. The second patch changes the internal block device functions to reject attempts to discard or zeroout that are not aligned to the logical block size. Previously, we only checked that the start/len parameters were 512-byte aligned, which caused kernel BUG_ONs for unaligned IOs to 4k-LBA devices. The third patch creates an fallocate handler for block devices, wires up the FALLOC_FL_PUNCH_HOLE flag to zeroing-discard, and connects FALLOC_FL_ZERO_RANGE to write-same so that we can have a consistent fallocate interface between files and block devices. It also allows the combination of PUNCH_HOLE and NO_HIDE_STALE to invoke non-zeroing discard. Test cases for the new block device fallocate are now in xfstests as generic/349-351. This patch (of 3): Invalidate the page cache (as a regular O_DIRECT write would do) to avoid returning stale cache contents at a later time. Link: http://lkml.kernel.org/r/147518378313.22791.16649519283678515021.stgit@birch.djwong.org Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Mike Snitzer <snitzer@redhat.com> Cc: Brian Foster <bfoster@redhat.com> Cc: Jens Axboe <axboe@kernel.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-11 20:51:05 +00:00
if (end >= (uint64_t)i_size_read(bdev->bd_inode))
return -EINVAL;
if (end < start)
return -EINVAL;
block: invalidate the page cache when issuing BLKZEROOUT Patch series "fallocate for block devices", v11. This is a patchset to fix page cache coherency with BLKZEROOUT and implement fallocate for block devices. The first patch is a fix to the existing BLKZEROOUT ioctl to invalidate the page cache if the zeroing command to the underlying device succeeds. Without this patch we still have the pagecache coherence bug that's been in the kernel forever. The second patch changes the internal block device functions to reject attempts to discard or zeroout that are not aligned to the logical block size. Previously, we only checked that the start/len parameters were 512-byte aligned, which caused kernel BUG_ONs for unaligned IOs to 4k-LBA devices. The third patch creates an fallocate handler for block devices, wires up the FALLOC_FL_PUNCH_HOLE flag to zeroing-discard, and connects FALLOC_FL_ZERO_RANGE to write-same so that we can have a consistent fallocate interface between files and block devices. It also allows the combination of PUNCH_HOLE and NO_HIDE_STALE to invoke non-zeroing discard. Test cases for the new block device fallocate are now in xfstests as generic/349-351. This patch (of 3): Invalidate the page cache (as a regular O_DIRECT write would do) to avoid returning stale cache contents at a later time. Link: http://lkml.kernel.org/r/147518378313.22791.16649519283678515021.stgit@birch.djwong.org Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Mike Snitzer <snitzer@redhat.com> Cc: Brian Foster <bfoster@redhat.com> Cc: Jens Axboe <axboe@kernel.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-11 20:51:05 +00:00
/* Invalidate the page cache, including dirty pages */
mapping = bdev->bd_inode->i_mapping;
truncate_inode_pages_range(mapping, start, end);
return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
BLKDEV_ZERO_NOUNMAP);
}
static int put_ushort(unsigned long arg, unsigned short val)
{
return put_user(val, (unsigned short __user *)arg);
}
static int put_int(unsigned long arg, int val)
{
return put_user(val, (int __user *)arg);
}
static int put_uint(unsigned long arg, unsigned int val)
{
return put_user(val, (unsigned int __user *)arg);
}
static int put_long(unsigned long arg, long val)
{
return put_user(val, (long __user *)arg);
}
static int put_ulong(unsigned long arg, unsigned long val)
{
return put_user(val, (unsigned long __user *)arg);
}
static int put_u64(unsigned long arg, u64 val)
{
return put_user(val, (u64 __user *)arg);
}
#ifdef CONFIG_COMPAT
static int compat_put_ushort(unsigned long arg, unsigned short val)
{
return put_user(val, (unsigned short __user *)compat_ptr(arg));
}
static int compat_put_int(unsigned long arg, int val)
{
return put_user(val, (compat_int_t __user *)compat_ptr(arg));
}
static int compat_put_uint(unsigned long arg, unsigned int val)
{
return put_user(val, (compat_uint_t __user *)compat_ptr(arg));
}
static int compat_put_long(unsigned long arg, long val)
{
return put_user(val, (compat_long_t __user *)compat_ptr(arg));
}
static int compat_put_ulong(unsigned long arg, compat_ulong_t val)
{
return put_user(val, (compat_ulong_t __user *)compat_ptr(arg));
}
static int compat_put_u64(unsigned long arg, u64 val)
{
return put_user(val, (compat_u64 __user *)compat_ptr(arg));
}
#endif
int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long arg)
{
struct gendisk *disk = bdev->bd_disk;
if (disk->fops->ioctl)
return disk->fops->ioctl(bdev, mode, cmd, arg);
return -ENOTTY;
}
/*
* For the record: _GPL here is only because somebody decided to slap it
* on the previous export. Sheer idiocy, since it wasn't copyrightable
* at all and could be open-coded without any exports by anybody who cares.
*/
EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
#ifdef CONFIG_COMPAT
/*
* This is the equivalent of compat_ptr_ioctl(), to be used by block
* drivers that implement only commands that are completely compatible
* between 32-bit and 64-bit user space
*/
int blkdev_compat_ptr_ioctl(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long arg)
{
struct gendisk *disk = bdev->bd_disk;
if (disk->fops->ioctl)
return disk->fops->ioctl(bdev, mode, cmd,
(unsigned long)compat_ptr(arg));
return -ENOIOCTLCMD;
}
EXPORT_SYMBOL(blkdev_compat_ptr_ioctl);
#endif
static int blkdev_pr_register(struct block_device *bdev,
struct pr_registration __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_registration reg;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!ops || !ops->pr_register)
return -EOPNOTSUPP;
if (copy_from_user(&reg, arg, sizeof(reg)))
return -EFAULT;
if (reg.flags & ~PR_FL_IGNORE_KEY)
return -EOPNOTSUPP;
return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
}
static int blkdev_pr_reserve(struct block_device *bdev,
struct pr_reservation __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!ops || !ops->pr_reserve)
return -EOPNOTSUPP;
if (copy_from_user(&rsv, arg, sizeof(rsv)))
return -EFAULT;
if (rsv.flags & ~PR_FL_IGNORE_KEY)
return -EOPNOTSUPP;
return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
}
static int blkdev_pr_release(struct block_device *bdev,
struct pr_reservation __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!ops || !ops->pr_release)
return -EOPNOTSUPP;
if (copy_from_user(&rsv, arg, sizeof(rsv)))
return -EFAULT;
if (rsv.flags)
return -EOPNOTSUPP;
return ops->pr_release(bdev, rsv.key, rsv.type);
}
static int blkdev_pr_preempt(struct block_device *bdev,
struct pr_preempt __user *arg, bool abort)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_preempt p;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!ops || !ops->pr_preempt)
return -EOPNOTSUPP;
if (copy_from_user(&p, arg, sizeof(p)))
return -EFAULT;
if (p.flags)
return -EOPNOTSUPP;
return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
}
static int blkdev_pr_clear(struct block_device *bdev,
struct pr_clear __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_clear c;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!ops || !ops->pr_clear)
return -EOPNOTSUPP;
if (copy_from_user(&c, arg, sizeof(c)))
return -EFAULT;
if (c.flags)
return -EOPNOTSUPP;
return ops->pr_clear(bdev, c.key);
}
/*
* Is it an unrecognized ioctl? The correct returns are either
* ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
* fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl
* code before returning.
*
* Confused drivers sometimes return EINVAL, which is wrong. It
* means "I understood the ioctl command, but the parameters to
* it were wrong".
*
* We should aim to just fix the broken drivers, the EINVAL case
* should go away.
*/
static inline int is_unrecognized_ioctl(int ret)
{
return ret == -EINVAL ||
ret == -ENOTTY ||
ret == -ENOIOCTLCMD;
}
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long arg)
{
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
if (!is_unrecognized_ioctl(ret))
return ret;
fsync_bdev(bdev);
invalidate_bdev(bdev);
return 0;
}
static int blkdev_roset(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long arg)
{
int ret, n;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
if (!is_unrecognized_ioctl(ret))
return ret;
if (get_user(n, (int __user *)arg))
return -EFAULT;
set_device_ro(bdev, n);
return 0;
}
static int blkdev_getgeo(struct block_device *bdev,
struct hd_geometry __user *argp)
{
struct gendisk *disk = bdev->bd_disk;
struct hd_geometry geo;
int ret;
if (!argp)
return -EINVAL;
if (!disk->fops->getgeo)
return -ENOTTY;
/*
* We need to set the startsect first, the driver may
* want to override it.
*/
memset(&geo, 0, sizeof(geo));
geo.start = get_start_sect(bdev);
ret = disk->fops->getgeo(bdev, &geo);
if (ret)
return ret;
if (copy_to_user(argp, &geo, sizeof(geo)))
return -EFAULT;
return 0;
}
#ifdef CONFIG_COMPAT
struct compat_hd_geometry {
unsigned char heads;
unsigned char sectors;
unsigned short cylinders;
u32 start;
};
static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev,
struct compat_hd_geometry __user *ugeo)
{
struct hd_geometry geo;
int ret;
if (!ugeo)
return -EINVAL;
if (!disk->fops->getgeo)
return -ENOTTY;
memset(&geo, 0, sizeof(geo));
/*
* We need to set the startsect first, the driver may
* want to override it.
*/
geo.start = get_start_sect(bdev);
ret = disk->fops->getgeo(bdev, &geo);
if (ret)
return ret;
ret = copy_to_user(ugeo, &geo, 4);
ret |= put_user(geo.start, &ugeo->start);
if (ret)
ret = -EFAULT;
return ret;
}
#endif
/* set the logical block size */
static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
int __user *argp)
{
int ret, n;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (!argp)
return -EINVAL;
if (get_user(n, argp))
return -EFAULT;
if (!(mode & FMODE_EXCL)) {
bdgrab(bdev);
if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
return -EBUSY;
}
ret = set_blocksize(bdev, n);
if (!(mode & FMODE_EXCL))
blkdev_put(bdev, mode | FMODE_EXCL);
return ret;
}
/*
* always keep this in sync with compat_blkdev_ioctl()
*/
int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
unsigned long arg)
{
void __user *argp = (void __user *)arg;
loff_t size;
unsigned int max_sectors;
switch (cmd) {
case BLKFLSBUF:
return blkdev_flushbuf(bdev, mode, cmd, arg);
case BLKROSET:
return blkdev_roset(bdev, mode, cmd, arg);
case BLKDISCARD:
return blk_ioctl_discard(bdev, mode, arg, 0);
case BLKSECDISCARD:
return blk_ioctl_discard(bdev, mode, arg,
BLKDEV_DISCARD_SECURE);
case BLKZEROOUT:
return blk_ioctl_zeroout(bdev, mode, arg);
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
case BLKOPENZONE:
case BLKCLOSEZONE:
case BLKFINISHZONE:
return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
case BLKGETZONESZ:
return put_uint(arg, bdev_zone_sectors(bdev));
case BLKGETNRZONES:
return put_uint(arg, blkdev_nr_zones(bdev->bd_disk));
case HDIO_GETGEO:
return blkdev_getgeo(bdev, argp);
case BLKRAGET:
case BLKFRAGET:
if (!arg)
return -EINVAL;
return put_long(arg, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512);
case BLKROGET:
return put_int(arg, bdev_read_only(bdev) != 0);
case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
return put_int(arg, block_size(bdev));
case BLKSSZGET: /* get block device logical block size */
return put_int(arg, bdev_logical_block_size(bdev));
case BLKPBSZGET: /* get block device physical block size */
return put_uint(arg, bdev_physical_block_size(bdev));
case BLKIOMIN:
return put_uint(arg, bdev_io_min(bdev));
case BLKIOOPT:
return put_uint(arg, bdev_io_opt(bdev));
case BLKALIGNOFF:
return put_int(arg, bdev_alignment_offset(bdev));
case BLKDISCARDZEROES:
return put_uint(arg, 0);
case BLKSECTGET:
max_sectors = min_t(unsigned int, USHRT_MAX,
queue_max_sectors(bdev_get_queue(bdev)));
return put_ushort(arg, max_sectors);
case BLKROTATIONAL:
return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
case BLKRASET:
case BLKFRASET:
if(!capable(CAP_SYS_ADMIN))
return -EACCES;
bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
return 0;
case BLKBSZSET:
return blkdev_bszset(bdev, mode, argp);
case BLKPG:
return blkpg_ioctl(bdev, argp);
case BLKRRPART:
return blkdev_reread_part(bdev);
case BLKGETSIZE:
size = i_size_read(bdev->bd_inode);
if ((size >> 9) > ~0UL)
return -EFBIG;
return put_ulong(arg, size >> 9);
case BLKGETSIZE64:
return put_u64(arg, i_size_read(bdev->bd_inode));
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp);
case IOC_PR_REGISTER:
return blkdev_pr_register(bdev, argp);
case IOC_PR_RESERVE:
return blkdev_pr_reserve(bdev, argp);
case IOC_PR_RELEASE:
return blkdev_pr_release(bdev, argp);
case IOC_PR_PREEMPT:
return blkdev_pr_preempt(bdev, argp, false);
case IOC_PR_PREEMPT_ABORT:
return blkdev_pr_preempt(bdev, argp, true);
case IOC_PR_CLEAR:
return blkdev_pr_clear(bdev, argp);
default:
return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}
}
EXPORT_SYMBOL_GPL(blkdev_ioctl);
#ifdef CONFIG_COMPAT
#define BLKBSZGET_32 _IOR(0x12, 112, int)
#define BLKBSZSET_32 _IOW(0x12, 113, int)
#define BLKGETSIZE64_32 _IOR(0x12, 114, int)
/* Most of the generic ioctls are handled in the normal fallback path.
This assumes the blkdev's low level compat_ioctl always returns
ENOIOCTLCMD for unknown ioctls. */
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
int ret = -ENOIOCTLCMD;
struct inode *inode = file->f_mapping->host;
struct block_device *bdev = inode->i_bdev;
struct gendisk *disk = bdev->bd_disk;
fmode_t mode = file->f_mode;
loff_t size;
unsigned int max_sectors;
/*
* O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
* to updated it before every ioctl.
*/
if (file->f_flags & O_NDELAY)
mode |= FMODE_NDELAY;
else
mode &= ~FMODE_NDELAY;
switch (cmd) {
case HDIO_GETGEO:
return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
case BLKPBSZGET:
return compat_put_uint(arg, bdev_physical_block_size(bdev));
case BLKIOMIN:
return compat_put_uint(arg, bdev_io_min(bdev));
case BLKIOOPT:
return compat_put_uint(arg, bdev_io_opt(bdev));
case BLKALIGNOFF:
return compat_put_int(arg, bdev_alignment_offset(bdev));
case BLKDISCARDZEROES:
return compat_put_uint(arg, 0);
case BLKFLSBUF:
case BLKROSET:
case BLKDISCARD:
case BLKSECDISCARD:
case BLKZEROOUT:
/*
* the ones below are implemented in blkdev_locked_ioctl,
* but we call blkdev_ioctl, which gets the lock for us
*/
case BLKRRPART:
case BLKREPORTZONE:
case BLKRESETZONE:
case BLKOPENZONE:
case BLKCLOSEZONE:
case BLKFINISHZONE:
case BLKGETZONESZ:
case BLKGETNRZONES:
return blkdev_ioctl(bdev, mode, cmd,
(unsigned long)compat_ptr(arg));
case BLKBSZSET_32:
return blkdev_ioctl(bdev, mode, BLKBSZSET,
(unsigned long)compat_ptr(arg));
case BLKPG:
return compat_blkpg_ioctl(bdev, compat_ptr(arg));
case BLKRAGET:
case BLKFRAGET:
if (!arg)
return -EINVAL;
return compat_put_long(arg,
(bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512);
case BLKROGET: /* compatible */
return compat_put_int(arg, bdev_read_only(bdev) != 0);
case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
return compat_put_int(arg, block_size(bdev));
case BLKSSZGET: /* get block device hardware sector size */
return compat_put_int(arg, bdev_logical_block_size(bdev));
case BLKSECTGET:
max_sectors = min_t(unsigned int, USHRT_MAX,
queue_max_sectors(bdev_get_queue(bdev)));
return compat_put_ushort(arg, max_sectors);
case BLKROTATIONAL:
return compat_put_ushort(arg,
!blk_queue_nonrot(bdev_get_queue(bdev)));
case BLKRASET: /* compatible, but no compat_ptr (!) */
case BLKFRASET:
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
return 0;
case BLKGETSIZE:
size = i_size_read(bdev->bd_inode);
if ((size >> 9) > ~0UL)
return -EFBIG;
return compat_put_ulong(arg, size >> 9);
case BLKGETSIZE64_32:
return compat_put_u64(arg, i_size_read(bdev->bd_inode));
case BLKTRACESETUP32:
case BLKTRACESTART: /* compatible */
case BLKTRACESTOP: /* compatible */
case BLKTRACETEARDOWN: /* compatible */
ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg));
return ret;
case IOC_PR_REGISTER:
case IOC_PR_RESERVE:
case IOC_PR_RELEASE:
case IOC_PR_PREEMPT:
case IOC_PR_PREEMPT_ABORT:
case IOC_PR_CLEAR:
return blkdev_ioctl(bdev, mode, cmd,
(unsigned long)compat_ptr(arg));
default:
if (disk->fops->compat_ioctl)
ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);
return ret;
}
}
#endif