for-5.19/drivers-2022-05-22

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmKKrTcQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgph/REAC0/7odRfJeTJ1PkJhSKFc7dhyS7rK4du2s
 3+z+H6Yeua2yVIJb0mYYGEJcOUUQ9nD2T9424n3NzDOw88U4y8Vg2YEH+UiJBuj4
 AJoxPNkQdxL7WzmwHmRNLCcOOFhISLqWiCJSr45d+LP1f6aO24Q9lewYWxtNA4TW
 mqb7Ne7e3Z77m9rmsCsZ26bzQHg1EEQ6qgjZM9tqMhOeTqYhmrqfrD9KtG8TIkpK
 N8277E5QcequHf7v6VpKqEOzf3d2kx55JaZdu+oxLPVMED3wJJFwcYF1/xmM7Fgx
 tp7xCjqqUHXwKvJNCFJpnvw+cXu0Ct7cWOIG4ROCvaTD4vBI1KzZLc0gO7pKFW0Y
 hNIlMXr4n8PmonS81tMV4TqmRWxedX/jxuaeJCVNr89PqYU4luPpigJZqv7rlGry
 KZUlktQot22M/7FC2MS6KhgbQKLPrRGTAEyY/JNwBHckCZiduWQFlmKLQ926xQIJ
 6vdjSzHK5MrT/d+yow3bGFxAJWloGJ+L+RsH0b+WikF81+6ic9P3AoStgbVilfKD
 6sbjcju8SShDlQ+W/Ocm0rHC+i/RDKT3QqItXgfhA/1FfMPODQGc/xcZg+AdTswn
 VSnUIkvk9/mTO0StilVfNJDfG1QkSpJ5Ilvs/DnIahZj6IG4QbJvtnVNbmQX6ptz
 AUB4DdGwXg==
 =geQL
 -----END PGP SIGNATURE-----

Merge tag 'for-5.19/drivers-2022-05-22' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "Here are the driver updates queued up for 5.19. This contains:

   - NVMe pull requests via Christoph:
       - tighten the PCI presence check (Stefan Roese)
       - fix a potential NULL pointer dereference in an error path (Kyle
         Miller Smith)
       - fix interpretation of the DMRSL field (Tom Yan)
       - relax the data transfer alignment (Keith Busch)
       - verbose error logging improvements (Max Gurtovoy, Chaitanya
         Kulkarni)
       - misc cleanups (Chaitanya Kulkarni, Christoph)
       - set non-mdts limits in nvme_scan_work (Chaitanya Kulkarni)
       - add support for TP4084 - Time-to-Ready Enhancements (Christoph)

   - MD pull request via Song:
       - Improve annotation in raid5 code, by Logan Gunthorpe
       - Support MD_BROKEN flag in raid-1/5/10, by Mariusz Tkaczyk
       - Other small fixes/cleanups

   - null_blk series making the configfs side much saner (Damien)

   - Various minor drbd cleanups and fixes (Haowen, Uladzislau, Jiapeng,
     Arnd, Cai)

   - Avoid using the system workqueue (and hence flushing it) in rnbd
     (Jack)

   - Avoid using the system workqueue (and hence flushing it) in aoe
     (Tetsuo)

   - Series fixing discard_alignment issues in drivers (Christoph)

   - Small series fixing drivers poking at disk->part0 for openers
     information (Christoph)

   - Series fixing deadlocks in loop (Christoph, Tetsuo)

   - Remove loop.h and add SPDX headers (Christoph)

   - Various fixes and cleanups (Julia, Xie, Yu)"

* tag 'for-5.19/drivers-2022-05-22' of git://git.kernel.dk/linux-block: (72 commits)
  mtip32xx: fix typo in comment
  nvme: set non-mdts limits in nvme_scan_work
  nvme: add support for TP4084 - Time-to-Ready Enhancements
  nvme: split the enum used for various register constants
  nbd: Fix hung on disconnect request if socket is closed before
  nvme-fabrics: add a request timeout helper
  nvme-pci: harden drive presence detect in nvme_dev_disable()
  nvme-pci: fix a NULL pointer dereference in nvme_alloc_admin_tags
  nvme: mark internal passthru request RQF_QUIET
  nvme: remove unneeded include from constants file
  nvme: add missing status values to verbose logging
  nvme: set dma alignment to dword
  nvme: fix interpretation of DMRSL
  loop: remove most the top-of-file boilerplate comment from the UAPI header
  loop: remove most the top-of-file boilerplate comment
  loop: add a SPDX header
  loop: remove loop.h
  block: null_blk: Improve device creation with configfs
  block: null_blk: Cleanup messages
  block: null_blk: Cleanup device creation and deletion
  ...
This commit is contained in:
Linus Torvalds 2022-05-23 14:04:14 -07:00
commit 5dc921868c
50 changed files with 847 additions and 655 deletions

View File

@ -799,7 +799,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
}
if (ubd_dev->no_trim == 0) {
ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
}

View File

@ -673,17 +673,17 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
}
}
if (!bdev->bd_openers)
if (!atomic_read(&bdev->bd_openers))
set_init_blocksize(bdev);
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, false);
bdev->bd_openers++;
atomic_inc(&bdev->bd_openers);
return 0;
}
static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
{
if (!--bdev->bd_openers)
if (atomic_dec_and_test(&bdev->bd_openers))
blkdev_flush_mapping(bdev);
if (bdev->bd_disk->fops->release)
bdev->bd_disk->fops->release(bdev->bd_disk, mode);
@ -694,7 +694,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
struct gendisk *disk = part->bd_disk;
int ret;
if (part->bd_openers)
if (atomic_read(&part->bd_openers))
goto done;
ret = blkdev_get_whole(bdev_whole(part), mode);
@ -708,7 +708,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
disk->open_partitions++;
set_init_blocksize(part);
done:
part->bd_openers++;
atomic_inc(&part->bd_openers);
return 0;
out_blkdev_put:
@ -720,7 +720,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
{
struct block_device *whole = bdev_whole(part);
if (--part->bd_openers)
if (!atomic_dec_and_test(&part->bd_openers))
return;
blkdev_flush_mapping(part);
whole->bd_disk->open_partitions--;
@ -899,7 +899,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* of the world and we want to avoid long (could be several minute)
* syncs while holding the mutex.
*/
if (bdev->bd_openers == 1)
if (atomic_read(&bdev->bd_openers) == 1)
sync_blockdev(bdev);
mutex_lock(&disk->open_mutex);
@ -1044,7 +1044,7 @@ void sync_bdevs(bool wait)
bdev = I_BDEV(inode);
mutex_lock(&bdev->bd_disk->open_mutex);
if (!bdev->bd_openers) {
if (!atomic_read(&bdev->bd_openers)) {
; /* skip */
} else if (wait) {
/*

View File

@ -478,7 +478,7 @@ int bdev_del_partition(struct gendisk *disk, int partno)
goto out_unlock;
ret = -EBUSY;
if (part->bd_openers)
if (atomic_read(&part->bd_openers))
goto out_unlock;
delete_partition(part);

View File

@ -244,3 +244,5 @@ void aoenet_exit(void);
void aoenet_xmit(struct sk_buff_head *);
int is_aoe_netif(struct net_device *ifp);
int set_aoe_iflist(const char __user *str, size_t size);
extern struct workqueue_struct *aoe_wq;

View File

@ -435,7 +435,7 @@ err_mempool:
err:
spin_lock_irqsave(&d->lock, flags);
d->flags &= ~DEVFL_GD_NOW;
schedule_work(&d->work);
queue_work(aoe_wq, &d->work);
spin_unlock_irqrestore(&d->lock, flags);
}

View File

@ -968,7 +968,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
d->flags |= DEVFL_NEWSIZE;
else
d->flags |= DEVFL_GDALLOC;
schedule_work(&d->work);
queue_work(aoe_wq, &d->work);
}
static void

View File

@ -321,7 +321,7 @@ flush(const char __user *str, size_t cnt, int exiting)
specified = 1;
}
flush_scheduled_work();
flush_workqueue(aoe_wq);
/* pass one: do aoedev_downdev, which might sleep */
restart1:
spin_lock_irqsave(&devlist_lock, flags);
@ -520,7 +520,7 @@ freetgt(struct aoedev *d, struct aoetgt *t)
void
aoedev_exit(void)
{
flush_scheduled_work();
flush_workqueue(aoe_wq);
flush(NULL, 0, EXITING);
}

View File

@ -16,6 +16,7 @@ MODULE_DESCRIPTION("AoE block/char driver for 2.6.2 and newer 2.6 kernels");
MODULE_VERSION(VERSION);
static struct timer_list timer;
struct workqueue_struct *aoe_wq;
static void discover_timer(struct timer_list *t)
{
@ -35,6 +36,7 @@ aoe_exit(void)
aoechr_exit();
aoedev_exit();
aoeblk_exit(); /* free cache after de-allocating bufs */
destroy_workqueue(aoe_wq);
}
static int __init
@ -42,9 +44,13 @@ aoe_init(void)
{
int ret;
aoe_wq = alloc_workqueue("aoe_wq", 0, 0);
if (!aoe_wq)
return -ENOMEM;
ret = aoedev_init();
if (ret)
return ret;
goto dev_fail;
ret = aoechr_init();
if (ret)
goto chr_fail;
@ -77,6 +83,8 @@ aoe_init(void)
aoechr_exit();
chr_fail:
aoedev_exit();
dev_fail:
destroy_workqueue(aoe_wq);
printk(KERN_INFO "aoe: initialisation failure.\n");
return ret;

View File

@ -683,7 +683,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
}
}
want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
want = PFN_UP(words*sizeof(long));
have = b->bm_number_of_pages;
if (want == have) {
D_ASSERT(device, b->bm_pages != NULL);

View File

@ -3581,9 +3581,8 @@ const char *cmdname(enum drbd_packet cmd)
* when we want to support more than
* one PRO_VERSION */
static const char *cmdnames[] = {
[P_DATA] = "Data",
[P_WSAME] = "WriteSame",
[P_TRIM] = "Trim",
[P_DATA_REPLY] = "DataReply",
[P_RS_DATA_REPLY] = "RSDataReply",
[P_BARRIER] = "Barrier",
@ -3594,7 +3593,6 @@ const char *cmdname(enum drbd_packet cmd)
[P_DATA_REQUEST] = "DataRequest",
[P_RS_DATA_REQUEST] = "RSDataRequest",
[P_SYNC_PARAM] = "SyncParam",
[P_SYNC_PARAM89] = "SyncParam89",
[P_PROTOCOL] = "ReportProtocol",
[P_UUIDS] = "ReportUUIDs",
[P_SIZES] = "ReportSizes",
@ -3602,6 +3600,7 @@ const char *cmdname(enum drbd_packet cmd)
[P_SYNC_UUID] = "ReportSyncUUID",
[P_AUTH_CHALLENGE] = "AuthChallenge",
[P_AUTH_RESPONSE] = "AuthResponse",
[P_STATE_CHG_REQ] = "StateChgRequest",
[P_PING] = "Ping",
[P_PING_ACK] = "PingAck",
[P_RECV_ACK] = "RecvAck",
@ -3612,23 +3611,25 @@ const char *cmdname(enum drbd_packet cmd)
[P_NEG_DREPLY] = "NegDReply",
[P_NEG_RS_DREPLY] = "NegRSDReply",
[P_BARRIER_ACK] = "BarrierAck",
[P_STATE_CHG_REQ] = "StateChgRequest",
[P_STATE_CHG_REPLY] = "StateChgReply",
[P_OV_REQUEST] = "OVRequest",
[P_OV_REPLY] = "OVReply",
[P_OV_RESULT] = "OVResult",
[P_CSUM_RS_REQUEST] = "CsumRSRequest",
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
[P_SYNC_PARAM89] = "SyncParam89",
[P_COMPRESSED_BITMAP] = "CBitmap",
[P_DELAY_PROBE] = "DelayProbe",
[P_OUT_OF_SYNC] = "OutOfSync",
[P_RETRY_WRITE] = "RetryWrite",
[P_RS_CANCEL] = "RSCancel",
[P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
[P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
[P_PROTOCOL_UPDATE] = "protocol_update",
[P_TRIM] = "Trim",
[P_RS_THIN_REQ] = "rs_thin_req",
[P_RS_DEALLOCATED] = "rs_deallocated",
[P_WSAME] = "WriteSame",
[P_ZEROES] = "Zeroes",
/* enum drbd_packet, but not commands - obsoleted flags:
* P_MAY_IGNORE

View File

@ -770,6 +770,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
struct set_role_parms parms;
int err;
enum drbd_ret_code retcode;
enum drbd_state_rv rv;
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
@ -790,14 +791,14 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
R_PRIMARY, parms.assume_uptodate);
rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
else
retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
R_SECONDARY, 0);
rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
mutex_unlock(&adm_ctx.resource->adm_mutex);
genl_lock();
drbd_adm_finish(&adm_ctx, info, rv);
return 0;
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@ -1601,8 +1602,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
drbd_send_sync_param(peer_device);
}
synchronize_rcu();
kfree(old_disk_conf);
kvfree_rcu(old_disk_conf);
kfree(old_plan);
mod_timer(&device->request_timer, jiffies + HZ);
goto success;
@ -2433,8 +2433,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
mutex_unlock(&connection->resource->conf_update);
mutex_unlock(&connection->data.mutex);
synchronize_rcu();
kfree(old_net_conf);
kvfree_rcu(old_net_conf);
if (connection->cstate >= C_WF_REPORT_PARAMS) {
struct drbd_peer_device *peer_device;
@ -2492,6 +2491,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
struct drbd_resource *resource;
struct drbd_connection *connection;
enum drbd_ret_code retcode;
enum drbd_state_rv rv;
int i;
int err;
@ -2611,12 +2611,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
}
rcu_read_unlock();
retcode = (enum drbd_ret_code)conn_request_state(connection,
NS(conn, C_UNCONNECTED), CS_VERBOSE);
rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
conn_reconfig_done(connection);
mutex_unlock(&adm_ctx.resource->adm_mutex);
drbd_adm_finish(&adm_ctx, info, retcode);
drbd_adm_finish(&adm_ctx, info, rv);
return 0;
fail:
@ -2724,11 +2723,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
rv = conn_try_disconnect(connection, parms.force_disconnect);
if (rv < SS_SUCCESS)
retcode = (enum drbd_ret_code)rv;
else
retcode = NO_ERROR;
mutex_unlock(&adm_ctx.resource->adm_mutex);
if (rv < SS_SUCCESS) {
drbd_adm_finish(&adm_ctx, info, rv);
return 0;
}
retcode = NO_ERROR;
fail:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@ -2847,8 +2847,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
new_disk_conf->disk_size = (sector_t)rs.resize_size;
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&device->resource->conf_update);
synchronize_rcu();
kfree(old_disk_conf);
kvfree_rcu(old_disk_conf);
new_disk_conf = NULL;
}

View File

@ -364,7 +364,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
struct drbd_device *device = peer_device->device;
struct drbd_peer_request *peer_req;
struct page *page = NULL;
unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
unsigned int nr_pages = PFN_UP(payload_size);
if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
return NULL;
@ -1628,9 +1628,9 @@ int drbd_submit_peer_request(struct drbd_device *device,
struct bio *bio;
struct page *page = peer_req->pages;
sector_t sector = peer_req->i.sector;
unsigned data_size = peer_req->i.size;
unsigned n_bios = 0;
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
unsigned int data_size = peer_req->i.size;
unsigned int n_bios = 0;
unsigned int nr_pages = PFN_UP(data_size);
/* TRIM/DISCARD: for now, always use the helper function
* blkdev_issue_zeroout(..., discard=true).
@ -3750,8 +3750,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
drbd_info(connection, "peer data-integrity-alg: %s\n",
integrity_alg[0] ? integrity_alg : "(none)");
synchronize_rcu();
kfree(old_net_conf);
kvfree_rcu(old_net_conf);
return 0;
disconnect_rcu_unlock:
@ -3902,7 +3901,6 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
drbd_err(device, "verify-alg of wrong size, "
"peer wants %u, accepting only up to %u byte\n",
data_size, SHARED_SECRET_MAX);
err = -EIO;
goto reconnect;
}
@ -4120,8 +4118,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&connection->resource->conf_update);
synchronize_rcu();
kfree(old_disk_conf);
kvfree_rcu(old_disk_conf);
drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
(unsigned long)p_usize, (unsigned long)my_usize);

View File

@ -922,7 +922,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
switch (rbm) {
case RB_CONGESTED_REMOTE:
return 0;
return false;
case RB_LEAST_PENDING:
return atomic_read(&device->local_cnt) >
atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);

View File

@ -2071,8 +2071,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
conn_free_crypto(connection);
mutex_unlock(&connection->resource->conf_update);
synchronize_rcu();
kfree(old_conf);
kvfree_rcu(old_conf);
}
if (ns_max.susp_fen) {

View File

@ -1030,7 +1030,7 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_
{
if (drbd_peer_req_has_active_page(peer_req)) {
/* This might happen if sendpage() has not finished */
int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
int i = PFN_UP(peer_req->i.size);
atomic_add(i, &device->pp_in_use_by_net);
atomic_sub(i, &device->pp_in_use);
spin_lock_irq(&device->resource->req_lock);

View File

@ -1,54 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/block/loop.c
*
* Written by Theodore Ts'o, 3/29/93
*
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
* permitted under the GNU General Public License.
*
* DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
* more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
*
* Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
* Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
*
* Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
*
* Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
*
* Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
*
* Loadable modules and other fixes by AK, 1998
*
* Make real block number available to downstream transfer functions, enables
* CBC (and relatives) mode encryption requiring unique IVs per data block.
* Reed H. Petty, rhp@draper.net
*
* Maximum number of loop devices now dynamic via max_loop module parameter.
* Russell Kroll <rkroll@exploits.org> 19990701
*
* Maximum number of loop devices when compiled-in now selectable by passing
* max_loop=<1-255> to the kernel on boot.
* Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
*
* Completely rewrite request handling to be make_request_fn style and
* non blocking, pushing work to a helper thread. Lots of fixes from
* Al Viro too.
* Jens Axboe <axboe@suse.de>, Nov 2000
*
* Support up to 256 loop devices
* Heinz Mauelshagen <mge@sistina.com>, Feb 2002
*
* Support for falling back on the write file operation when the address space
* operations write_begin is not available on the backing filesystem.
* Anton Altaparmakov, 16 Feb 2005
*
* Still To Fix:
* - Advisory locking is ignored here.
* - Should use an own CAP_* category instead of CAP_SYS_ADMIN
*
* Copyright 1993 by Theodore Ts'o.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
@ -59,7 +12,6 @@
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/init.h>
#include <linux/swap.h>
@ -80,10 +32,62 @@
#include <linux/blk-cgroup.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
#include "loop.h"
#include <linux/uaccess.h>
#include <linux/blk-mq.h>
#include <linux/spinlock.h>
#include <uapi/linux/loop.h>
/* Possible states of device */
enum {
Lo_unbound,
Lo_bound,
Lo_rundown,
Lo_deleting,
};
struct loop_func_table;
struct loop_device {
int lo_number;
loff_t lo_offset;
loff_t lo_sizelimit;
int lo_flags;
char lo_file_name[LO_NAME_SIZE];
struct file * lo_backing_file;
struct block_device *lo_device;
gfp_t old_gfp_mask;
spinlock_t lo_lock;
int lo_state;
spinlock_t lo_work_lock;
struct workqueue_struct *workqueue;
struct work_struct rootcg_work;
struct list_head rootcg_cmd_list;
struct list_head idle_worker_list;
struct rb_root worker_tree;
struct timer_list timer;
bool use_dio;
bool sysfs_inited;
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk;
struct mutex lo_mutex;
bool idr_visible;
};
struct loop_cmd {
struct list_head list_entry;
bool use_aio; /* use AIO interface to handle I/O */
atomic_t ref; /* only for aio */
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
struct cgroup_subsys_state *blkcg_css;
struct cgroup_subsys_state *memcg_css;
};
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
#define LOOP_DEFAULT_HW_Q_DEPTH (128)
@ -569,6 +573,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
if (!file)
return -EBADF;
/* suppress uevents while reconfiguring the device */
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
is_loop = is_loop_device(file);
error = loop_global_lock_killable(lo, is_loop);
if (error)
@ -623,13 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
fput(old_file);
if (partscan)
loop_reread_partitions(lo);
return 0;
error = 0;
done:
/* enable and uncork uevent now that we are done */
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
return error;
out_err:
loop_global_unlock(lo, is_loop);
out_putf:
fput(file);
return error;
goto done;
}
/* loop sysfs attributes */
@ -789,7 +802,6 @@ static void loop_config_discard(struct loop_device *lo)
blk_queue_max_discard_sectors(q, 0);
blk_queue_max_write_zeroes_sectors(q, 0);
}
q->limits.discard_alignment = 0;
}
struct loop_worker {
@ -803,8 +815,6 @@ struct loop_worker {
};
static void loop_workfn(struct work_struct *work);
static void loop_rootcg_workfn(struct work_struct *work);
static void loop_free_idle_workers(struct timer_list *timer);
#ifdef CONFIG_BLK_CGROUP
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
@ -888,6 +898,39 @@ queue_work:
spin_unlock_irq(&lo->lo_work_lock);
}
static void loop_set_timer(struct loop_device *lo)
{
timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
}
static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
{
struct loop_worker *pos, *worker;
spin_lock_irq(&lo->lo_work_lock);
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
idle_list) {
if (!delete_all &&
time_is_after_jiffies(worker->last_ran_at +
LOOP_IDLE_WORKER_TIMEOUT))
break;
list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree);
css_put(worker->blkcg_css);
kfree(worker);
}
if (!list_empty(&lo->idle_worker_list))
loop_set_timer(lo);
spin_unlock_irq(&lo->lo_work_lock);
}
static void loop_free_idle_workers_timer(struct timer_list *timer)
{
struct loop_device *lo = container_of(timer, struct loop_device, timer);
return loop_free_idle_workers(lo, false);
}
static void loop_update_rotational(struct loop_device *lo)
{
struct file *file = lo->lo_backing_file;
@ -962,6 +1005,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
/* suppress uevents while reconfiguring the device */
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
@ -1006,24 +1052,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
lo->workqueue = alloc_workqueue("loop%d",
WQ_UNBOUND | WQ_FREEZABLE,
0,
lo->lo_number);
if (!lo->workqueue) {
error = -ENOMEM;
goto out_unlock;
lo->workqueue = alloc_workqueue("loop%d",
WQ_UNBOUND | WQ_FREEZABLE,
0, lo->lo_number);
if (!lo->workqueue) {
error = -ENOMEM;
goto out_unlock;
}
}
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
INIT_LIST_HEAD(&lo->idle_worker_list);
lo->worker_tree = RB_ROOT;
timer_setup(&lo->timer, loop_free_idle_workers,
TIMER_DEFERRABLE);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
lo->lo_backing_file = file;
@ -1068,7 +1109,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
loop_reread_partitions(lo);
if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure);
return 0;
error = 0;
done:
/* enable and uncork uevent now that we are done */
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
return error;
out_unlock:
loop_global_unlock(lo, is_loop);
@ -1079,53 +1125,24 @@ out_putf:
fput(file);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return error;
goto done;
}
static void __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
struct loop_worker *pos, *worker;
/*
* Flush loop_configure() and loop_change_fd(). It is acceptable for
* loop_validate_file() to succeed, for actual clear operation has not
* started yet.
*/
mutex_lock(&loop_validate_mutex);
mutex_unlock(&loop_validate_mutex);
/*
* loop_validate_file() now fails because l->lo_state != Lo_bound
* became visible.
*/
/*
* Since this function is called upon "ioctl(LOOP_CLR_FD)" xor "close()
* after ioctl(LOOP_CLR_FD)", it is a sign of something going wrong if
* lo->lo_state has changed while waiting for lo->lo_mutex.
*/
mutex_lock(&lo->lo_mutex);
BUG_ON(lo->lo_state != Lo_rundown);
mutex_unlock(&lo->lo_mutex);
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
blk_queue_write_cache(lo->lo_queue, false, false);
/* freeze request queue during the transition */
blk_mq_freeze_queue(lo->lo_queue);
destroy_workqueue(lo->workqueue);
spin_lock_irq(&lo->lo_work_lock);
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
idle_list) {
list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree);
css_put(worker->blkcg_css);
kfree(worker);
}
spin_unlock_irq(&lo->lo_work_lock);
del_timer_sync(&lo->timer);
/*
* Freeze the request queue when unbinding on a live file descriptor and
* thus an open device. When called from ->release we are guaranteed
* that there is no I/O in progress already.
*/
if (!release)
blk_mq_freeze_queue(lo->lo_queue);
spin_lock_irq(&lo->lo_lock);
filp = lo->lo_backing_file;
@ -1146,7 +1163,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
mapping_set_gfp_mask(filp->f_mapping, gfp);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
if (!release)
blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@ -1197,11 +1215,20 @@ static int loop_clr_fd(struct loop_device *lo)
{
int err;
err = mutex_lock_killable(&lo->lo_mutex);
/*
* Since lo_ioctl() is called without locks held, it is possible that
* loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel.
*
* Therefore, use global lock when setting Lo_rundown state in order to
* make sure that loop_validate_file() will fail if the "struct file"
* which loop_configure()/loop_change_fd() found via fget() was this
* loop device.
*/
err = loop_global_lock_killable(lo, true);
if (err)
return err;
if (lo->lo_state != Lo_bound) {
mutex_unlock(&lo->lo_mutex);
loop_global_unlock(lo, true);
return -ENXIO;
}
/*
@ -1214,13 +1241,13 @@ static int loop_clr_fd(struct loop_device *lo)
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
* command to fail with EBUSY.
*/
if (atomic_read(&lo->lo_refcnt) > 1) {
if (disk_openers(lo->lo_disk) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
mutex_unlock(&lo->lo_mutex);
loop_global_unlock(lo, true);
return 0;
}
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
loop_global_unlock(lo, true);
__loop_clr_fd(lo, false);
return 0;
@ -1252,15 +1279,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
/* If any pages were dirtied after invalidate_bdev(), try again */
err = -EAGAIN;
pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
}
prev_lo_flags = lo->lo_flags;
err = loop_set_status_from_info(lo, info);
@ -1471,21 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
invalidate_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
/* invalidate_bdev should have truncated all the pages */
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
}
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
loop_update_dio(lo);
out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
return err;
@ -1715,33 +1722,15 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif
static int lo_open(struct block_device *bdev, fmode_t mode)
{
struct loop_device *lo = bdev->bd_disk->private_data;
int err;
err = mutex_lock_killable(&lo->lo_mutex);
if (err)
return err;
if (lo->lo_state == Lo_deleting)
err = -ENXIO;
else
atomic_inc(&lo->lo_refcnt);
mutex_unlock(&lo->lo_mutex);
return err;
}
static void lo_release(struct gendisk *disk, fmode_t mode)
{
struct loop_device *lo = disk->private_data;
mutex_lock(&lo->lo_mutex);
if (atomic_dec_return(&lo->lo_refcnt))
goto out_unlock;
if (disk_openers(disk) > 0)
return;
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
if (lo->lo_state != Lo_bound)
goto out_unlock;
mutex_lock(&lo->lo_mutex);
if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
/*
@ -1750,27 +1739,30 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
*/
__loop_clr_fd(lo, true);
return;
} else if (lo->lo_state == Lo_bound) {
/*
* Otherwise keep thread (if running) and config,
* but flush possible ongoing bios in thread.
*/
blk_mq_freeze_queue(lo->lo_queue);
blk_mq_unfreeze_queue(lo->lo_queue);
}
out_unlock:
mutex_unlock(&lo->lo_mutex);
}
static void lo_free_disk(struct gendisk *disk)
{
struct loop_device *lo = disk->private_data;
if (lo->workqueue)
destroy_workqueue(lo->workqueue);
loop_free_idle_workers(lo, true);
del_timer_sync(&lo->timer);
mutex_destroy(&lo->lo_mutex);
kfree(lo);
}
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
.open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = lo_compat_ioctl,
#endif
.free_disk = lo_free_disk,
};
/*
@ -1885,11 +1877,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
}
static void loop_set_timer(struct loop_device *lo)
{
timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
}
static void loop_process_work(struct loop_worker *worker,
struct list_head *cmd_list, struct loop_device *lo)
{
@ -1938,27 +1925,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
}
static void loop_free_idle_workers(struct timer_list *timer)
{
struct loop_device *lo = container_of(timer, struct loop_device, timer);
struct loop_worker *pos, *worker;
spin_lock_irq(&lo->lo_work_lock);
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
idle_list) {
if (time_is_after_jiffies(worker->last_ran_at +
LOOP_IDLE_WORKER_TIMEOUT))
break;
list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree);
css_put(worker->blkcg_css);
kfree(worker);
}
if (!list_empty(&lo->idle_worker_list))
loop_set_timer(lo);
spin_unlock_irq(&lo->lo_work_lock);
}
static const struct blk_mq_ops loop_mq_ops = {
.queue_rq = loop_queue_rq,
.complete = lo_complete_rq,
@ -1974,6 +1940,9 @@ static int loop_add(int i)
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
if (!lo)
goto out;
lo->worker_tree = RB_ROOT;
INIT_LIST_HEAD(&lo->idle_worker_list);
timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
lo->lo_state = Lo_unbound;
err = mutex_lock_killable(&loop_ctl_mutex);
@ -2043,11 +2012,12 @@ static int loop_add(int i)
*/
if (!part_shift)
disk->flags |= GENHD_FL_NO_PART;
atomic_set(&lo->lo_refcnt, 0);
mutex_init(&lo->lo_mutex);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
spin_lock_init(&lo->lo_work_lock);
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
disk->major = LOOP_MAJOR;
disk->first_minor = i << part_shift;
disk->minors = 1 << part_shift;
@ -2087,15 +2057,14 @@ static void loop_remove(struct loop_device *lo)
{
/* Make this loop device unreachable from pathname. */
del_gendisk(lo->lo_disk);
blk_cleanup_disk(lo->lo_disk);
blk_cleanup_queue(lo->lo_disk->queue);
blk_mq_free_tag_set(&lo->tag_set);
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
mutex_unlock(&loop_ctl_mutex);
/* There is no route which can find this loop device. */
mutex_destroy(&lo->lo_mutex);
kfree(lo);
put_disk(lo->lo_disk);
}
static void loop_probe(dev_t dev)
@ -2134,13 +2103,12 @@ static int loop_control_remove(int idx)
ret = mutex_lock_killable(&lo->lo_mutex);
if (ret)
goto mark_visible;
if (lo->lo_state != Lo_unbound ||
atomic_read(&lo->lo_refcnt) > 0) {
if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
mutex_unlock(&lo->lo_mutex);
ret = -EBUSY;
goto mark_visible;
}
/* Mark this loop device no longer open()-able. */
/* Mark this loop device as no more bound, but not quite unbound yet */
lo->lo_state = Lo_deleting;
mutex_unlock(&lo->lo_mutex);

View File

@ -1,72 +0,0 @@
/*
* loop.h
*
* Written by Theodore Ts'o, 3/29/93.
*
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
* permitted under the GNU General Public License.
*/
#ifndef _LINUX_LOOP_H
#define _LINUX_LOOP_H
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <uapi/linux/loop.h>
/* Possible states of device */
enum {
Lo_unbound,
Lo_bound,
Lo_rundown,
Lo_deleting,
};
struct loop_func_table;
struct loop_device {
int lo_number;
atomic_t lo_refcnt;
loff_t lo_offset;
loff_t lo_sizelimit;
int lo_flags;
char lo_file_name[LO_NAME_SIZE];
struct file * lo_backing_file;
struct block_device *lo_device;
gfp_t old_gfp_mask;
spinlock_t lo_lock;
int lo_state;
spinlock_t lo_work_lock;
struct workqueue_struct *workqueue;
struct work_struct rootcg_work;
struct list_head rootcg_cmd_list;
struct list_head idle_worker_list;
struct rb_root worker_tree;
struct timer_list timer;
bool use_dio;
bool sysfs_inited;
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk;
struct mutex lo_mutex;
bool idr_visible;
};
struct loop_cmd {
struct list_head list_entry;
bool use_aio; /* use AIO interface to handle I/O */
atomic_t ref; /* only for aio */
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
struct cgroup_subsys_state *blkcg_css;
struct cgroup_subsys_state *memcg_css;
};
#endif

View File

@ -2729,7 +2729,7 @@ static int mtip_dma_alloc(struct driver_data *dd)
{
struct mtip_port *port = dd->port;
/* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
/* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
port->block1 =
dma_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
&port->block1_dma, GFP_KERNEL);

View File

@ -333,7 +333,6 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
nbd->disk->queue->limits.discard_granularity = blksize;
nbd->disk->queue->limits.discard_alignment = blksize;
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
}
blk_queue_logical_block_size(nbd->disk->queue, blksize);
@ -947,11 +946,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
if (!config->dead_conn_timeout)
return 0;
if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
if (!wait_event_timeout(config->conn_wait,
test_bit(NBD_RT_DISCONNECTED,
&config->runtime_flags) ||
atomic_read(&config->live_connections) > 0,
config->dead_conn_timeout))
return 0;
return wait_event_timeout(config->conn_wait,
atomic_read(&config->live_connections) > 0,
config->dead_conn_timeout) > 0;
return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
}
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@ -1217,11 +1220,11 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
return -ENOSPC;
}
static void nbd_bdev_reset(struct block_device *bdev)
static void nbd_bdev_reset(struct nbd_device *nbd)
{
if (bdev->bd_openers > 1)
if (disk_openers(nbd->disk) > 1)
return;
set_capacity(bdev->bd_disk, 0);
set_capacity(nbd->disk, 0);
}
static void nbd_parse_flags(struct nbd_device *nbd)
@ -1316,7 +1319,6 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->tag_set.timeout = 0;
nbd->disk->queue->limits.discard_granularity = 0;
nbd->disk->queue->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(nbd->disk->queue, 0);
mutex_unlock(&nbd->config_lock);
@ -1386,7 +1388,7 @@ static int nbd_start_device(struct nbd_device *nbd)
return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
static int nbd_start_device_ioctl(struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
int ret;
@ -1405,7 +1407,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
flush_workqueue(nbd->recv_workq);
mutex_lock(&nbd->config_lock);
nbd_bdev_reset(bdev);
nbd_bdev_reset(nbd);
/* user requested, ignore socket errors */
if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
@ -1419,7 +1421,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
{
sock_shutdown(nbd);
__invalidate_device(bdev, true);
nbd_bdev_reset(bdev);
nbd_bdev_reset(nbd);
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
@ -1465,7 +1467,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
config->flags = arg;
return 0;
case NBD_DO_IT:
return nbd_start_device_ioctl(nbd, bdev);
return nbd_start_device_ioctl(nbd);
case NBD_CLEAR_QUE:
/*
* This is for compatibility only. The queue is always cleared
@ -1576,7 +1578,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
struct nbd_device *nbd = disk->private_data;
if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
disk->part0->bd_openers == 0)
disk_openers(disk) == 0)
nbd_disconnect_and_put(nbd);
nbd_config_put(nbd);
@ -1781,7 +1783,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
disk->queue->limits.discard_granularity = 0;
disk->queue->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(disk->queue, 0);
blk_queue_max_segment_size(disk->queue, UINT_MAX);
blk_queue_max_segments(disk->queue, USHRT_MAX);
@ -2079,6 +2080,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
sock_shutdown(nbd);
wake_up(&nbd->config->conn_wait);
/*
* Make sure recv thread has finished, we can safely call nbd_clear_que()
* to cancel the inflight I/Os.

View File

@ -11,6 +11,9 @@
#include <linux/init.h>
#include "null_blk.h"
#undef pr_fmt
#define pr_fmt(fmt) "null_blk: " fmt
#define FREE_BATCH 16
#define TICKS_PER_SEC 50ULL
@ -232,6 +235,7 @@ static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
static int null_add_dev(struct nullb_device *dev);
static struct nullb *null_find_dev_by_name(const char *name);
static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
static inline struct nullb_device *to_nullb_device(struct config_item *item)
@ -560,6 +564,9 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
{
struct nullb_device *dev;
if (null_find_dev_by_name(name))
return ERR_PTR(-EEXIST);
dev = null_alloc_dev();
if (!dev)
return ERR_PTR(-ENOMEM);
@ -1765,7 +1772,6 @@ static void null_config_discard(struct nullb *nullb)
}
nullb->q->limits.discard_granularity = nullb->dev->blocksize;
nullb->q->limits.discard_alignment = nullb->dev->blocksize;
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
}
@ -2060,7 +2066,13 @@ static int null_add_dev(struct nullb_device *dev)
null_config_discard(nullb);
sprintf(nullb->disk_name, "nullb%d", nullb->index);
if (config_item_name(&dev->item)) {
/* Use configfs dir name as the device name */
snprintf(nullb->disk_name, sizeof(nullb->disk_name),
"%s", config_item_name(&dev->item));
} else {
sprintf(nullb->disk_name, "nullb%d", nullb->index);
}
rv = null_gendisk_register(nullb);
if (rv)
@ -2070,6 +2082,8 @@ static int null_add_dev(struct nullb_device *dev)
list_add_tail(&nullb->list, &nullb_list);
mutex_unlock(&lock);
pr_info("disk %s created\n", nullb->disk_name);
return 0;
out_cleanup_zone:
null_free_zoned_dev(dev);
@ -2087,12 +2101,53 @@ out:
return rv;
}
static struct nullb *null_find_dev_by_name(const char *name)
{
struct nullb *nullb = NULL, *nb;
mutex_lock(&lock);
list_for_each_entry(nb, &nullb_list, list) {
if (strcmp(nb->disk_name, name) == 0) {
nullb = nb;
break;
}
}
mutex_unlock(&lock);
return nullb;
}
static int null_create_dev(void)
{
struct nullb_device *dev;
int ret;
dev = null_alloc_dev();
if (!dev)
return -ENOMEM;
ret = null_add_dev(dev);
if (ret) {
null_free_dev(dev);
return ret;
}
return 0;
}
static void null_destroy_dev(struct nullb *nullb)
{
struct nullb_device *dev = nullb->dev;
null_del_dev(nullb);
null_free_dev(dev);
}
static int __init null_init(void)
{
int ret = 0;
unsigned int i;
struct nullb *nullb;
struct nullb_device *dev;
if (g_bs > PAGE_SIZE) {
pr_warn("invalid block size\n");
@ -2112,19 +2167,21 @@ static int __init null_init(void)
}
if (g_queue_mode == NULL_Q_RQ) {
pr_err("legacy IO path no longer available\n");
pr_err("legacy IO path is no longer available\n");
return -EINVAL;
}
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
if (g_submit_queues != nr_online_nodes) {
pr_warn("submit_queues param is set to %u.\n",
nr_online_nodes);
nr_online_nodes);
g_submit_queues = nr_online_nodes;
}
} else if (g_submit_queues > nr_cpu_ids)
} else if (g_submit_queues > nr_cpu_ids) {
g_submit_queues = nr_cpu_ids;
else if (g_submit_queues <= 0)
} else if (g_submit_queues <= 0) {
g_submit_queues = 1;
}
if (g_queue_mode == NULL_Q_MQ && shared_tags) {
ret = null_init_tag_set(NULL, &tag_set);
@ -2148,16 +2205,9 @@ static int __init null_init(void)
}
for (i = 0; i < nr_devices; i++) {
dev = null_alloc_dev();
if (!dev) {
ret = -ENOMEM;
ret = null_create_dev();
if (ret)
goto err_dev;
}
ret = null_add_dev(dev);
if (ret) {
null_free_dev(dev);
goto err_dev;
}
}
pr_info("module loaded\n");
@ -2166,9 +2216,7 @@ static int __init null_init(void)
err_dev:
while (!list_empty(&nullb_list)) {
nullb = list_entry(nullb_list.next, struct nullb, list);
dev = nullb->dev;
null_del_dev(nullb);
null_free_dev(dev);
null_destroy_dev(nullb);
}
unregister_blkdev(null_major, "nullb");
err_conf:
@ -2189,12 +2237,8 @@ static void __exit null_exit(void)
mutex_lock(&lock);
while (!list_empty(&nullb_list)) {
struct nullb_device *dev;
nullb = list_entry(nullb_list.next, struct nullb, list);
dev = nullb->dev;
null_del_dev(nullb);
null_free_dev(dev);
null_destroy_dev(nullb);
}
mutex_unlock(&lock);

View File

@ -16,13 +16,15 @@
#include <linux/mutex.h>
struct nullb_cmd {
struct request *rq;
struct bio *bio;
union {
struct request *rq;
struct bio *bio;
};
unsigned int tag;
blk_status_t error;
bool fake_timeout;
struct nullb_queue *nq;
struct hrtimer timer;
bool fake_timeout;
};
struct nullb_queue {

View File

@ -6,6 +6,9 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
#undef pr_fmt
#define pr_fmt(fmt) "null_blk: " fmt
static inline sector_t mb_to_sects(unsigned long mb)
{
return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
@ -75,8 +78,8 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
dev->zone_capacity = dev->zone_size;
if (dev->zone_capacity > dev->zone_size) {
pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
dev->zone_capacity, dev->zone_size);
pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
dev->zone_capacity, dev->zone_size);
return -EINVAL;
}

View File

@ -25,6 +25,7 @@ static int rnbd_client_major;
static DEFINE_IDA(index_ida);
static DEFINE_MUTEX(sess_lock);
static LIST_HEAD(sess_list);
static struct workqueue_struct *rnbd_clt_wq;
/*
* Maximum number of partitions an instance can have.
@ -1759,12 +1760,12 @@ static void rnbd_destroy_sessions(void)
* procedure takes minutes.
*/
INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work);
queue_work(system_long_wq, &dev->unmap_on_rmmod_work);
queue_work(rnbd_clt_wq, &dev->unmap_on_rmmod_work);
}
rnbd_clt_put_sess(sess);
}
/* Wait for all scheduled unmap works */
flush_workqueue(system_long_wq);
flush_workqueue(rnbd_clt_wq);
WARN_ON(!list_empty(&sess_list));
}
@ -1789,6 +1790,14 @@ static int __init rnbd_client_init(void)
pr_err("Failed to load module, creating sysfs device files failed, err: %d\n",
err);
unregister_blkdev(rnbd_client_major, "rnbd");
return err;
}
rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0);
if (!rnbd_clt_wq) {
pr_err("Failed to load module, alloc_workqueue failed.\n");
rnbd_clt_destroy_sysfs_files();
unregister_blkdev(rnbd_client_major, "rnbd");
err = -ENOMEM;
}
return err;
@ -1799,6 +1808,7 @@ static void __exit rnbd_client_exit(void)
rnbd_destroy_sessions();
unregister_blkdev(rnbd_client_major, "rnbd");
ida_destroy(&index_ida);
destroy_workqueue(rnbd_clt_wq);
}
module_init(rnbd_client_init);

View File

@ -59,7 +59,7 @@ static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
{
return bdev_get_queue(dev->bdev)->limits.discard_alignment;
return bdev_discard_alignment(dev->bdev);
}
#endif /* RNBD_SRV_DEV_H */

View File

@ -867,11 +867,12 @@ static int virtblk_probe(struct virtio_device *vdev)
blk_queue_io_opt(q, blk_size * opt_io_size);
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
q->limits.discard_granularity = blk_size;
virtio_cread(vdev, struct virtio_blk_config,
discard_sector_alignment, &v);
q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
if (v)
q->limits.discard_granularity = v << SECTOR_SHIFT;
else
q->limits.discard_granularity = blk_size;
virtio_cread(vdev, struct virtio_blk_config,
max_discard_sectors, &v);

View File

@ -575,7 +575,6 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
int err;
int state = 0;
struct block_device *bdev = be->blkif->vbd.bdev;
struct request_queue *q = bdev_get_queue(bdev);
if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
return;
@ -583,14 +582,14 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
if (bdev_max_discard_sectors(bdev)) {
err = xenbus_printf(xbt, dev->nodename,
"discard-granularity", "%u",
q->limits.discard_granularity);
bdev_discard_granularity(bdev));
if (err) {
dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
return;
}
err = xenbus_printf(xbt, dev->nodename,
"discard-alignment", "%u",
q->limits.discard_alignment);
bdev_discard_alignment(bdev));
if (err) {
dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
return;

View File

@ -1787,7 +1787,7 @@ static ssize_t reset_store(struct device *dev,
int ret;
unsigned short do_reset;
struct zram *zram;
struct block_device *bdev;
struct gendisk *disk;
ret = kstrtou16(buf, 10, &do_reset);
if (ret)
@ -1797,26 +1797,26 @@ static ssize_t reset_store(struct device *dev,
return -EINVAL;
zram = dev_to_zram(dev);
bdev = zram->disk->part0;
disk = zram->disk;
mutex_lock(&bdev->bd_disk->open_mutex);
mutex_lock(&disk->open_mutex);
/* Do not reset an active device or claimed device */
if (bdev->bd_openers || zram->claim) {
mutex_unlock(&bdev->bd_disk->open_mutex);
if (disk_openers(disk) || zram->claim) {
mutex_unlock(&disk->open_mutex);
return -EBUSY;
}
/* From now on, anyone can't open /dev/zram[0-9] */
zram->claim = true;
mutex_unlock(&bdev->bd_disk->open_mutex);
mutex_unlock(&disk->open_mutex);
/* Make sure all the pending I/O are finished */
sync_blockdev(bdev);
sync_blockdev(disk->part0);
zram_reset_device(zram);
mutex_lock(&bdev->bd_disk->open_mutex);
mutex_lock(&disk->open_mutex);
zram->claim = false;
mutex_unlock(&bdev->bd_disk->open_mutex);
mutex_unlock(&disk->open_mutex);
return len;
}
@ -1987,19 +1987,18 @@ out_free_dev:
static int zram_remove(struct zram *zram)
{
struct block_device *bdev = zram->disk->part0;
bool claimed;
mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers) {
mutex_unlock(&bdev->bd_disk->open_mutex);
mutex_lock(&zram->disk->open_mutex);
if (disk_openers(zram->disk)) {
mutex_unlock(&zram->disk->open_mutex);
return -EBUSY;
}
claimed = zram->claim;
if (!claimed)
zram->claim = true;
mutex_unlock(&bdev->bd_disk->open_mutex);
mutex_unlock(&zram->disk->open_mutex);
zram_debugfs_unregister(zram);
@ -2011,7 +2010,7 @@ static int zram_remove(struct zram *zram)
;
} else {
/* Make sure all the pending I/O are finished */
sync_blockdev(bdev);
sync_blockdev(zram->disk->part0);
zram_reset_device(zram);
}

View File

@ -1001,7 +1001,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);
limits->discard_alignment = DMZ_BLOCK_SIZE;
limits->discard_alignment = 0;
limits->discard_granularity = DMZ_BLOCK_SIZE;
limits->max_discard_sectors = chunk_sectors;
limits->max_hw_discard_sectors = chunk_sectors;

View File

@ -639,14 +639,6 @@ re_read:
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
write_behind = le32_to_cpu(sb->write_behind);
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
/* Setup nodes/clustername only if bitmap version is
* cluster-compatible
*/
if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
nodes = le32_to_cpu(sb->nodes);
strlcpy(bitmap->mddev->bitmap_info.cluster_name,
sb->cluster_name, 64);
}
/* verify that the bitmap-specific fields are valid */
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@ -668,6 +660,16 @@ re_read:
goto out;
}
/*
* Setup nodes/clustername only if bitmap version is
* cluster-compatible
*/
if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
nodes = le32_to_cpu(sb->nodes);
strscpy(bitmap->mddev->bitmap_info.cluster_name,
sb->cluster_name, 64);
}
/* keep the array size field of the bitmap superblock up to date */
sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
@ -695,14 +697,13 @@ re_read:
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
err = 0;
out:
kunmap_atomic(sb);
/* Assigning chunksize is required for "re_read" */
bitmap->mddev->bitmap_info.chunksize = chunksize;
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
/* Assigning chunksize is required for "re_read" */
bitmap->mddev->bitmap_info.chunksize = chunksize;
err = md_setup_cluster(bitmap->mddev, nodes);
if (err) {
pr_warn("%s: Could not setup cluster service (%d)\n",
@ -713,18 +714,18 @@ out:
goto re_read;
}
out_no_sb:
if (test_bit(BITMAP_STALE, &bitmap->flags))
bitmap->events_cleared = bitmap->mddev->events;
bitmap->mddev->bitmap_info.chunksize = chunksize;
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
bitmap->mddev->bitmap_info.nodes = nodes;
if (bitmap->mddev->bitmap_info.space == 0 ||
bitmap->mddev->bitmap_info.space > sectors_reserved)
bitmap->mddev->bitmap_info.space = sectors_reserved;
if (err) {
if (err == 0) {
if (test_bit(BITMAP_STALE, &bitmap->flags))
bitmap->events_cleared = bitmap->mddev->events;
bitmap->mddev->bitmap_info.chunksize = chunksize;
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
bitmap->mddev->bitmap_info.nodes = nodes;
if (bitmap->mddev->bitmap_info.space == 0 ||
bitmap->mddev->bitmap_info.space > sectors_reserved)
bitmap->mddev->bitmap_info.space = sectors_reserved;
} else {
md_bitmap_print_sb(bitmap);
if (bitmap->cluster_slot < 0)
md_cluster_stop(bitmap->mddev);

View File

@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
goto out_err;
}
strlcpy(res->name, name, namelen + 1);
strscpy(res->name, name, namelen + 1);
if (with_lvb) {
res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
if (!res->lksb.sb_lvbptr) {

View File

@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)
static bool does_sb_need_changing(struct mddev *mddev)
{
struct md_rdev *rdev;
struct md_rdev *rdev = NULL, *iter;
struct mdp_superblock_1 *sb;
int role;
/* Find a good rdev */
rdev_for_each(rdev, mddev)
if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
rdev_for_each(iter, mddev)
if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
rdev = iter;
break;
}
/* No good device found. */
if (!rdev)
@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
rdev_for_each(rdev, mddev) {
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
/* Device activated? */
if (role == 0xffff && rdev->raid_disk >=0 &&
if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags))
return true;
/* Device turned faulty? */
if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
return true;
}
@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
md_error(rdev->mddev, rdev);
if (test_bit(Faulty, &rdev->flags))
err = 0;
else
if (test_bit(MD_BROKEN, &rdev->mddev->flags))
err = -EBUSY;
else
err = 0;
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
oldpriv = mddev->private;
mddev->pers = pers;
mddev->private = priv;
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
mddev->level = mddev->new_level;
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = mddev->new_chunk_sectors;
@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
* like active, but no writes have been seen for a while (100msec).
*
* broken
* RAID0/LINEAR-only: same as clean, but array is missing a member.
* It's useful because RAID0/LINEAR mounted-arrays aren't stopped
* when a member is gone, so this state will at least alert the
* user that something is wrong.
* Array is failed. It's useful because mounted-arrays aren't stopped
* when array is failed, so this state will at least alert the user that
* something is wrong.
*/
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
write_pending, active_idle, broken, bad_word};
@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
len--;
if (len >= DISK_NAME_LEN)
return -E2BIG;
strlcpy(buf, val, len+1);
strscpy(buf, val, len+1);
if (strncmp(buf, "md_", 3) == 0)
return md_alloc(0, buf);
if (strncmp(buf, "md", 2) == 0 &&
@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
mddev->level = pers->level;
mddev->new_level = pers->level;
}
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
if (mddev->reshape_position != MaxSector &&
pers->start_reshape == NULL) {
@ -7443,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
err = -ENODEV;
else {
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags))
if (test_bit(MD_BROKEN, &mddev->flags))
err = -EBUSY;
}
rcu_read_unlock();
@ -7984,13 +7986,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
if (!mddev->pers || !mddev->pers->error_handler)
return;
mddev->pers->error_handler(mddev,rdev);
if (mddev->degraded)
mddev->pers->error_handler(mddev, rdev);
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
if (!test_bit(MD_BROKEN, &mddev->flags)) {
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
if (mddev->event_work.func)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event();
@ -9670,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
if (test_bit(Candidate, &rdev2->flags)) {
if (role == 0xfffe) {
if (role == MD_DISK_ROLE_FAULTY) {
pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
md_kick_rdev_from_array(rdev2);
continue;
@ -9683,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
/*
* got activated except reshape is happening.
*/
if (rdev2->raid_disk == -1 && role != 0xffff &&
if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
!(le32_to_cpu(sb->feature_map) &
MD_FEATURE_RESHAPE_ACTIVE)) {
rdev2->saved_raid_disk = role;
@ -9700,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
* as faulty. The recovery is performed by the
* one who initiated the error.
*/
if ((role == 0xfffe) || (role == 0xfffd)) {
if (role == MD_DISK_ROLE_FAULTY ||
role == MD_DISK_ROLE_JOURNAL) {
md_error(mddev, rdev2);
clear_bit(Blocked, &rdev2->flags);
}
@ -9790,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
void md_reload_sb(struct mddev *mddev, int nr)
{
struct md_rdev *rdev;
struct md_rdev *rdev = NULL, *iter;
int err;
/* Find the rdev */
rdev_for_each_rcu(rdev, mddev) {
if (rdev->desc_nr == nr)
rdev_for_each_rcu(iter, mddev) {
if (iter->desc_nr == nr) {
rdev = iter;
break;
}
}
if (!rdev || rdev->desc_nr != nr) {
if (!rdev) {
pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
return;
}

View File

@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
struct md_cluster_info;
/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
/**
* enum mddev_flags - md device flags.
* @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
* @MD_CLOSING: If set, we are closing the array, do not open it then.
* @MD_JOURNAL_CLEAN: A raid with journal is already clean.
* @MD_HAS_JOURNAL: The raid array has journal feature set.
* @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node, already took
* resync lock, need to release the lock.
* @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
* calls to md_error() will never cause the array to
* become failed.
* @MD_HAS_PPL: The raid array has PPL feature set.
* @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
* @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
* without taking reconfig_mutex.
* @MD_UPDATING_SB: md_check_recovery is updating the metadata without
* explicitly holding reconfig_mutex.
* @MD_NOT_READY: do_md_run() is active, so 'array_state', ust not report that
* array is ready yet.
* @MD_BROKEN: This is used to stop writes and mark array as failed.
*
* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
*/
enum mddev_flags {
MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
MD_CLOSING, /* If set, we are closing the array, do not open
* it then */
MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
MD_HAS_JOURNAL, /* The raid array has journal feature set */
MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
* already took resync lock, need to
* release the lock */
MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
* supported as calls to md_error() will
* never cause the array to become failed.
*/
MD_HAS_PPL, /* The raid array has PPL feature set */
MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
* the metadata without taking reconfig_mutex.
*/
MD_UPDATING_SB, /* md_check_recovery is updating the metadata
* without explicitly holding reconfig_mutex.
*/
MD_NOT_READY, /* do_md_run() is active, so 'array_state'
* must not report that array is ready yet
*/
MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
* I/O in case an array member is gone/failed.
*/
MD_ARRAY_FIRST_USE,
MD_CLOSING,
MD_JOURNAL_CLEAN,
MD_HAS_JOURNAL,
MD_CLUSTER_RESYNC_LOCKED,
MD_FAILFAST_SUPPORTED,
MD_HAS_PPL,
MD_HAS_MULTIPLE_PPLS,
MD_ALLOW_SB_UPDATE,
MD_UPDATING_SB,
MD_NOT_READY,
MD_BROKEN,
};
enum mddev_sb_flags {

View File

@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
pr_debug("md/raid0:%s: FINAL %d zones\n",
mdname(mddev), conf->nr_strip_zones);
if (conf->nr_strip_zones == 1) {
conf->layout = RAID0_ORIG_LAYOUT;
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = mddev->layout;
} else if (default_layout == RAID0_ORIG_LAYOUT ||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = default_layout;
} else {
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
mdname(mddev));
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
err = -ENOTSUPP;
goto abort;
}
/*
* now since we have the hard sector sizes, we can make sure
* chunk size is a multiple of that sector size
@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
(unsigned long long)smallest->sectors);
}
if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
conf->layout = RAID0_ORIG_LAYOUT;
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = mddev->layout;
} else if (default_layout == RAID0_ORIG_LAYOUT ||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = default_layout;
} else {
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
mdname(mddev));
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
err = -EOPNOTSUPP;
goto abort;
}
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
*private_conf = conf;

View File

@ -1641,30 +1641,39 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
seq_printf(seq, "]");
}
/**
* raid1_error() - RAID1 error handler.
* @mddev: affected md device.
* @rdev: member device to fail.
*
* The routine acknowledges &rdev failure and determines new @mddev state.
* If it failed, then:
* - &MD_BROKEN flag is set in &mddev->flags.
* - recovery is disabled.
* Otherwise, it must be degraded:
* - recovery is interrupted.
* - &mddev->degraded is bumped.
*
* @rdev is marked as &Faulty excluding case when array is failed and
* &mddev->fail_last_dev is off.
*/
static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
struct r1conf *conf = mddev->private;
unsigned long flags;
/*
* If it is not operational, then we have already marked it as dead
* else if it is the last working disks with "fail_last_dev == false",
* ignore the error, let the next level up know.
* else mark the drive as failed
*/
spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
&& (conf->raid_disks - mddev->degraded) == 1) {
/*
* Don't fail the drive, act as though we were just a
* normal single drive.
* However don't try a recovery from this drive as
* it is very likely to fail.
*/
conf->recovery_disabled = mddev->recovery_disabled;
spin_unlock_irqrestore(&conf->device_lock, flags);
return;
if (test_bit(In_sync, &rdev->flags) &&
(conf->raid_disks - mddev->degraded) == 1) {
set_bit(MD_BROKEN, &mddev->flags);
if (!mddev->fail_last_dev) {
conf->recovery_disabled = mddev->recovery_disabled;
spin_unlock_irqrestore(&conf->device_lock, flags);
return;
}
}
set_bit(Blocked, &rdev->flags);
if (test_and_clear_bit(In_sync, &rdev->flags))

View File

@ -1970,32 +1970,40 @@ static int enough(struct r10conf *conf, int ignore)
_enough(conf, 1, ignore);
}
/**
* raid10_error() - RAID10 error handler.
* @mddev: affected md device.
* @rdev: member device to fail.
*
* The routine acknowledges &rdev failure and determines new @mddev state.
* If it failed, then:
* - &MD_BROKEN flag is set in &mddev->flags.
* Otherwise, it must be degraded:
* - recovery is interrupted.
* - &mddev->degraded is bumped.
* @rdev is marked as &Faulty excluding case when array is failed and
* &mddev->fail_last_dev is off.
*/
static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
struct r10conf *conf = mddev->private;
unsigned long flags;
/*
* If it is not operational, then we have already marked it as dead
* else if it is the last working disks with "fail_last_dev == false",
* ignore the error, let the next level up know.
* else mark the drive as failed
*/
spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
&& !enough(conf, rdev->raid_disk)) {
/*
* Don't fail the drive, just return an IO error.
*/
spin_unlock_irqrestore(&conf->device_lock, flags);
return;
if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
set_bit(MD_BROKEN, &mddev->flags);
if (!mddev->fail_last_dev) {
spin_unlock_irqrestore(&conf->device_lock, flags);
return;
}
}
if (test_and_clear_bit(In_sync, &rdev->flags))
mddev->degraded++;
/*
* If recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);

View File

@ -883,7 +883,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
(unsigned long long)r_sector, dd_idx,
(unsigned long long)sector);
rdev = conf->disks[dd_idx].rdev;
/* Array has not started so rcu dereference is safe */
rdev = rcu_dereference_protected(
conf->disks[dd_idx].rdev, 1);
if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
sector >= rdev->recovery_offset)) {
pr_debug("%s:%*s data member disk %d missing\n",
@ -934,7 +936,10 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
parity_sector = raid5_compute_sector(conf, r_sector_first + i,
0, &disk, &sh);
BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
parity_rdev = conf->disks[sh.pd_idx].rdev;
/* Array has not started so rcu dereference is safe */
parity_rdev = rcu_dereference_protected(
conf->disks[sh.pd_idx].rdev, 1);
BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
pr_debug("%s:%*s write parity at sector %llu, disk %s\n",
@ -1404,7 +1409,9 @@ int ppl_init_log(struct r5conf *conf)
for (i = 0; i < ppl_conf->count; i++) {
struct ppl_log *log = &ppl_conf->child_logs[i];
struct md_rdev *rdev = conf->disks[i].rdev;
/* Array has not started so rcu dereference is safe */
struct md_rdev *rdev =
rcu_dereference_protected(conf->disks[i].rdev, 1);
mutex_init(&log->io_mutex);
spin_lock_init(&log->io_list_lock);

View File

@ -79,18 +79,21 @@ static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
}
static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
__acquires(&conf->device_lock)
{
spin_lock_irq(conf->hash_locks + hash);
spin_lock(&conf->device_lock);
}
static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
__releases(&conf->device_lock)
{
spin_unlock(&conf->device_lock);
spin_unlock_irq(conf->hash_locks + hash);
}
static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
__acquires(&conf->device_lock)
{
int i;
spin_lock_irq(conf->hash_locks);
@ -100,6 +103,7 @@ static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
}
static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
__releases(&conf->device_lock)
{
int i;
spin_unlock(&conf->device_lock);
@ -164,6 +168,7 @@ static bool stripe_is_lowprio(struct stripe_head *sh)
}
static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
__must_hold(&sh->raid_conf->device_lock)
{
struct r5conf *conf = sh->raid_conf;
struct r5worker_group *group;
@ -211,6 +216,7 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
struct list_head *temp_inactive_list)
__must_hold(&conf->device_lock)
{
int i;
int injournal = 0; /* number of date pages with R5_InJournal */
@ -296,6 +302,7 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
struct list_head *temp_inactive_list)
__must_hold(&conf->device_lock)
{
if (atomic_dec_and_test(&sh->count))
do_release_stripe(conf, sh, temp_inactive_list);
@ -350,9 +357,9 @@ static void release_inactive_stripe_list(struct r5conf *conf,
}
}
/* should hold conf->device_lock already */
static int release_stripe_list(struct r5conf *conf,
struct list_head *temp_inactive_list)
__must_hold(&conf->device_lock)
{
struct stripe_head *sh, *t;
int count = 0;
@ -629,6 +636,10 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
* This is because some failed devices may only affect one
* of the two sections, and some non-in_sync devices may
* be insync in the section most affected by failed devices.
*
* Most calls to this function hold &conf->device_lock. Calls
* in raid5_run() do not require the lock as no other threads
* have been started yet.
*/
int raid5_calc_degraded(struct r5conf *conf)
{
@ -686,17 +697,17 @@ int raid5_calc_degraded(struct r5conf *conf)
return degraded;
}
static int has_failed(struct r5conf *conf)
static bool has_failed(struct r5conf *conf)
{
int degraded;
int degraded = conf->mddev->degraded;
if (conf->mddev->reshape_position == MaxSector)
return conf->mddev->degraded > conf->max_degraded;
if (test_bit(MD_BROKEN, &conf->mddev->flags))
return true;
degraded = raid5_calc_degraded(conf);
if (degraded > conf->max_degraded)
return 1;
return 0;
if (conf->mddev->reshape_position != MaxSector)
degraded = raid5_calc_degraded(conf);
return degraded > conf->max_degraded;
}
struct stripe_head *
@ -2648,6 +2659,28 @@ static void shrink_stripes(struct r5conf *conf)
conf->slab_cache = NULL;
}
/*
* This helper wraps rcu_dereference_protected() and can be used when
* it is known that the nr_pending of the rdev is elevated.
*/
static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
{
return rcu_dereference_protected(rdev,
atomic_read(&rcu_access_pointer(rdev)->nr_pending));
}
/*
* This helper wraps rcu_dereference_protected() and should be used
* when it is known that the mddev_lock() is held. This is safe
* seeing raid5_remove_disk() has the same lock held.
*/
static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
struct md_rdev __rcu *rdev)
{
return rcu_dereference_protected(rdev,
lockdep_is_held(&mddev->reconfig_mutex));
}
static void raid5_end_read_request(struct bio * bi)
{
struct stripe_head *sh = bi->bi_private;
@ -2674,9 +2707,9 @@ static void raid5_end_read_request(struct bio * bi)
* In that case it moved down to 'rdev'.
* rdev is not removed until all requests are finished.
*/
rdev = conf->disks[i].replacement;
rdev = rdev_pend_deref(conf->disks[i].replacement);
if (!rdev)
rdev = conf->disks[i].rdev;
rdev = rdev_pend_deref(conf->disks[i].rdev);
if (use_new_offset(conf, sh))
s = sh->sector + rdev->new_data_offset;
@ -2790,11 +2823,11 @@ static void raid5_end_write_request(struct bio *bi)
for (i = 0 ; i < disks; i++) {
if (bi == &sh->dev[i].req) {
rdev = conf->disks[i].rdev;
rdev = rdev_pend_deref(conf->disks[i].rdev);
break;
}
if (bi == &sh->dev[i].rreq) {
rdev = conf->disks[i].replacement;
rdev = rdev_pend_deref(conf->disks[i].replacement);
if (rdev)
replacement = 1;
else
@ -2802,7 +2835,7 @@ static void raid5_end_write_request(struct bio *bi)
* replaced it. rdev is not removed
* until all requests are finished.
*/
rdev = conf->disks[i].rdev;
rdev = rdev_pend_deref(conf->disks[i].rdev);
break;
}
}
@ -2863,34 +2896,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
unsigned long flags;
pr_debug("raid456: error called\n");
pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
mdname(mddev), bdevname(rdev->bdev, b));
spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags) &&
mddev->degraded == conf->max_degraded) {
/*
* Don't allow to achieve failed state
* Don't try to recover this device
*/
conf->recovery_disabled = mddev->recovery_disabled;
spin_unlock_irqrestore(&conf->device_lock, flags);
return;
}
set_bit(Faulty, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
mddev->degraded = raid5_calc_degraded(conf);
if (has_failed(conf)) {
set_bit(MD_BROKEN, &conf->mddev->flags);
conf->recovery_disabled = mddev->recovery_disabled;
pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
mdname(mddev), mddev->degraded, conf->raid_disks);
} else {
pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
mdname(mddev), conf->raid_disks - mddev->degraded);
}
spin_unlock_irqrestore(&conf->device_lock, flags);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(Blocked, &rdev->flags);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
"md/raid:%s: Operation continuing on %d devices.\n",
mdname(mddev),
bdevname(rdev->bdev, b),
mdname(mddev),
conf->raid_disks - mddev->degraded);
r5c_update_on_rdev_error(mddev, rdev);
}
@ -5213,23 +5243,23 @@ finish:
struct r5dev *dev = &sh->dev[i];
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
/* We own a safe reference to the rdev */
rdev = conf->disks[i].rdev;
rdev = rdev_pend_deref(conf->disks[i].rdev);
if (!rdev_set_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0))
md_error(conf->mddev, rdev);
rdev_dec_pending(rdev, conf->mddev);
}
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
rdev = conf->disks[i].rdev;
rdev = rdev_pend_deref(conf->disks[i].rdev);
rdev_clear_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0);
rdev_dec_pending(rdev, conf->mddev);
}
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
rdev = conf->disks[i].replacement;
rdev = rdev_pend_deref(conf->disks[i].replacement);
if (!rdev)
/* rdev have been moved down */
rdev = conf->disks[i].rdev;
rdev = rdev_pend_deref(conf->disks[i].rdev);
rdev_clear_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0);
rdev_dec_pending(rdev, conf->mddev);
@ -5256,6 +5286,7 @@ finish:
}
static void raid5_activate_delayed(struct r5conf *conf)
__must_hold(&conf->device_lock)
{
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
while (!list_empty(&conf->delayed_list)) {
@ -5273,9 +5304,9 @@ static void raid5_activate_delayed(struct r5conf *conf)
}
static void activate_bit_delay(struct r5conf *conf,
struct list_head *temp_inactive_list)
struct list_head *temp_inactive_list)
__must_hold(&conf->device_lock)
{
/* device_lock is held */
struct list_head head;
list_add(&head, &conf->bitmap_list);
list_del_init(&conf->bitmap_list);
@ -5500,6 +5531,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
* handle_list.
*/
static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
__must_hold(&conf->device_lock)
{
struct stripe_head *sh, *tmp;
struct list_head *handle_list = NULL;
@ -6288,7 +6320,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
*/
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev == NULL || test_bit(Faulty, &rdev->flags))
still_degraded = 1;
@ -6371,8 +6403,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
static int handle_active_stripes(struct r5conf *conf, int group,
struct r5worker *worker,
struct list_head *temp_inactive_list)
__releases(&conf->device_lock)
__acquires(&conf->device_lock)
__must_hold(&conf->device_lock)
{
struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
int i, batch_size = 0, hash;
@ -7166,7 +7197,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
int i;
int group_cnt;
struct r5worker_group *new_group;
int ret;
int ret = -ENOMEM;
if (mddev->new_level != 5
&& mddev->new_level != 4
@ -7225,6 +7256,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
spin_lock_init(&conf->device_lock);
seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
mutex_init(&conf->cache_size_mutex);
init_waitqueue_head(&conf->wait_for_quiescent);
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
@ -7302,11 +7334,13 @@ static struct r5conf *setup_conf(struct mddev *mddev)
conf->level = mddev->new_level;
conf->chunk_sectors = mddev->new_chunk_sectors;
if (raid5_alloc_percpu(conf) != 0)
ret = raid5_alloc_percpu(conf);
if (ret)
goto abort;
pr_debug("raid456: run(%s) called.\n", mdname(mddev));
ret = -EIO;
rdev_for_each(rdev, mddev) {
raid_disk = rdev->raid_disk;
if (raid_disk >= max_disks
@ -7317,11 +7351,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (test_bit(Replacement, &rdev->flags)) {
if (disk->replacement)
goto abort;
disk->replacement = rdev;
RCU_INIT_POINTER(disk->replacement, rdev);
} else {
if (disk->rdev)
goto abort;
disk->rdev = rdev;
RCU_INIT_POINTER(disk->rdev, rdev);
}
if (test_bit(In_sync, &rdev->flags)) {
@ -7370,6 +7404,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (grow_stripes(conf, conf->min_nr_stripes)) {
pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n",
mdname(mddev), memory);
ret = -ENOMEM;
goto abort;
} else
pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory);
@ -7383,7 +7418,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
conf->shrinker.count_objects = raid5_cache_count;
conf->shrinker.batch = 128;
conf->shrinker.flags = 0;
if (register_shrinker(&conf->shrinker)) {
ret = register_shrinker(&conf->shrinker);
if (ret) {
pr_warn("md/raid:%s: couldn't register shrinker.\n",
mdname(mddev));
goto abort;
@ -7394,17 +7430,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (!conf->thread) {
pr_warn("md/raid:%s: couldn't allocate thread.\n",
mdname(mddev));
ret = -ENOMEM;
goto abort;
}
return conf;
abort:
if (conf) {
if (conf)
free_conf(conf);
return ERR_PTR(-EIO);
} else
return ERR_PTR(-ENOMEM);
return ERR_PTR(ret);
}
static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
@ -7621,17 +7656,18 @@ static int raid5_run(struct mddev *mddev)
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
i++) {
rdev = conf->disks[i].rdev;
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
if (!rdev && conf->disks[i].replacement) {
/* The replacement is all we have yet */
rdev = conf->disks[i].replacement;
rdev = rdev_mdlock_deref(mddev,
conf->disks[i].replacement);
conf->disks[i].replacement = NULL;
clear_bit(Replacement, &rdev->flags);
conf->disks[i].rdev = rdev;
rcu_assign_pointer(conf->disks[i].rdev, rdev);
}
if (!rdev)
continue;
if (conf->disks[i].replacement &&
if (rcu_access_pointer(conf->disks[i].replacement) &&
conf->reshape_progress != MaxSector) {
/* replacements and reshape simply do not mix. */
pr_warn("md: cannot handle concurrent replacement and reshape.\n");
@ -7749,7 +7785,6 @@ static int raid5_run(struct mddev *mddev)
*/
stripe = stripe * PAGE_SIZE;
stripe = roundup_pow_of_two(stripe);
mddev->queue->limits.discard_alignment = stripe;
mddev->queue->limits.discard_granularity = stripe;
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
@ -7828,8 +7863,8 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
static void print_raid5_conf (struct r5conf *conf)
{
struct md_rdev *rdev;
int i;
struct disk_info *tmp;
pr_debug("RAID conf printout:\n");
if (!conf) {
@ -7840,50 +7875,54 @@ static void print_raid5_conf (struct r5conf *conf)
conf->raid_disks,
conf->raid_disks - conf->mddev->degraded);
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
char b[BDEVNAME_SIZE];
tmp = conf->disks + i;
if (tmp->rdev)
rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev)
pr_debug(" disk %d, o:%d, dev:%s\n",
i, !test_bit(Faulty, &tmp->rdev->flags),
bdevname(tmp->rdev->bdev, b));
i, !test_bit(Faulty, &rdev->flags),
bdevname(rdev->bdev, b));
}
rcu_read_unlock();
}
static int raid5_spare_active(struct mddev *mddev)
{
int i;
struct r5conf *conf = mddev->private;
struct disk_info *tmp;
struct md_rdev *rdev, *replacement;
int count = 0;
unsigned long flags;
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->disks + i;
if (tmp->replacement
&& tmp->replacement->recovery_offset == MaxSector
&& !test_bit(Faulty, &tmp->replacement->flags)
&& !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
replacement = rdev_mdlock_deref(mddev,
conf->disks[i].replacement);
if (replacement
&& replacement->recovery_offset == MaxSector
&& !test_bit(Faulty, &replacement->flags)
&& !test_and_set_bit(In_sync, &replacement->flags)) {
/* Replacement has just become active. */
if (!tmp->rdev
|| !test_and_clear_bit(In_sync, &tmp->rdev->flags))
if (!rdev
|| !test_and_clear_bit(In_sync, &rdev->flags))
count++;
if (tmp->rdev) {
if (rdev) {
/* Replaced device not technically faulty,
* but we need to be sure it gets removed
* and never re-added.
*/
set_bit(Faulty, &tmp->rdev->flags);
set_bit(Faulty, &rdev->flags);
sysfs_notify_dirent_safe(
tmp->rdev->sysfs_state);
rdev->sysfs_state);
}
sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
} else if (tmp->rdev
&& tmp->rdev->recovery_offset == MaxSector
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
sysfs_notify_dirent_safe(replacement->sysfs_state);
} else if (rdev
&& rdev->recovery_offset == MaxSector
&& !test_bit(Faulty, &rdev->flags)
&& !test_and_set_bit(In_sync, &rdev->flags)) {
count++;
sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
sysfs_notify_dirent_safe(rdev->sysfs_state);
}
}
spin_lock_irqsave(&conf->device_lock, flags);
@ -7898,8 +7937,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
struct r5conf *conf = mddev->private;
int err = 0;
int number = rdev->raid_disk;
struct md_rdev **rdevp;
struct md_rdev __rcu **rdevp;
struct disk_info *p = conf->disks + number;
struct md_rdev *tmp;
print_raid5_conf(conf);
if (test_bit(Journal, &rdev->flags) && conf->log) {
@ -7917,9 +7957,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
log_exit(conf);
return 0;
}
if (rdev == p->rdev)
if (rdev == rcu_access_pointer(p->rdev))
rdevp = &p->rdev;
else if (rdev == p->replacement)
else if (rdev == rcu_access_pointer(p->replacement))
rdevp = &p->replacement;
else
return 0;
@ -7939,18 +7979,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (!test_bit(Faulty, &rdev->flags) &&
mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) &&
(!p->replacement || p->replacement == rdev) &&
(!rcu_access_pointer(p->replacement) ||
rcu_access_pointer(p->replacement) == rdev) &&
number < conf->raid_disks) {
err = -EBUSY;
goto abort;
}
*rdevp = NULL;
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
lockdep_assert_held(&mddev->reconfig_mutex);
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
/* lost the race, try later */
err = -EBUSY;
*rdevp = rdev;
rcu_assign_pointer(*rdevp, rdev);
}
}
if (!err) {
@ -7958,17 +8000,19 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (err)
goto abort;
}
if (p->replacement) {
tmp = rcu_access_pointer(p->replacement);
if (tmp) {
/* We must have just cleared 'rdev' */
p->rdev = p->replacement;
clear_bit(Replacement, &p->replacement->flags);
rcu_assign_pointer(p->rdev, tmp);
clear_bit(Replacement, &tmp->flags);
smp_mb(); /* Make sure other CPUs may see both as identical
* but will never see neither - if they are careful
*/
p->replacement = NULL;
rcu_assign_pointer(p->replacement, NULL);
if (!err)
err = log_modify(conf, p->rdev, true);
err = log_modify(conf, tmp, true);
}
clear_bit(WantReplacement, &rdev->flags);
@ -7984,6 +8028,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
int ret, err = -EEXIST;
int disk;
struct disk_info *p;
struct md_rdev *tmp;
int first = 0;
int last = conf->raid_disks - 1;
@ -8041,7 +8086,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
}
for (disk = first; disk <= last; disk++) {
p = conf->disks + disk;
if (test_bit(WantReplacement, &p->rdev->flags) &&
tmp = rdev_mdlock_deref(mddev, p->rdev);
if (test_bit(WantReplacement, &tmp->flags) &&
p->replacement == NULL) {
clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags);
@ -8332,6 +8378,7 @@ static void end_reshape(struct r5conf *conf)
static void raid5_finish_reshape(struct mddev *mddev)
{
struct r5conf *conf = mddev->private;
struct md_rdev *rdev;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@ -8343,10 +8390,12 @@ static void raid5_finish_reshape(struct mddev *mddev)
for (d = conf->raid_disks ;
d < conf->raid_disks - mddev->delta_disks;
d++) {
struct md_rdev *rdev = conf->disks[d].rdev;
rdev = rdev_mdlock_deref(mddev,
conf->disks[d].rdev);
if (rdev)
clear_bit(In_sync, &rdev->flags);
rdev = conf->disks[d].replacement;
rdev = rdev_mdlock_deref(mddev,
conf->disks[d].replacement);
if (rdev)
clear_bit(In_sync, &rdev->flags);
}

View File

@ -473,7 +473,8 @@ enum {
*/
struct disk_info {
struct md_rdev *rdev, *replacement;
struct md_rdev __rcu *rdev;
struct md_rdev __rcu *replacement;
struct page *extra_page; /* extra page to use in prexor */
};
@ -560,6 +561,16 @@ struct r5pending_data {
struct bio_list bios;
};
struct raid5_percpu {
struct page *spare_page; /* Used when checking P/Q in raid6 */
void *scribble; /* space for constructing buffer
* lists and performing address
* conversions
*/
int scribble_obj_size;
local_lock_t lock;
};
struct r5conf {
struct hlist_head *stripe_hashtbl;
/* only protect corresponding hash list and inactive_list */
@ -635,15 +646,7 @@ struct r5conf {
*/
int recovery_disabled;
/* per cpu variables */
struct raid5_percpu {
struct page *spare_page; /* Used when checking P/Q in raid6 */
void *scribble; /* space for constructing buffer
* lists and performing address
* conversions
*/
int scribble_obj_size;
local_lock_t lock;
} __percpu *percpu;
struct raid5_percpu __percpu *percpu;
int scribble_disks;
int scribble_sectors;
struct hlist_node node;

View File

@ -4,7 +4,6 @@
* Copyright (c) 2022, Oracle and/or its affiliates
*/
#include <linux/blkdev.h>
#include "nvme.h"
#ifdef CONFIG_NVME_VERBOSE_ERRORS
@ -92,6 +91,7 @@ static const char * const nvme_statuses[] = {
[NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
[NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
[NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
[NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready",
[NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
[NVME_SC_LBA_RANGE] = "LBA Out of Range",
[NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = {
[NVME_SC_COMPARE_FAILED] = "Compare Failure",
[NVME_SC_ACCESS_DENIED] = "Access Denied",
[NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
[NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error",
[NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
[NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
[NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
[NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error",
[NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
[NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command",
};
const unsigned char *nvme_get_error_status_str(u16 status)

View File

@ -1207,6 +1207,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
rq->timeout = ctrl->kato * HZ;
rq->end_io_data = ctrl;
rq->rq_flags |= RQF_QUIET;
blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io);
}
@ -1426,6 +1427,32 @@ out_free_id:
return error;
}
static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_id_ns_cs_indep **id)
{
struct nvme_command c = {
.identify.opcode = nvme_admin_identify,
.identify.nsid = cpu_to_le32(nsid),
.identify.cns = NVME_ID_CNS_NS_CS_INDEP,
};
int ret;
*id = kmalloc(sizeof(**id), GFP_KERNEL);
if (!*id)
return -ENOMEM;
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
if (ret) {
dev_warn(ctrl->device,
"Identify namespace (CS independent) failed (%d)\n",
ret);
kfree(*id);
return ret;
}
return 0;
}
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen, u32 *result)
{
@ -1628,13 +1655,15 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
queue->limits.discard_alignment = 0;
queue->limits.discard_granularity = size;
/* If discard is already enabled, don't reset queue limits */
if (queue->limits.max_discard_sectors)
return;
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
@ -1771,7 +1800,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
blk_queue_dma_alignment(q, 7);
blk_queue_dma_alignment(q, 3);
blk_queue_write_cache(q, vwc, vwc);
}
@ -2100,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = {
.pr_ops = &nvme_pr_ops,
};
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
{
unsigned long timeout =
((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
int ret;
@ -2116,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
usleep_range(1000, 2000);
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
if (time_after(jiffies, timeout_jiffies)) {
dev_err(ctrl->device,
"Device not ready; aborting %s, CSTS=0x%x\n",
enabled ? "initialisation" : "reset", csts);
@ -2147,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT);
return nvme_wait_ready(ctrl, ctrl->cap, false);
return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
}
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
{
unsigned dev_page_min;
u32 timeout;
int ret;
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
@ -2174,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ctrl->ctrl_config = NVME_CC_CSS_CSI;
else
ctrl->ctrl_config = NVME_CC_CSS_NVM;
if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
u32 crto;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
if (ret) {
dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
ret);
return ret;
}
if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
ctrl->ctrl_config |= NVME_CC_CRIME;
timeout = NVME_CRTO_CRIMT(crto);
} else {
timeout = NVME_CRTO_CRWMT(crto);
}
} else {
timeout = NVME_CAP_TIMEOUT(ctrl->cap);
}
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
@ -2182,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
return ret;
return nvme_wait_ready(ctrl, ctrl->cap, true);
return nvme_wait_ready(ctrl, timeout, true);
}
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
@ -2894,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
if (id->dmrl)
ctrl->max_discard_segments = id->dmrl;
if (id->dmrsl)
ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
ctrl->dmrsl = le32_to_cpu(id->dmrsl);
if (id->wzsl)
ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
@ -3080,10 +3129,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
if (ret)
return ret;
ret = nvme_init_non_mdts_limits(ctrl);
if (ret < 0)
return ret;
ret = nvme_configure_apst(ctrl);
if (ret < 0)
return ret;
@ -4092,11 +4137,26 @@ out:
static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns_ids ids = { };
struct nvme_id_ns_cs_indep *id;
struct nvme_ns *ns;
bool ready = true;
if (nvme_identify_ns_descs(ctrl, nsid, &ids))
return;
/*
* Check if the namespace is ready. If not ignore it, we will get an
* AEN once it becomes ready and restart the scan.
*/
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
!nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
ready = id->nstat & NVME_NSTAT_NRDY;
kfree(id);
}
if (!ready)
return;
ns = nvme_find_get_ns(ctrl, nsid);
if (ns) {
nvme_validate_ns(ns, &ids);
@ -4239,11 +4299,26 @@ static void nvme_scan_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, scan_work);
int ret;
/* No tagset on a live ctrl means IO queues could not created */
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
return;
/*
* Identify controller limits can change at controller reset due to
* new firmware download, even though it is not common we cannot ignore
* such scenario. Controller's non-mdts limits are reported in the unit
* of logical blocks that is dependent on the format of attached
* namespace. Hence re-read the limits at the time of ns allocation.
*/
ret = nvme_init_non_mdts_limits(ctrl);
if (ret < 0) {
dev_warn(ctrl->device,
"reading non-mdts-limits failed: %d\n", ret);
return;
}
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl);
@ -4841,6 +4916,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) !=
NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);

View File

@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
return ctrl->subsys->subnqn;
}
static inline void nvmf_complete_timed_out_request(struct request *rq)
{
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
blk_mq_complete_request(rq);
}
}
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);

View File

@ -284,6 +284,7 @@ struct nvme_ctrl {
#endif
u16 crdt[3];
u16 oncs;
u32 dmrsl;
u16 oacs;
u16 sqsize;
u32 max_namespaces;

View File

@ -1439,6 +1439,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
nvme_init_request(abort_req, &cmd);
abort_req->end_io_data = NULL;
abort_req->rq_flags |= RQF_QUIET;
blk_execute_rq_nowait(abort_req, false, abort_endio);
/*
@ -1775,6 +1776,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
if (IS_ERR(dev->ctrl.admin_q)) {
blk_mq_free_tag_set(&dev->admin_tagset);
dev->ctrl.admin_q = NULL;
return -ENOMEM;
}
if (!blk_get_queue(dev->ctrl.admin_q)) {
@ -2486,6 +2488,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
req->end_io_data = nvmeq;
init_completion(&nvmeq->delete_done);
req->rq_flags |= RQF_QUIET;
blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ?
nvme_del_cq_end : nvme_del_queue_end);
return 0;
@ -2675,7 +2678,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
struct pci_dev *pdev = to_pci_dev(dev->dev);
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(pdev)) {
if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
if (dev->ctrl.state == NVME_CTRL_LIVE ||

View File

@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
struct nvme_rdma_queue *queue = req->queue;
nvme_rdma_stop_queue(queue);
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
blk_mq_complete_request(rq);
}
nvmf_complete_timed_out_request(rq);
}
static enum blk_eh_timer_return

View File

@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
blk_mq_complete_request(rq);
}
nvmf_complete_timed_out_request(rq);
}
static enum blk_eh_timer_return

View File

@ -782,7 +782,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
q->limits.discard_granularity = logical_block_size;
q->limits.discard_alignment = PAGE_SIZE;
/* Calculate max_discard_sectors and make it PAGE aligned */
max_bytes = USHRT_MAX * logical_block_size;

View File

@ -44,7 +44,7 @@ struct block_device {
unsigned long bd_stamp;
bool bd_read_only; /* read-only policy */
dev_t bd_dev;
int bd_openers;
atomic_t bd_openers;
struct inode * bd_inode; /* will die */
struct super_block * bd_super;
void * bd_claiming;

View File

@ -176,6 +176,21 @@ static inline bool disk_live(struct gendisk *disk)
return !inode_unhashed(disk->part0->bd_inode);
}
/**
* disk_openers - returns how many openers are there for a disk
* @disk: disk to check
*
* This returns the number of openers for a disk. Note that this value is only
* stable if disk->open_mutex is held.
*
* Note: Due to a quirk in the block layer open code, each open partition is
* only counted once even if there are multiple openers.
*/
static inline unsigned int disk_openers(struct gendisk *disk)
{
return atomic_read(&disk->part0->bd_openers);
}
/*
* The gendisk is refcounted by the part0 block_device, and the bd_device
* therein is also used for device model presentation in sysfs.

View File

@ -137,6 +137,7 @@ enum {
NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory
* Space Control
*/
NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */
NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */
NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */
NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */
@ -161,6 +162,9 @@ enum {
#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
#define NVME_CRTO_CRIMT(crto) ((crto) >> 16)
#define NVME_CRTO_CRWMT(crto) ((crto) & 0xffff)
enum {
NVME_CMBSZ_SQS = 1 << 0,
NVME_CMBSZ_CQS = 1 << 1,
@ -204,8 +208,10 @@ enum {
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
NVME_CAP_CSS_NVM = 1 << 0,
NVME_CAP_CSS_CSI = 1 << 6,
NVME_CC_CRIME = 1 << 24,
};
enum {
NVME_CSTS_RDY = 1 << 0,
NVME_CSTS_CFS = 1 << 1,
NVME_CSTS_NSSRO = 1 << 4,
@ -214,10 +220,23 @@ enum {
NVME_CSTS_SHST_OCCUR = 1 << 2,
NVME_CSTS_SHST_CMPLT = 2 << 2,
NVME_CSTS_SHST_MASK = 3 << 2,
};
enum {
NVME_CMBMSC_CRE = 1 << 0,
NVME_CMBMSC_CMSE = 1 << 1,
};
enum {
NVME_CAP_CSS_NVM = 1 << 0,
NVME_CAP_CSS_CSI = 1 << 6,
};
enum {
NVME_CAP_CRMS_CRIMS = 1ULL << 59,
NVME_CAP_CRMS_CRWMS = 1ULL << 60,
};
struct nvme_id_power_state {
__le16 max_power; /* centiwatts */
__u8 rsvd2;
@ -405,6 +424,21 @@ struct nvme_id_ns {
__u8 vs[3712];
};
/* I/O Command Set Independent Identify Namespace Data Structure */
struct nvme_id_ns_cs_indep {
__u8 nsfeat;
__u8 nmic;
__u8 rescap;
__u8 fpi;
__le32 anagrpid;
__u8 nsattr;
__u8 rsvd9;
__le16 nvmsetid;
__le16 endgid;
__u8 nstat;
__u8 rsvd15[4081];
};
struct nvme_zns_lbafe {
__le64 zsze;
__u8 zdes;
@ -469,6 +503,7 @@ enum {
NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
NVME_ID_CNS_NS_CS_INDEP = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CTRL_NS_LIST = 0x12,
@ -522,6 +557,10 @@ enum {
NVME_NS_DPS_PI_TYPE3 = 3,
};
enum {
NVME_NSTAT_NRDY = 1 << 0,
};
enum {
NVME_NVM_NS_16B_GUARD = 0,
NVME_NVM_NS_32B_GUARD = 1,
@ -1583,6 +1622,7 @@ enum {
NVME_SC_NS_WRITE_PROTECTED = 0x20,
NVME_SC_CMD_INTERRUPTED = 0x21,
NVME_SC_TRANSIENT_TR_ERR = 0x22,
NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24,
NVME_SC_INVALID_IO_CMD_SET = 0x2C,
NVME_SC_LBA_RANGE = 0x80,
@ -1679,9 +1719,11 @@ enum {
/*
* Path-related Errors:
*/
NVME_SC_INTERNAL_PATH_ERROR = 0x300,
NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
NVME_SC_ANA_INACCESSIBLE = 0x302,
NVME_SC_ANA_TRANSITION = 0x303,
NVME_SC_CTRL_PATH_ERROR = 0x360,
NVME_SC_HOST_PATH_ERROR = 0x370,
NVME_SC_HOST_ABORTED_CMD = 0x371,

View File

@ -1,11 +1,6 @@
/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
/*
* include/linux/loop.h
*
* Written by Theodore Ts'o, 3/29/93.
*
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
* permitted under the GNU General Public License.
* Copyright 1993 by Theodore Ts'o.
*/
#ifndef _UAPI_LINUX_LOOP_H
#define _UAPI_LINUX_LOOP_H