nvme updates for 5.10

- fix keep alive timer modification (Amit Engel)
- order the PCI ID list more sensibly (Andy Shevchenko)
- clean up the open by controller helper (Chaitanya Kulkarni)
- use an xarray for the CSE log lookup (Chaitanya Kulkarni)
- support ZNS in nvmet passthrough mode (Chaitanya Kulkarni)
- fix nvme_ns_report_zones (me)
- add a sanity check to nvmet-fc (James Smart)
- fix interrupt allocation when too many polled queues are specified
  (Jeffle Xu)
- small nvmet-tcp optimization (Mark Wunderlich)
 -----BEGIN PGP SIGNATURE-----
 
 iQI/BAABCgApFiEEgdbnc3r/njty3Iq9D55TZVIEUYMFAl9wPVALHGhjaEBsc3Qu
 ZGUACgkQD55TZVIEUYP/BQ//XcnwDXYly0aNHcLCi/sVpVcRVKtuFrbRAi28WOQV
 OVSY0WSyM+JIFvHYI8LLLLqSE9n7mIkwhjjKHFGaU0F7qafO1jrRibgZcHYcOaga
 mzaHfo3O/5Ag3Mid7cwjhgjX7IIQhNAYbCU8VEfz2qOy6AZPf/1/hH3gekyM+wSK
 Qwfxj9MARNp6N8lYoVeU7xK8y/tW3PoVpKLB/B8b0RlU39xgRUWgaJtiCzmfciRq
 OMBPBPePlgynPFFIiXiOh4iaNJ8GPCCkd+kqZhF1Orcc9AGPDUHww6yooendHjCf
 zlKtMNiHn2tGVIivkTXmX6n5JUo5OQPuaUh0/Mdr8FyxH+elNR4DM/OqsGNY+oJx
 z0la6OASIq/TZzbrcV2icuAL8ppRoQLq9+HVIyt9u5uajsP9Noug8HtoTyvE8LdJ
 fu69fi9is89w+6dIIKenXKjJTObv0qA7ILCDHbaTAUOKq+FnFkpZcgBb2Wo9eS94
 3+Mx/vpGGJQV0FdWKjF8yOZFSGyAuifsqw4Fnbp5c6gRgTLZz/awUP0QaYMULs79
 HH+EiZ7kmJU+TTjtA+M2ao62L+/hE0/CzABX0GZTt5eRInlOu4DpuSqtV/o0w2YT
 22Ru1f0tt+N/q2sm64XNtj5nL9DcPtny3SGMJtlG9RdE5aR8W/bBoViYzvxsjkf1
 RVk=
 =Jw5w
 -----END PGP SIGNATURE-----

Merge tag 'nvme-5.10-2020-09-27' of git://git.infradead.org/nvme into for-5.10/drivers

Pull NVMe updates from Christoph:

"nvme updates for 5.10

 - fix keep alive timer modification (Amit Engel)
 - order the PCI ID list more sensibly (Andy Shevchenko)
 - clean up the open by controller helper (Chaitanya Kulkarni)
 - use an xarray for the CSE log lookup (Chaitanya Kulkarni)
 - support ZNS in nvmet passthrough mode (Chaitanya Kulkarni)
 - fix nvme_ns_report_zones (me)
 - add a sanity check to nvmet-fc (James Smart)
 - fix interrupt allocation when too many polled queues are specified
   (Jeffle Xu)
 - small nvmet-tcp optimization (Mark Wunderlich)"

* tag 'nvme-5.10-2020-09-27' of git://git.infradead.org/nvme:
  nvme-pci: allocate separate interrupt for the reserved non-polled I/O queue
  nvme: fix error handling in nvme_ns_report_zones
  nvmet-fc: fix missing check for no hostport struct
  nvmet: add passthru ZNS support
  nvmet: handle keep-alive timer when kato is modified by a set features cmd
  nvmet-tcp: have queue io_work context run on sock incoming cpu
  nvme-pci: Move enumeration by class to be last in the table
  nvme: use an xarray to lookup the Commands Supported and Effects log
  nvme: lift the file open code from nvme_ctrl_get_by_path

commit 1ed4211dc1
Jens Axboe, 2020-09-28 09:02:48 -06:00
10 changed files with 94 additions and 118 deletions


@@ -3025,26 +3025,10 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi)
{
struct nvme_cel *cel, *ret = NULL;
spin_lock_irq(&ctrl->lock);
list_for_each_entry(cel, &ctrl->cels, entry) {
if (cel->csi == csi) {
ret = cel;
break;
}
}
spin_unlock_irq(&ctrl->lock);
return ret;
}
static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
struct nvme_effects_log **log)
{
struct nvme_cel *cel = nvme_find_cel(ctrl, csi);
struct nvme_cel *cel = xa_load(&ctrl->cels, csi);
int ret;
if (cel)
@@ -3062,10 +3046,7 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
}
cel->csi = csi;
spin_lock_irq(&ctrl->lock);
list_add_tail(&cel->entry, &ctrl->cels);
spin_unlock_irq(&ctrl->lock);
xa_store(&ctrl->cels, cel->csi, cel, GFP_KERNEL);
out:
*log = &cel->log;
return 0;
@@ -4448,15 +4429,11 @@ static void nvme_free_ctrl(struct device *dev)
struct nvme_ctrl *ctrl =
container_of(dev, struct nvme_ctrl, ctrl_device);
struct nvme_subsystem *subsys = ctrl->subsys;
struct nvme_cel *cel, *next;
if (!subsys || ctrl->instance != subsys->instance)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
list_del(&cel->entry);
kfree(cel);
}
xa_destroy(&ctrl->cels);
nvme_mpath_uninit(ctrl);
__free_page(ctrl->discard_page);
@@ -4488,7 +4465,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
INIT_LIST_HEAD(&ctrl->namespaces);
INIT_LIST_HEAD(&ctrl->cels);
xa_init(&ctrl->cels);
init_rwsem(&ctrl->namespaces_rwsem);
ctrl->dev = dev;
ctrl->ops = ops;
@@ -4668,28 +4645,13 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_sync_queues);
struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path)
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file)
{
struct nvme_ctrl *ctrl;
struct file *f;
f = filp_open(path, O_RDWR, 0);
if (IS_ERR(f))
return ERR_CAST(f);
if (f->f_op != &nvme_dev_fops) {
ctrl = ERR_PTR(-EINVAL);
goto out_close;
if (file->f_op != &nvme_dev_fops)
return NULL;
return file->private_data;
}
ctrl = f->private_data;
nvme_get_ctrl(ctrl);
out_close:
filp_close(f, NULL);
return ctrl;
}
EXPORT_SYMBOL_NS_GPL(nvme_ctrl_get_by_path, NVME_TARGET_PASSTHRU);
EXPORT_SYMBOL_NS_GPL(nvme_ctrl_from_file, NVME_TARGET_PASSTHRU);
/*
* Check we didn't inadvertently grow the command structure sizes:
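
For reference, a minimal sketch of the xarray pattern this conversion relies on
(not taken from the patch; the example_* names are made up for illustration).
The Command Set Identifier is used directly as the array index, so the linear
list walk under ctrl->lock becomes a single xa_load(), and insertion needs no
external spinlock because the xarray does its own locking:

#include <linux/xarray.h>
#include <linux/slab.h>

struct example_cel {
	u8 csi;
	/* the cached log page payload would live here */
};

static DEFINE_XARRAY(example_cels);

static struct example_cel *example_get_cel(u8 csi)
{
	/* O(1) lookup keyed by CSI instead of walking a list */
	struct example_cel *cel = xa_load(&example_cels, csi);

	if (cel)
		return cel;

	cel = kzalloc(sizeof(*cel), GFP_KERNEL);
	if (!cel)
		return NULL;

	cel->csi = csi;
	/* xa_store() takes its own internal lock */
	xa_store(&example_cels, cel->csi, cel, GFP_KERNEL);
	return cel;
}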


@@ -300,7 +300,7 @@ struct nvme_ctrl {
unsigned long quirks;
struct nvme_id_power_state psd[32];
struct nvme_effects_log *effects;
struct list_head cels;
struct xarray cels;
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
@@ -822,7 +822,7 @@ static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 opcode);
void nvme_execute_passthru_rq(struct request *rq);
struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path);
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
void nvme_put_ns(struct nvme_ns *ns);


@@ -2038,32 +2038,30 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
.calc_sets = nvme_calc_irq_sets,
.priv = dev,
};
unsigned int irq_queues, this_p_queues;
unsigned int irq_queues, poll_queues;
/*
* Poll queues don't need interrupts, but we need at least one IO
* queue left over for non-polled IO.
* Poll queues don't need interrupts, but we need at least one I/O queue
* left over for non-polled I/O.
*/
this_p_queues = dev->nr_poll_queues;
if (this_p_queues >= nr_io_queues) {
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
} else {
irq_queues = nr_io_queues - this_p_queues + 1;
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
poll_queues = min(dev->nr_poll_queues, nr_io_queues - 1);
dev->io_queues[HCTX_TYPE_POLL] = poll_queues;
/* Initialize for the single interrupt case */
/*
* Initialize for the single interrupt case, will be updated in
* nvme_calc_irq_sets().
*/
dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
dev->io_queues[HCTX_TYPE_READ] = 0;
/*
* Some Apple controllers require all queues to use the
* first vector.
* We need interrupts for the admin queue and each non-polled I/O queue,
* but some Apple controllers require all queues to use the first
* vector.
*/
if (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR)
irq_queues = 1;
if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR))
irq_queues += (nr_io_queues - poll_queues);
return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues,
PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
}
@@ -3187,7 +3185,6 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
.driver_data = NVME_QUIRK_SINGLE_VECTOR },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
@@ -3195,6 +3192,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_SINGLE_VECTOR |
NVME_QUIRK_128_BYTES_SQES |
NVME_QUIRK_SHARED_TAGS },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);
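
A worked example of the new vector accounting in nvme_setup_irqs() (the queue
counts below are illustrative assumptions, not values from the patch).  One
vector is reserved for the admin queue plus one per non-polled I/O queue:

	nr_io_queues = 8, nr_poll_queues = 3:
		poll_queues = min(3, 8 - 1) = 3
		irq_queues  = 1 + (8 - 3)   = 6

	nr_io_queues = 4, nr_poll_queues = 16 (more poll queues requested than I/O queues):
		poll_queues = min(16, 4 - 1) = 3
		irq_queues  = 1 + (4 - 3)    = 2

The second case is the one the old code got wrong: it clamped this_p_queues but
left irq_queues at 1, so the reserved non-polled I/O queue had to share its
vector with the admin queue.  With NVME_QUIRK_SINGLE_VECTOR set, irq_queues
simply stays at 1, as before.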


@@ -133,28 +133,6 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
return NULL;
}
static int __nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
struct nvme_zone_report *report,
size_t buflen)
{
struct nvme_command c = { };
int ret;
c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
c.zmr.zra = NVME_ZRA_ZONE_REPORT;
c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
if (ret)
return ret;
return le64_to_cpu(report->nr_zones);
}
static int nvme_zone_parse_entry(struct nvme_ns *ns,
struct nvme_zone_descriptor *entry,
unsigned int idx, report_zones_cb cb,
@@ -182,6 +160,7 @@ static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nvme_zone_report *report;
struct nvme_command c = { };
int ret, zone_idx = 0;
unsigned int nz, i;
size_t buflen;
@@ -190,14 +169,26 @@ static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
if (!report)
return -ENOMEM;
c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
c.zmr.zra = NVME_ZRA_ZONE_REPORT;
c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
sector &= ~(ns->zsze - 1);
while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
memset(report, 0, buflen);
ret = __nvme_ns_report_zones(ns, sector, report, buflen);
if (ret < 0)
goto out_free;
nz = min_t(unsigned int, ret, nr_zones);
c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
if (ret) {
if (ret > 0)
ret = -EIO;
goto out_free;
}
nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
if (!nz)
break;
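
The functional fix here is the error handling: nvme_submit_sync_cmd() can
return a positive NVMe status code as well as a negative errno, and the old
__nvme_ns_report_zones() helper let such a status escape as if it were a zone
count; positive values now map to -EIO.  For reference, two bits of arithmetic
in the rewritten loop, with illustrative values (assumptions, not from the
patch; the mask trick works because the block layer requires power-of-two zone
sizes):

	buflen = 4096 bytes        ->  numd = 4096 / 4 - 1 = 1023 (0's based dword count)
	ns->zsze = 0x10000 sectors ->  sector 0x1ffff & ~(0x10000 - 1) = 0x10000 (zone start)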


@@ -727,7 +727,9 @@ u16 nvmet_set_feat_kato(struct nvmet_req *req)
{
u32 val32 = le32_to_cpu(req->cmd->common.cdw11);
nvmet_stop_keep_alive_timer(req->sq->ctrl);
req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
nvmet_start_keep_alive_timer(req->sq->ctrl);
nvmet_set_result(req, req->sq->ctrl->kato);
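
For context on the rounding: the Keep Alive Timeout value in cdw11 is given in
milliseconds, while nvmet keeps ctrl->kato in seconds (the timer below is armed
with kato * HZ), hence DIV_ROUND_UP.  With illustrative values:

	val32 = 15000 ms  ->  kato = DIV_ROUND_UP(15000, 1000) = 15 s
	val32 =  1500 ms  ->  kato = DIV_ROUND_UP(1500, 1000)  = 2 s

Stopping the timer before the update and restarting it afterwards re-arms the
delayed work with the new interval instead of leaving it running on the old
one; the start/stop helpers already handle the kato == 0 (keep-alive disabled)
case.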


@@ -395,7 +395,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
nvmet_ctrl_fatal_error(ctrl);
}
static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
return;
@@ -407,7 +407,7 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
return;


@@ -1019,7 +1019,7 @@ static void
nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport)
{
/* if LLDD not implemented, leave as NULL */
if (!hostport->hosthandle)
if (!hostport || !hostport->hosthandle)
return;
nvmet_fc_hostport_put(hostport);


@@ -395,6 +395,8 @@ void nvmet_get_feat_async_event(struct nvmet_req *req);
u16 nvmet_set_feat_kato(struct nvmet_req *req);
u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
void nvmet_execute_async_event(struct nvmet_req *req);
void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl);
void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl);
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);


@@ -456,10 +456,26 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
case NVME_ID_CNS_CS_CTRL:
switch (req->cmd->identify.csi) {
case NVME_CSI_ZNS:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
}
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
case NVME_ID_CNS_NS:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
case NVME_ID_CNS_CS_NS:
switch (req->cmd->identify.csi) {
case NVME_CSI_ZNS:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
}
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
default:
return nvmet_setup_passthru_command(req);
}
@@ -474,6 +490,7 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
{
struct nvme_ctrl *ctrl;
struct file *file;
int ret = -EINVAL;
void *old;
@@ -488,24 +505,29 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
goto out_unlock;
}
ctrl = nvme_ctrl_get_by_path(subsys->passthru_ctrl_path);
if (IS_ERR(ctrl)) {
ret = PTR_ERR(ctrl);
file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
goto out_unlock;
}
ctrl = nvme_ctrl_from_file(file);
if (!ctrl) {
pr_err("failed to open nvme controller %s\n",
subsys->passthru_ctrl_path);
goto out_unlock;
goto out_put_file;
}
old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
subsys, GFP_KERNEL);
if (xa_is_err(old)) {
ret = xa_err(old);
goto out_put_ctrl;
goto out_put_file;
}
if (old)
goto out_put_ctrl;
goto out_put_file;
subsys->passthru_ctrl = ctrl;
subsys->ver = ctrl->vs;
@@ -516,13 +538,12 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
NVME_TERTIARY(subsys->ver));
subsys->ver = NVME_VS(1, 2, 1);
}
nvme_get_ctrl(ctrl);
__module_get(subsys->passthru_ctrl->ops->module);
mutex_unlock(&subsys->lock);
return 0;
ret = 0;
out_put_ctrl:
nvme_put_ctrl(ctrl);
out_put_file:
filp_close(file, NULL);
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
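
Because the removed and added lines are interleaved above, here is a
consolidated sketch of the resulting open sequence in
nvmet_passthru_ctrl_enable() (reconstructed from the hunks, surrounding code
abbreviated).  The file lookup, the controller reference and the module
reference are now all taken by the caller, and the struct file is closed again
once the controller reference is held:

	file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_unlock;
	}

	ctrl = nvme_ctrl_from_file(file);	/* NULL unless this is an nvme char device */
	if (!ctrl)
		goto out_put_file;		/* ret is still the initial -EINVAL */

	/* ... claim the controller in the passthru_subsystems xarray ... */

	nvme_get_ctrl(ctrl);			/* reference taken explicitly by the caller */
	__module_get(subsys->passthru_ctrl->ops->module);
	ret = 0;

out_put_file:
	filp_close(file, NULL);			/* the ctrl reference, not the open file, keeps the controller alive */
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;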


@@ -94,7 +94,6 @@ struct nvmet_tcp_queue {
struct socket *sock;
struct nvmet_tcp_port *port;
struct work_struct io_work;
int cpu;
struct nvmet_cq nvme_cq;
struct nvmet_sq nvme_sq;
@@ -144,7 +143,6 @@ struct nvmet_tcp_port {
struct work_struct accept_work;
struct nvmet_port *nport;
struct sockaddr_storage addr;
int last_cpu;
void (*data_ready)(struct sock *);
};
@@ -219,6 +217,11 @@ static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd)
list_add_tail(&cmd->entry, &cmd->queue->free_list);
}
static inline int queue_cpu(struct nvmet_tcp_queue *queue)
{
return queue->sock->sk->sk_incoming_cpu;
}
static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
{
return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
@@ -506,7 +509,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
struct nvmet_tcp_queue *queue = cmd->queue;
llist_add(&cmd->lentry, &queue->resp_list);
queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
}
static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
@@ -1223,7 +1226,7 @@ static void nvmet_tcp_io_work(struct work_struct *w)
* We exhausted our budget, requeue ourselves
*/
if (pending)
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
@@ -1383,7 +1386,7 @@ static void nvmet_tcp_data_ready(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (likely(queue))
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1403,7 +1406,7 @@ static void nvmet_tcp_write_space(struct sock *sk)
if (sk_stream_is_writeable(sk)) {
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -1512,9 +1515,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_free_connect;
port->last_cpu = cpumask_next_wrap(port->last_cpu,
cpu_online_mask, -1, false);
queue->cpu = port->last_cpu;
nvmet_prepare_receive_pdu(queue);
mutex_lock(&nvmet_tcp_queue_mutex);
@@ -1525,7 +1525,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_destroy_sq;
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
return 0;
out_destroy_sq:
@@ -1612,7 +1612,6 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
}
port->nport = nport;
port->last_cpu = -1;
INIT_WORK(&port->accept_work, nvmet_tcp_accept_work);
if (port->nport->inline_data_size < 0)
port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE;
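
queue_cpu() simply reads sk->sk_incoming_cpu, i.e. the CPU on which the network
stack last processed packets for that socket, so io_work is queued where the
data already is instead of on a per-port round-robin CPU.  The same information
is visible from user space through the SO_INCOMING_CPU socket option; a small
standalone illustration (not related to the patch, assumes a connected TCP
socket fd on Linux 3.19 or later):

#include <stdio.h>
#include <sys/socket.h>

static void print_incoming_cpu(int fd)
{
	int cpu = -1;
	socklen_t len = sizeof(cpu);

	if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len) == 0)
		printf("packets for fd %d were last processed on CPU %d\n", fd, cpu);
	else
		perror("getsockopt(SO_INCOMING_CPU)");
}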