nvme updates for Linux 6.9

- Make an informative message less ominous (Keith)
  - Enhanced trace decoding (Guixin)
  - TCP updates (Hannes, Li)
  - Fabrics connect deadlock fix (Chunguang)
  - Platform API migration update (Uwe)
  - A new device quirk (Jiawei)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEE3Fbyvv+648XNRdHTPe3zGtjzRgkFAmX8eFoACgkQPe3zGtjz
 RglB1A//Tw/U2aXAMSe6sX01vL6u3uCY6moXz/C7taa3tkn6TeFDg905jBDbX89E
 HCaQKBpppG9SfQg/CS5pkTmUboW0kDjDKUUwK5IOUrVbaPC9JQ4FhJ4dqoTvd8I+
 CgsUpuF6HWQaSWr+2atfJLDSklGCVJrvs2YxycCpYaaHaBeBkc8dwRk+ec1RlePW
 m2Kq4volZ8mVQ4FXu21YfH281Hw9gndqWMXPFMiv7U2zrVB9OYP9Z3BqLTq2cUa5
 ce/4CY7nXv/kEddnuLYTBKUz/ymnXvD+FE8Ibzt1YH8laC96Juj9WUZmRy/okzAZ
 V/BpGriYzzCANVNyQxNmp9okskBdGXuhpefwOXZ17jbRUrfmNo5yU/TuxM8nvG9P
 ikR5qZSaOq3cELDAwtVOHkq6dyhimxW4HNVr2fIY0KEVgqL1ph4zSVckMGiW4/Dq
 FbIB2fTc4sVjZmhHuuevYoelAulHnlMxX8RNrk5us6AT4f+1hYcqdxCCR3IsX/Jw
 GdUYPz/EB8HnSmxCGEWPyI2Ldf+Id9xFUOMpsoEWz+637TRGdNv1ADfBeUUnBoWz
 DBEBo8YMcYeKgv0ohZql5B5g3Bl9GfyRCcDua/uubwobrm/tzGfHamE4LYp5NeQY
 fqt0F8WmAG0Dey/6whSo+29dhwYnCpHl7xQLNhz6wMhqlXUdQm0=
 =Cntu
 -----END PGP SIGNATURE-----

Merge tag 'nvme-6.9-2024-03-21' of git://git.infradead.org/nvme into block-6.9

Pull NVMe fixes from Keith:

"nvme updates for Linux 6.9

 - Make an informative message less ominous (Keith)
 - Enhanced trace decoding (Guixin)
 - TCP updates (Hannes, Li)
 - Fabrics connect deadlock fix (Chunguang)
 - Platform API migration update (Uwe)
 - A new device quirk (Jiawei)"

* tag 'nvme-6.9-2024-03-21' of git://git.infradead.org/nvme:
  nvmet-rdma: remove NVMET_RDMA_REQ_INVALIDATE_RKEY flag
  nvme: remove redundant BUILD_BUG_ON check
  nvme/tcp: Add wq_unbound modparam for nvme_tcp_wq
  nvme-tcp: Export the nvme_tcp_wq to sysfs
  drivers/nvme: Add quirks for device 126f:2262
  nvme: parse format command's lbafu when tracing
  nvme: add tracing of reservation commands
  nvme: parse zns command's zsa and zrasf to string
  nvme: use nvme_disk_is_ns_head helper
  nvme: fix reconnection fail due to reserved tag allocation
  nvmet: add tracing of zns commands
  nvmet: add tracing of authentication commands
  nvme-apple: Convert to platform remove callback returning void
  nvmet-tcp: do not continue for invalid icreq
  nvme: change shutdown timeout setting message
This commit is contained in:
Jens Axboe 2024-03-21 13:23:07 -06:00
commit 0760267809
11 changed files with 233 additions and 33 deletions

View File

@ -1532,7 +1532,7 @@ put_dev:
return ret;
}
static int apple_nvme_remove(struct platform_device *pdev)
static void apple_nvme_remove(struct platform_device *pdev)
{
struct apple_nvme *anv = platform_get_drvdata(pdev);
@ -1547,8 +1547,6 @@ static int apple_nvme_remove(struct platform_device *pdev)
apple_rtkit_shutdown(anv->rtk);
apple_nvme_detach_genpd(anv);
return 0;
}
static void apple_nvme_shutdown(struct platform_device *pdev)
@ -1598,7 +1596,7 @@ static struct platform_driver apple_nvme_driver = {
.pm = pm_sleep_ptr(&apple_nvme_pm_ops),
},
.probe = apple_nvme_probe,
.remove = apple_nvme_remove,
.remove_new = apple_nvme_remove,
.shutdown = apple_nvme_shutdown,
};
module_platform_driver(apple_nvme_driver);

View File

@ -1807,9 +1807,6 @@ static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim)
{
struct nvme_ctrl *ctrl = ns->ctrl;
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
lim->max_hw_discard_sectors =
nvme_lba_to_sect(ns->head, ctrl->dmrsl);
@ -3237,7 +3234,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
if (ctrl->shutdown_timeout != shutdown_timeout)
dev_info(ctrl->device,
"Shutdown timeout set to %u seconds\n",
"D3 entry latency set to %u seconds\n",
ctrl->shutdown_timeout);
} else
ctrl->shutdown_timeout = shutdown_timeout;
@ -4391,7 +4388,8 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
set->ops = ops;
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
if (ctrl->ops->flags & NVME_F_FABRICS)
set->reserved_tags = NVMF_RESERVED_TAGS;
/* Reserved for fabric connect and keep alive */
set->reserved_tags = 2;
set->numa_node = ctrl->numa_node;
set->flags = BLK_MQ_F_NO_SCHED;
if (ctrl->ops->flags & NVME_F_BLOCKING)
@ -4460,7 +4458,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS)
set->reserved_tags = NVME_AQ_DEPTH;
else if (ctrl->ops->flags & NVME_F_FABRICS)
set->reserved_tags = NVMF_RESERVED_TAGS;
/* Reserved for fabric connect */
set->reserved_tags = 1;
set->numa_node = ctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
if (ctrl->ops->flags & NVME_F_BLOCKING)

View File

@ -18,13 +18,6 @@
/* default is -1: the fail fast mechanism is disabled */
#define NVMF_DEF_FAIL_FAST_TMO -1
/*
* Reserved one command for internal usage. This command is used for sending
* the connect command, as well as for the keep alive command on the admin
* queue once live.
*/
#define NVMF_RESERVED_TAGS 1
/*
* Define a host as seen by the target. We allocate one at boot, but also
* allow the override it when creating controllers. This is both to provide

View File

@ -3363,6 +3363,9 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_BOGUS_NID, },
{ PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
NVME_QUIRK_BOGUS_NID, },

View File

@ -97,8 +97,7 @@ static int nvme_sc_to_pr_err(int nvme_sc)
static int nvme_send_pr_command(struct block_device *bdev,
struct nvme_command *c, void *data, unsigned int data_len)
{
if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
nvme_disk_is_ns_head(bdev->bd_disk))
if (nvme_disk_is_ns_head(bdev->bd_disk))
return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,

View File

@ -236,8 +236,7 @@ static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
struct block_device *bdev = disk->part0;
int ret;
if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
bdev->bd_disk->fops == &nvme_ns_head_ops)
if (nvme_disk_is_ns_head(bdev->bd_disk))
ret = ns_head_update_nuse(head);
else
ret = ns_update_nuse(bdev->bd_disk->private_data);

View File

@ -36,6 +36,14 @@ static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
/*
* Use the unbound workqueue for nvme_tcp_wq, then we can set the cpu affinity
* from sysfs.
*/
static bool wq_unbound;
module_param(wq_unbound, bool, 0644);
MODULE_PARM_DESC(wq_unbound, "Use unbound workqueue for nvme-tcp IO context (default false)");
/*
* TLS handshake timeout
*/
@ -1546,7 +1554,10 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
else if (nvme_tcp_poll_queue(queue))
n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
ctrl->io_queues[HCTX_TYPE_READ] - 1;
queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
if (wq_unbound)
queue->io_cpu = WORK_CPU_UNBOUND;
else
queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
}
static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
@ -2785,6 +2796,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
static int __init nvme_tcp_init_module(void)
{
unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
@ -2794,8 +2807,10 @@ static int __init nvme_tcp_init_module(void)
BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
if (wq_unbound)
wq_flags |= WQ_UNBOUND;
nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
if (!nvme_tcp_wq)
return -ENOMEM;

View File

@ -119,7 +119,10 @@ static const char *nvme_trace_get_lba_status(struct trace_seq *p,
static const char *nvme_trace_admin_format_nvm(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
u8 lbaf = cdw10[0] & 0xF;
/*
* lbafu(bit 13:12) is already in the upper 4 bits, lbafl: bit 03:00.
*/
u8 lbaf = (cdw10[1] & 0x30) | (cdw10[0] & 0xF);
u8 mset = (cdw10[0] >> 4) & 0x1;
u8 pi = (cdw10[0] >> 5) & 0x7;
u8 pil = cdw10[1] & 0x1;
@ -164,12 +167,27 @@ static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10)
static const char *nvme_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10)
{
static const char * const zsa_strs[] = {
[0x01] = "close zone",
[0x02] = "finish zone",
[0x03] = "open zone",
[0x04] = "reset zone",
[0x05] = "offline zone",
[0x10] = "set zone descriptor extension"
};
const char *ret = trace_seq_buffer_ptr(p);
u64 slba = get_unaligned_le64(cdw10);
const char *zsa_str;
u8 zsa = cdw10[12];
u8 all = cdw10[13];
trace_seq_printf(p, "slba=%llu, zsa=%u, all=%u", slba, zsa, all);
if (zsa < ARRAY_SIZE(zsa_strs) && zsa_strs[zsa])
zsa_str = zsa_strs[zsa];
else
zsa_str = "reserved";
trace_seq_printf(p, "slba=%llu, zsa=%u:%s, all=%u",
slba, zsa, zsa_str, all);
trace_seq_putc(p, 0);
return ret;
@ -177,15 +195,86 @@ static const char *nvme_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10)
static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
{
static const char * const zrasf_strs[] = {
[0x00] = "list all zones",
[0x01] = "list the zones in the ZSE: Empty state",
[0x02] = "list the zones in the ZSIO: Implicitly Opened state",
[0x03] = "list the zones in the ZSEO: Explicitly Opened state",
[0x04] = "list the zones in the ZSC: Closed state",
[0x05] = "list the zones in the ZSF: Full state",
[0x06] = "list the zones in the ZSRO: Read Only state",
[0x07] = "list the zones in the ZSO: Offline state",
[0x09] = "list the zones that have the zone attribute"
};
const char *ret = trace_seq_buffer_ptr(p);
u64 slba = get_unaligned_le64(cdw10);
u32 numd = get_unaligned_le32(cdw10 + 8);
u8 zra = cdw10[12];
u8 zrasf = cdw10[13];
const char *zrasf_str;
u8 pr = cdw10[14];
trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u, pr=%u",
slba, numd, zra, zrasf, pr);
if (zrasf < ARRAY_SIZE(zrasf_strs) && zrasf_strs[zrasf])
zrasf_str = zrasf_strs[zrasf];
else
zrasf_str = "reserved";
trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u:%s, pr=%u",
slba, numd, zra, zrasf, zrasf_str, pr);
trace_seq_putc(p, 0);
return ret;
}
static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
u8 rrega = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 ptpl = (cdw10[3] >> 6) & 0x3;
trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u",
rrega, iekey, ptpl);
trace_seq_putc(p, 0);
return ret;
}
static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
u8 racqa = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 rtype = cdw10[1];
trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u",
racqa, iekey, rtype);
trace_seq_putc(p, 0);
return ret;
}
static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
u8 rrela = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 rtype = cdw10[1];
trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u",
rrela, iekey, rtype);
trace_seq_putc(p, 0);
return ret;
}
static const char *nvme_trace_resv_report(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
u32 numd = get_unaligned_le32(cdw10);
u8 eds = cdw10[4] & 0x1;
trace_seq_printf(p, "numd=%u, eds=%u", numd, eds);
trace_seq_putc(p, 0);
return ret;
@ -243,6 +332,14 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
return nvme_trace_zone_mgmt_send(p, cdw10);
case nvme_cmd_zone_mgmt_recv:
return nvme_trace_zone_mgmt_recv(p, cdw10);
case nvme_cmd_resv_register:
return nvme_trace_resv_reg(p, cdw10);
case nvme_cmd_resv_acquire:
return nvme_trace_resv_acq(p, cdw10);
case nvme_cmd_resv_release:
return nvme_trace_resv_rel(p, cdw10);
case nvme_cmd_resv_report:
return nvme_trace_resv_report(p, cdw10);
default:
return nvme_trace_common(p, cdw10);
}

View File

@ -53,7 +53,6 @@ struct nvmet_rdma_cmd {
enum {
NVMET_RDMA_REQ_INLINE_DATA = (1 << 0),
NVMET_RDMA_REQ_INVALIDATE_RKEY = (1 << 1),
};
struct nvmet_rdma_rsp {
@ -722,7 +721,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
struct ib_send_wr *first_wr;
if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
if (rsp->invalidate_rkey) {
rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
rsp->send_wr.ex.invalidate_rkey = rsp->invalidate_rkey;
} else {
@ -905,10 +904,8 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
goto error_out;
rsp->n_rdma += ret;
if (invalidate) {
if (invalidate)
rsp->invalidate_rkey = key;
rsp->flags |= NVMET_RDMA_REQ_INVALIDATE_RKEY;
}
return 0;
@ -1047,6 +1044,7 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
rsp->req.cmd = cmd->nvme_cmd;
rsp->req.port = queue->port;
rsp->n_rdma = 0;
rsp->invalidate_rkey = 0;
if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
unsigned long flags;

View File

@ -898,6 +898,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
pr_err("bad nvme-tcp pdu length (%d)\n",
le32_to_cpu(icreq->hdr.plen));
nvmet_tcp_fatal_error(queue);
return -EPROTO;
}
if (icreq->pfv != NVME_TCP_PFV_1_0) {

View File

@ -119,6 +119,67 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p,
}
}
static const char *nvmet_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10)
{
static const char * const zsa_strs[] = {
[0x01] = "close zone",
[0x02] = "finish zone",
[0x03] = "open zone",
[0x04] = "reset zone",
[0x05] = "offline zone",
[0x10] = "set zone descriptor extension"
};
const char *ret = trace_seq_buffer_ptr(p);
u64 slba = get_unaligned_le64(cdw10);
const char *zsa_str;
u8 zsa = cdw10[12];
u8 all = cdw10[13];
if (zsa < ARRAY_SIZE(zsa_strs) && zsa_strs[zsa])
zsa_str = zsa_strs[zsa];
else
zsa_str = "reserved";
trace_seq_printf(p, "slba=%llu, zsa=%u:%s, all=%u",
slba, zsa, zsa_str, all);
trace_seq_putc(p, 0);
return ret;
}
static const char *nvmet_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
{
static const char * const zrasf_strs[] = {
[0x00] = "list all zones",
[0x01] = "list the zones in the ZSE: Empty state",
[0x02] = "list the zones in the ZSIO: Implicitly Opened state",
[0x03] = "list the zones in the ZSEO: Explicitly Opened state",
[0x04] = "list the zones in the ZSC: Closed state",
[0x05] = "list the zones in the ZSF: Full state",
[0x06] = "list the zones in the ZSRO: Read Only state",
[0x07] = "list the zones in the ZSO: Offline state",
[0x09] = "list the zones that have the zone attribute"
};
const char *ret = trace_seq_buffer_ptr(p);
u64 slba = get_unaligned_le64(cdw10);
u32 numd = get_unaligned_le32(&cdw10[8]);
u8 zra = cdw10[12];
u8 zrasf = cdw10[13];
const char *zrasf_str;
u8 pr = cdw10[14];
if (zrasf < ARRAY_SIZE(zrasf_strs) && zrasf_strs[zrasf])
zrasf_str = zrasf_strs[zrasf];
else
zrasf_str = "reserved";
trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u:%s, pr=%u",
slba, numd, zra, zrasf, zrasf_str, pr);
trace_seq_putc(p, 0);
return ret;
}
const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
u8 opcode, u8 *cdw10)
{
@ -126,9 +187,14 @@ const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
case nvme_cmd_read:
case nvme_cmd_write:
case nvme_cmd_write_zeroes:
case nvme_cmd_zone_append:
return nvmet_trace_read_write(p, cdw10);
case nvme_cmd_dsm:
return nvmet_trace_dsm(p, cdw10);
case nvme_cmd_zone_mgmt_send:
return nvmet_trace_zone_mgmt_send(p, cdw10);
case nvme_cmd_zone_mgmt_recv:
return nvmet_trace_zone_mgmt_recv(p, cdw10);
default:
return nvmet_trace_common(p, cdw10);
}
@ -176,6 +242,34 @@ static const char *nvmet_trace_fabrics_property_get(struct trace_seq *p,
return ret;
}
static const char *nvmet_trace_fabrics_auth_send(struct trace_seq *p, u8 *spc)
{
const char *ret = trace_seq_buffer_ptr(p);
u8 spsp0 = spc[1];
u8 spsp1 = spc[2];
u8 secp = spc[3];
u32 tl = get_unaligned_le32(spc + 4);
trace_seq_printf(p, "spsp0=%02x, spsp1=%02x, secp=%02x, tl=%u",
spsp0, spsp1, secp, tl);
trace_seq_putc(p, 0);
return ret;
}
static const char *nvmet_trace_fabrics_auth_receive(struct trace_seq *p, u8 *spc)
{
const char *ret = trace_seq_buffer_ptr(p);
u8 spsp0 = spc[1];
u8 spsp1 = spc[2];
u8 secp = spc[3];
u32 al = get_unaligned_le32(spc + 4);
trace_seq_printf(p, "spsp0=%02x, spsp1=%02x, secp=%02x, al=%u",
spsp0, spsp1, secp, al);
trace_seq_putc(p, 0);
return ret;
}
static const char *nvmet_trace_fabrics_common(struct trace_seq *p, u8 *spc)
{
const char *ret = trace_seq_buffer_ptr(p);
@ -195,6 +289,10 @@ const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p,
return nvmet_trace_fabrics_connect(p, spc);
case nvme_fabrics_type_property_get:
return nvmet_trace_fabrics_property_get(p, spc);
case nvme_fabrics_type_auth_send:
return nvmet_trace_fabrics_auth_send(p, spc);
case nvme_fabrics_type_auth_receive:
return nvmet_trace_fabrics_auth_receive(p, spc);
default:
return nvmet_trace_fabrics_common(p, spc);
}