net/fungible: Add service module for Fungible drivers

Fungible cards have a number of different PCI functions and thus
different drivers, all of which use a common method to initialize and
interact with the device. This commit adds a library module that
collects these common mechanisms. They mainly deal with device
initialization, setting up and destroying queues, and operating an admin
queue. A subset of the FW interface is also included here.

Signed-off-by: Dimitris Michailidis <dmichail@fungible.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Dimitris Michailidis 2022-02-24 18:58:56 -08:00 committed by David S. Miller
parent e8eb9e3299
commit e1ffcc6681
6 changed files with 2976 additions and 0 deletions

View file

@ -0,0 +1,5 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
obj-$(CONFIG_FUN_CORE) += funcore.o
funcore-y := fun_dev.o fun_queue.o

View file

@ -0,0 +1,843 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
#include <linux/aer.h>
#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/nvme.h>
#include <linux/pci.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include "fun_queue.h"
#include "fun_dev.h"
/* Default timeout, in milliseconds, applied to synchronous admin commands
 * when the caller passes a timeout of 0.
 */
#define FUN_ADMIN_CMD_TO_MS 3000
/* Field positions within the value written to the NVME_REG_AQA register and
 * the admin queue depths accepted by fun_enable_admin_queue().
 */
enum {
AQA_ASQS_SHIFT = 0, /* position of the admin SQ size (depth - 1) field */
AQA_ACQS_SHIFT = 16, /* position of the admin CQ size (depth - 1) field */
AQA_MIN_QUEUE_SIZE = 2, /* smallest accepted admin SQ/CQ depth */
AQA_MAX_QUEUE_SIZE = 4096 /* largest accepted admin SQ/CQ depth */
};
/* Per-tag context for admin commands. The fdev->cmd_ctx array is indexed by
 * the command ID (tag) placed in each submitted command.
 */
struct fun_cmd_ctx {
fun_admin_callback_t cb; /* callback to invoke on completion */
void *cb_data; /* user data provided to callback; reset to NULL on completion or abandonment */
int cpu; /* CPU where the cmd's tag was allocated; -1 while no command is pending */
};
/* Context for synchronous admin commands. It is filled in by
 * fun_admin_cmd_sync_cb() and waited on by fun_submit_admin_sync_cmd().
 */
struct fun_sync_cmd_ctx {
struct completion compl; /* signalled once the response has been recorded */
u8 *rsp_buf; /* caller provided response buffer */
unsigned int rsp_len; /* response buffer size */
u8 rsp_status; /* command response status */
};
/* Poll CSTS until its RDY bit equals the state requested by @enabled.
 *
 * The polling budget is derived from CAP.TO, which counts 500ms units. CSTS
 * is sampled every 100ms. Returns 0 on success, -EIO if CSTS reads back as
 * all ones, and -ETIMEDOUT if RDY never reaches the requested state.
 */
static int fun_wait_ready(struct fun_dev *fdev, bool enabled)
{
	const u32 want_rdy = enabled ? NVME_CSTS_RDY : 0;
	unsigned int timeout_units = NVME_CAP_TIMEOUT(fdev->cap_reg);
	unsigned long expires;
	u32 status;

	/* CAP.TO is in 500ms */
	expires = ((timeout_units + 1) * HZ / 2) + jiffies;

	while (1) {
		status = readl(fdev->bar + NVME_REG_CSTS);
		if (status == ~0) {
			dev_err(fdev->dev, "CSTS register read %#x\n", status);
			return -EIO;
		}

		if ((status & NVME_CSTS_RDY) == want_rdy)
			return 0;

		/* The register is always sampled once more after the last
		 * sleep before the deadline is examined.
		 */
		if (time_is_before_jiffies(expires)) {
			dev_err(fdev->dev,
				"Timed out waiting for device to indicate RDY %u; aborting %s\n",
				enabled, enabled ? "initialization" : "reset");
			return -ETIMEDOUT;
		}

		msleep(100);
	}
}
/* Read CSTS once and verify its RDY bit.
 *
 * Returns -EIO if the register reads back as all ones, -EINVAL if RDY differs
 * from @expected_rdy, and 0 otherwise.
 */
static int fun_check_csts_rdy(struct fun_dev *fdev, unsigned int expected_rdy)
{
	u32 status = readl(fdev->bar + NVME_REG_CSTS);
	u32 rdy;

	if (status == ~0) {
		dev_err(fdev->dev, "CSTS register read %#x\n", status);
		return -EIO;
	}

	rdy = status & NVME_CSTS_RDY;
	if (rdy == expected_rdy)
		return 0;

	dev_err(fdev->dev, "Unexpected CSTS RDY %u\n", rdy);
	return -EINVAL;
}
/* Check that CSTS RDY has the expected value. Then write a new value to the CC
* register and wait for CSTS RDY to match the new CC ENABLE state.
*/
static int fun_update_cc_enable(struct fun_dev *fdev, unsigned int initial_rdy)
{
int rc = fun_check_csts_rdy(fdev, initial_rdy);
if (rc)
return rc;
writel(fdev->cc_reg, fdev->bar + NVME_REG_CC);
return fun_wait_ready(fdev, !!(fdev->cc_reg & NVME_CC_ENABLE));
}
static int fun_disable_ctrl(struct fun_dev *fdev)
{
fdev->cc_reg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);
return fun_update_cc_enable(fdev, 1);
}
/* Build a CC value from the admin queue entry sizes (log2) and the host page
 * size, set ENABLE, and wait for the device to raise CSTS.RDY. CSTS.RDY is
 * expected to be 0 on entry.
 */
static int fun_enable_ctrl(struct fun_dev *fdev, u32 admin_cqesz_log2,
			   u32 admin_sqesz_log2)
{
	u32 cc = NVME_CC_ENABLE;

	cc |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
	cc |= admin_sqesz_log2 << NVME_CC_IOSQES_SHIFT;
	cc |= admin_cqesz_log2 << NVME_CC_IOCQES_SHIFT;
	fdev->cc_reg = cc;

	return fun_update_cc_enable(fdev, 0);
}
/* Claim the function's PCI memory regions under @name and map BAR 0 into
 * fdev->bar. On failure nothing is left claimed. Returns 0 or a negative
 * errno.
 */
static int fun_map_bars(struct fun_dev *fdev, const char *name)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);
	int rc = pci_request_mem_regions(pdev, name);

	if (rc) {
		dev_err(&pdev->dev,
			"Couldn't get PCI memory resources, err %d\n", rc);
		return rc;
	}

	fdev->bar = pci_ioremap_bar(pdev, 0);
	if (fdev->bar)
		return 0;

	dev_err(&pdev->dev, "Couldn't map BAR 0\n");
	pci_release_mem_regions(pdev);
	return -ENOMEM;
}
/* Undo fun_map_bars(). Does nothing if BAR 0 is not currently mapped. */
static void fun_unmap_bars(struct fun_dev *fdev)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);

	if (!fdev->bar)
		return;

	iounmap(fdev->bar);
	fdev->bar = NULL;
	pci_release_mem_regions(pdev);
}
/* Request 64-bit streaming and coherent DMA masks for @dev. Logs and returns
 * the error if the request is rejected.
 */
static int fun_set_dma_masks(struct device *dev)
{
	int rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));

	if (!rc)
		return 0;

	dev_err(dev, "DMA mask configuration failed, err %d\n", rc);
	return rc;
}
/* Interrupt handler for the admin queue. @data is the admin fun_queue. The
 * interrupt is reported as handled only if fun_process_cq() returns non-zero.
 */
static irqreturn_t fun_admin_irq(int irq, void *data)
{
	struct fun_queue *adminq = data;

	if (fun_process_cq(adminq, 0))
		return IRQ_HANDLED;

	return IRQ_NONE;
}
/* CQ callback of the admin queue, invoked for each admin CQE.
 *
 * @funq:  the admin queue
 * @data:  callback data registered with the queue (unused)
 * @entry: the CQE contents, starting with a struct fun_admin_rsp_common
 * @info:  the CQE's info trailer
 *
 * A CQE whose SQ head field is 0xffff is treated as an event and handed to
 * the registered event callback, if any. Any other CQE is the response to the
 * command whose tag is carried in the CID field: the command's callback is
 * invoked and the tag is returned to the allocator.
 */
static void fun_complete_admin_cmd(struct fun_queue *funq, void *data,
				   void *entry, const struct fun_cqe_info *info)
{
	const struct fun_admin_rsp_common *rsp_common = entry;
	struct fun_dev *fdev = funq->fdev;
	struct fun_cmd_ctx *cmd_ctx;
	int cpu;
	u16 cid;

	if (info->sqhd == cpu_to_be16(0xffff)) {
		dev_dbg(fdev->dev, "adminq event\n");
		if (fdev->adminq_cb)
			fdev->adminq_cb(fdev, entry);
		return;
	}

	cid = be16_to_cpu(rsp_common->cid);
	dev_dbg(fdev->dev, "admin CQE cid %u, op %u, ret %u\n", cid,
		rsp_common->op, rsp_common->ret);

	/* The CID comes from the device. cmd_ctx[] has one entry per tag of
	 * the tag allocator, so reject anything beyond its depth rather than
	 * indexing out of bounds.
	 */
	if (cid >= fdev->admin_sbq.sb.depth) {
		dev_err(fdev->dev,
			"admin CQE with out-of-range CID=%u, op=%u\n",
			cid, rsp_common->op);
		return;
	}

	cmd_ctx = &fdev->cmd_ctx[cid];
	if (cmd_ctx->cpu < 0) {
		dev_err(fdev->dev,
			"admin CQE with CID=%u, op=%u does not match a pending command\n",
			cid, rsp_common->op);
		return;
	}

	/* Atomically claim cb_data so that a racing abandon by the issuer
	 * either wins (the callback then sees NULL) or fails.
	 */
	if (cmd_ctx->cb)
		cmd_ctx->cb(fdev, entry, xchg(&cmd_ctx->cb_data, NULL));

	/* Mark the context idle before releasing its tag for reuse. */
	cpu = cmd_ctx->cpu;
	cmd_ctx->cpu = -1;
	sbitmap_queue_clear(&fdev->admin_sbq, cid, cpu);
}
static int fun_init_cmd_ctx(struct fun_dev *fdev, unsigned int ntags)
{
unsigned int i;
fdev->cmd_ctx = kvcalloc(ntags, sizeof(*fdev->cmd_ctx), GFP_KERNEL);
if (!fdev->cmd_ctx)
return -ENOMEM;
for (i = 0; i < ntags; i++)
fdev->cmd_ctx[i].cpu = -1;
return 0;
}
/* Allocate and enable an admin queue and assign it the first IRQ vector.
 *
 * @fdev: device being initialized
 * @areq: requested admin queue geometry and event callback
 *
 * Returns 0 on success, -EEXIST if an admin queue already exists, -EINVAL if
 * the requested SQ or CQ depth is outside
 * [AQA_MIN_QUEUE_SIZE, AQA_MAX_QUEUE_SIZE], -ENOMEM if the queue cannot be
 * allocated, or the error of whichever later step failed. On failure all
 * resources acquired here are released and fdev->admin_q is left NULL.
 */
static int fun_enable_admin_queue(struct fun_dev *fdev,
const struct fun_dev_params *areq)
{
struct fun_queue_alloc_req qreq = {
.cqe_size_log2 = areq->cqe_size_log2,
.sqe_size_log2 = areq->sqe_size_log2,
.cq_depth = areq->cq_depth,
.sq_depth = areq->sq_depth,
.rq_depth = areq->rq_depth,
};
/* one fewer tag than SQ entries; only used after sq_depth is validated */
unsigned int ntags = areq->sq_depth - 1;
struct fun_queue *funq;
int rc;
if (fdev->admin_q)
return -EEXIST;
if (areq->sq_depth < AQA_MIN_QUEUE_SIZE ||
areq->sq_depth > AQA_MAX_QUEUE_SIZE ||
areq->cq_depth < AQA_MIN_QUEUE_SIZE ||
areq->cq_depth > AQA_MAX_QUEUE_SIZE)
return -EINVAL;
fdev->admin_q = fun_alloc_queue(fdev, 0, &qreq);
if (!fdev->admin_q)
return -ENOMEM;
/* per-tag command contexts and the tag allocator share the same size */
rc = fun_init_cmd_ctx(fdev, ntags);
if (rc)
goto free_q;
rc = sbitmap_queue_init_node(&fdev->admin_sbq, ntags, -1, false,
GFP_KERNEL, dev_to_node(fdev->dev));
if (rc)
goto free_cmd_ctx;
funq = fdev->admin_q;
funq->cq_vector = 0;
rc = fun_request_irq(funq, dev_name(fdev->dev), fun_admin_irq, funq);
if (rc)
goto free_sbq;
fun_set_cq_callback(funq, fun_complete_admin_cmd, NULL);
fdev->adminq_cb = areq->event_cb;
/* program the queue sizes (depth - 1) and ring addresses before enabling */
writel((funq->sq_depth - 1) << AQA_ASQS_SHIFT |
(funq->cq_depth - 1) << AQA_ACQS_SHIFT,
fdev->bar + NVME_REG_AQA);
writeq(funq->sq_dma_addr, fdev->bar + NVME_REG_ASQ);
writeq(funq->cq_dma_addr, fdev->bar + NVME_REG_ACQ);
rc = fun_enable_ctrl(fdev, areq->cqe_size_log2, areq->sqe_size_log2);
if (rc)
goto free_irq;
/* the RQ is optional and is created only after the controller is enabled */
if (areq->rq_depth) {
rc = fun_create_rq(funq);
if (rc)
goto disable_ctrl;
funq_rq_post(funq);
}
return 0;
/* unwind in reverse order of acquisition */
disable_ctrl:
fun_disable_ctrl(fdev);
free_irq:
fun_free_irq(funq);
free_sbq:
sbitmap_queue_free(&fdev->admin_sbq);
free_cmd_ctx:
kvfree(fdev->cmd_ctx);
fdev->cmd_ctx = NULL;
free_q:
fun_free_queue(fdev->admin_q);
fdev->admin_q = NULL;
return rc;
}
/* Tear down the admin queue, if one exists: disable the controller, release
 * the queue's IRQ, process any CQEs still in the CQ, then free the tag
 * allocator, the command contexts and the queue itself.
 */
static void fun_disable_admin_queue(struct fun_dev *fdev)
{
	struct fun_queue *adminq = fdev->admin_q;

	if (adminq) {
		fun_disable_ctrl(fdev);

		fun_free_irq(adminq);
		__fun_process_cq(adminq, 0);

		sbitmap_queue_free(&fdev->admin_sbq);

		kvfree(fdev->cmd_ctx);
		fdev->cmd_ctx = NULL;

		fun_free_queue(adminq);
		fdev->admin_q = NULL;
	}
}
/* Report whether CSTS shows that the admin queue is no longer servicing
 * commands, i.e. the controller reports a fatal status or is not ready. The
 * check is register-based only, so a %false result is not conclusive.
 */
static bool fun_adminq_stopped(struct fun_dev *fdev)
{
	u32 status = readl(fdev->bar + NVME_REG_CSTS);
	bool fatal = status & NVME_CSTS_CFS;
	bool ready = status & NVME_CSTS_RDY;

	return fatal || !ready;
}
/* Sleep uninterruptibly until an admin command tag can be allocated.
 *
 * Returns the tag and stores the allocating CPU in *@cpup, or returns
 * -ESHUTDOWN if command submission has been suppressed in the meantime.
 */
static int fun_wait_for_tag(struct fun_dev *fdev, int *cpup)
{
	struct sbitmap_queue *tags = &fdev->admin_sbq;
	struct sbq_wait_state *wq = &tags->ws[0];
	DEFINE_SBQ_WAIT(wait);
	int tag;

	do {
		/* Register as a waiter before testing the conditions. */
		sbitmap_prepare_to_wait(tags, wq, &wait, TASK_UNINTERRUPTIBLE);

		if (fdev->suppress_cmds) {
			tag = -ESHUTDOWN;
			break;
		}

		tag = sbitmap_queue_get(tags, cpup);
		if (tag < 0)
			schedule();
	} while (tag < 0);

	sbitmap_finish_wait(tags, wq, &wait);
	return tag;
}
/* Queue an admin command without waiting for its completion.
 *
 * @fdev:    device to send the command to
 * @cmd:     the command; its cid field is overwritten with the allocated tag
 * @cb:      callback invoked when the command's response arrives
 * @cb_data: opaque pointer handed to @cb
 * @wait_ok: if no tag is free, sleep for one instead of failing
 *
 * The caller implements any waiting or timeout. Returns 0 once the command
 * has been written to the SQ, -EMSGSIZE if the command is larger than an SQE,
 * -EAGAIN if no tag is free and @wait_ok is false, or -ESHUTDOWN if command
 * submission has been suppressed.
 */
int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd,
			 fun_admin_callback_t cb, void *cb_data, bool wait_ok)
{
	struct fun_queue *adminq = fdev->admin_q;
	unsigned int nbytes = cmd->len8 * 8;
	struct fun_cmd_ctx *slot;
	int tag, cpu;

	if (WARN_ON(nbytes > (1 << adminq->sqe_size_log2)))
		return -EMSGSIZE;

	tag = sbitmap_queue_get(&fdev->admin_sbq, &cpu);
	if (tag < 0 && !wait_ok)
		return -EAGAIN;
	if (tag < 0) {
		tag = fun_wait_for_tag(fdev, &cpu);
		if (tag < 0)
			return tag;
	}

	cmd->cid = cpu_to_be16(tag);

	slot = &fdev->cmd_ctx[tag];
	slot->cb = cb;
	slot->cb_data = cb_data;

	spin_lock(&adminq->sq_lock);

	if (unlikely(fdev->suppress_cmds)) {
		/* Submission was stopped; give the tag back. */
		sbitmap_queue_clear(&fdev->admin_sbq, tag, cpu);
		spin_unlock(&adminq->sq_lock);
		return -ESHUTDOWN;
	}

	/* A non-negative cpu marks the context as having a pending command. */
	slot->cpu = cpu;
	memcpy(fun_sqe_at(adminq, adminq->sq_tail), cmd, nbytes);

	dev_dbg(fdev->dev, "admin cmd @ %u: %8ph\n", adminq->sq_tail,
		cmd);

	adminq->sq_tail++;
	if (adminq->sq_tail == adminq->sq_depth)
		adminq->sq_tail = 0;
	writel(adminq->sq_tail, adminq->sq_db);

	spin_unlock(&adminq->sq_lock);
	return 0;
}
/* Try to detach the issuer from a pending admin command by atomically
 * replacing the command's callback data with NULL.
 *
 * Returns %true if @cb_data was still installed. Returns %false if it had
 * already been taken, which means the command has completed or its completion
 * is in progress concurrently.
 */
static bool fun_abandon_admin_cmd(struct fun_dev *fd,
				  const struct fun_admin_req_common *cmd,
				  void *cb_data)
{
	struct fun_cmd_ctx *slot = &fd->cmd_ctx[be16_to_cpu(cmd->cid)];
	void *prev = cmpxchg(&slot->cb_data, cb_data, NULL);

	return prev == cb_data;
}
/* Stop submission of new admin commands and wake up any processes waiting for
* tags. Already submitted commands are left to complete or time out.
*/
static void fun_admin_stop(struct fun_dev *fdev)
{
spin_lock(&fdev->admin_q->sq_lock);
fdev->suppress_cmds = true;
spin_unlock(&fdev->admin_q->sq_lock);
sbitmap_queue_wake_all(&fdev->admin_sbq);
}
/* Completion callback used for synchronous admin commands.
 *
 * @cb_data is the issuer's struct fun_sync_cmd_ctx, or NULL if the issuer
 * timed out and abandoned the command. The response is copied into the
 * issuer's buffer, truncated to the buffer size if necessary, the response
 * status is recorded, and the issuer is woken.
 */
static void fun_admin_cmd_sync_cb(struct fun_dev *fd, void *rsp, void *cb_data)
{
	const struct fun_admin_rsp_common *hdr = rsp;
	struct fun_sync_cmd_ctx *waiter = cb_data;
	unsigned int copy_len;

	if (!waiter)
		return; /* command issuer timed out and left */

	if (waiter->rsp_buf) {
		copy_len = hdr->len8 * 8;

		if (unlikely(copy_len > waiter->rsp_len)) {
			dev_err(fd->dev,
				"response for op %u is %uB > response buffer %uB\n",
				hdr->op, copy_len, waiter->rsp_len);
			copy_len = waiter->rsp_len;
		}

		memcpy(waiter->rsp_buf, rsp, copy_len);
	}

	waiter->rsp_status = hdr->ret;
	complete(&waiter->compl);
}
/* Submit an admin command and wait for its response.
 *
 * @fdev:    device to send the command to
 * @cmd:     the command
 * @rsp:     buffer receiving the response, or NULL if it is not wanted
 * @rspsize: size of @rsp in bytes
 * @timeout: time to wait in ms; 0 selects FUN_ADMIN_CMD_TO_MS
 *
 * Returns a submission error, -ETIMEDOUT if the command did not complete in
 * time and could be abandoned, or the negated response status (0 on success).
 * A timeout also stops all further admin command submission.
 */
int fun_submit_admin_sync_cmd(struct fun_dev *fdev,
			      struct fun_admin_req_common *cmd, void *rsp,
			      size_t rspsize, unsigned int timeout)
{
	struct fun_sync_cmd_ctx sync = {
		.compl = COMPLETION_INITIALIZER_ONSTACK(sync.compl),
		.rsp_buf = rsp,
		.rsp_len = rspsize,
	};
	unsigned int nbytes = cmd->len8 * 8;
	bool timed_out;
	int rc;

	rc = fun_submit_admin_cmd(fdev, cmd, fun_admin_cmd_sync_cb, &sync,
				  true);
	if (rc)
		return rc;

	if (!timeout)
		timeout = FUN_ADMIN_CMD_TO_MS;

	timed_out = !wait_for_completion_timeout(&sync.compl,
						 msecs_to_jiffies(timeout));
	if (timed_out) {
		/* The command timed out. Attempt to cancel it so we can return.
		 * But if the command is in the process of completing we'll
		 * wait for it.
		 */
		if (fun_abandon_admin_cmd(fdev, cmd, &sync)) {
			dev_err(fdev->dev, "admin command timed out: %*ph\n",
				nbytes, cmd);
			fun_admin_stop(fdev);

			/* see if the timeout was due to a queue failure */
			if (fun_adminq_stopped(fdev))
				dev_err(fdev->dev,
					"device does not accept admin commands\n");

			return -ETIMEDOUT;
		}

		wait_for_completion(&sync.compl);
	}

	if (sync.rsp_status)
		dev_err(fdev->dev, "admin command failed, err %d: %*ph\n",
			sync.rsp_status, nbytes, cmd);

	return -sync.rsp_status;
}
EXPORT_SYMBOL_GPL(fun_submit_admin_sync_cmd);
/* Query the device for the number of resources of type @res.
 *
 * Returns the count reported by the device, or a negative errno if the admin
 * command fails. The request and response share one buffer.
 */
int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res)
{
	union {
		struct fun_admin_res_count_req req;
		struct fun_admin_res_count_rsp rsp;
	} cmd;
	int err;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(cmd.req));
	cmd.req.count = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_RES_COUNT,
						    0, 0);

	err = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, &cmd.rsp,
					sizeof(cmd), 0);
	if (err)
		return err;

	return be32_to_cpu(cmd.rsp.count.data);
}
EXPORT_SYMBOL_GPL(fun_get_res_count);
/* Ask the device to delete instance @id of resource type @res, passing
 * @flags with the destroy sub-operation. No response data is collected.
 * Returns the result of the synchronous admin command.
 */
int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res,
		    unsigned int flags, u32 id)
{
	struct fun_admin_generic_destroy_req req = {
		.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(req)),
		.destroy = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_DESTROY,
						       flags, id)
	};
	int rc;

	rc = fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0);
	return rc;
}
EXPORT_SYMBOL_GPL(fun_res_destroy);
/* Issue a bind admin command that ties entity (@type0, @id0) to entity
 * (@type1, @id1). The command consists of the bind request followed by two
 * bind entries. Returns the result of the synchronous admin command.
 */
int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0,
	     unsigned int id0, enum fun_admin_bind_type type1,
	     unsigned int id1)
{
	struct {
		struct fun_admin_bind_req req;
		struct fun_admin_bind_entry entry[2];
	} cmd = {
		.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND,
							 sizeof(cmd)),
		.entry[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0),
		.entry[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1),
	};
	int rc;

	rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, NULL, 0, 0);
	return rc;
}
EXPORT_SYMBOL_GPL(fun_bind);
static int fun_get_dev_limits(struct fun_dev *fdev)
{
struct pci_dev *pdev = to_pci_dev(fdev->dev);
unsigned int cq_count, sq_count, num_dbs;
int rc;
rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPCQ);
if (rc < 0)
return rc;
cq_count = rc;
rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPSQ);
if (rc < 0)
return rc;
sq_count = rc;
/* The admin queue consumes 1 CQ and at least 1 SQ. To be usable the
* device must provide additional queues.
*/
if (cq_count < 2 || sq_count < 2 + !!fdev->admin_q->rq_depth)
return -EINVAL;
/* Calculate the max QID based on SQ/CQ/doorbell counts.
* SQ/CQ doorbells alternate.
*/
num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) /
(fdev->db_stride * 4);
fdev->max_qid = min3(cq_count, sq_count, num_dbs / 2) - 1;
fdev->kern_end_qid = fdev->max_qid + 1;
return 0;
}
/* Allocate as many MSI-X vectors as the function advertises, requiring at
 * least @min_vecs.
 *
 * Returns the number of vectors obtained, -ERANGE if the function has fewer
 * than @min_vecs vectors, or the error returned by the PCI core.
 */
static int fun_alloc_irqs(struct pci_dev *pdev, unsigned int min_vecs)
{
	int num_msix = pci_msix_vec_count(pdev);
	int vecs;

	if (num_msix < 0)
		return num_msix;
	if (min_vecs > num_msix)
		return -ERANGE;

	vecs = pci_alloc_irq_vectors(pdev, min_vecs, num_msix, PCI_IRQ_MSIX);
	if (vecs <= 0) {
		dev_err(&pdev->dev,
			"Unable to allocate at least %u IRQ vectors\n",
			min_vecs);
		return vecs;
	}

	dev_info(&pdev->dev,
		 "Allocated %d IRQ vectors of %d requested\n",
		 vecs, num_msix);
	return vecs;
}
/* Set up the IRQ manager: a bitmap with one bit per allocated vector, its
 * lock, and the count of free vectors. Returns 0 or -ENOMEM.
 */
static int fun_alloc_irq_mgr(struct fun_dev *fdev)
{
	unsigned long *map = bitmap_zalloc(fdev->num_irqs, GFP_KERNEL);

	fdev->irq_map = map;
	if (!map)
		return -ENOMEM;

	spin_lock_init(&fdev->irqmgr_lock);

	/* mark IRQ 0 allocated, it is used by the admin queue */
	__set_bit(0, map);
	fdev->irqs_avail = fdev->num_irqs - 1;

	return 0;
}
/* Take @nirqs vectors out of the free pool and store their indices in
 * @irq_indices.
 *
 * Returns the number of vectors reserved, 0 if @nirqs is 0, or -ENOSPC if
 * fewer than @nirqs vectors are free (nothing is reserved in that case).
 */
int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, u16 *irq_indices)
{
	unsigned int bit, taken = 0;
	int ret;

	if (!nirqs)
		return 0;

	spin_lock(&fdev->irqmgr_lock);

	if (nirqs > fdev->irqs_avail) {
		spin_unlock(&fdev->irqmgr_lock);
		return -ENOSPC;
	}

	for_each_clear_bit(bit, fdev->irq_map, fdev->num_irqs) {
		__set_bit(bit, fdev->irq_map);
		irq_indices[taken++] = bit;
		if (taken >= nirqs)
			break;
	}

	/* irqs_avail said there were enough free bits in the map. */
	WARN_ON(taken < nirqs);
	fdev->irqs_avail -= taken;
	ret = taken;

	spin_unlock(&fdev->irqmgr_lock);
	return ret;
}
EXPORT_SYMBOL(fun_reserve_irqs);
/* Return the @nirqs vectors listed in @irq_indices to the free pool. */
void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs,
		      u16 *irq_indices)
{
	const u16 *idx;

	spin_lock(&fdev->irqmgr_lock);

	for (idx = irq_indices; idx < irq_indices + nirqs; idx++)
		__clear_bit(*idx, fdev->irq_map);
	fdev->irqs_avail += nirqs;

	spin_unlock(&fdev->irqmgr_lock);
}
EXPORT_SYMBOL(fun_release_irqs);
/* Work function of the service task. Invokes the driver's service callback,
 * if one is registered, unless the service task has been disabled.
 */
static void fun_serv_handler(struct work_struct *work)
{
	struct fun_dev *fdev = container_of(work, struct fun_dev, service_task);

	if (!test_bit(FUN_SERV_DISABLED, &fdev->service_flags) && fdev->serv_cb)
		fdev->serv_cb(fdev);
}
/* Disable the service task and wait for a running instance to finish. */
void fun_serv_stop(struct fun_dev *fd)
{
	unsigned long *flags = &fd->service_flags;

	set_bit(FUN_SERV_DISABLED, flags);
	cancel_work_sync(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_stop);
/* Re-enable the service task and schedule it if any other service flag is
 * still set.
 */
void fun_serv_restart(struct fun_dev *fd)
{
	clear_bit(FUN_SERV_DISABLED, &fd->service_flags);

	if (!fd->service_flags)
		return;

	schedule_work(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_restart);
/* Schedule the service task unless it is currently disabled. */
void fun_serv_sched(struct fun_dev *fd)
{
	if (test_bit(FUN_SERV_DISABLED, &fd->service_flags))
		return;

	schedule_work(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_sched);
/* Check and try to get the device into a proper state for initialization,
* i.e., CSTS.RDY = CC.EN = 0.
*/
static int sanitize_dev(struct fun_dev *fdev)
{
int rc;
fdev->cap_reg = readq(fdev->bar + NVME_REG_CAP);
fdev->cc_reg = readl(fdev->bar + NVME_REG_CC);
/* First get RDY to agree with the current EN. Give RDY the opportunity
* to complete a potential recent EN change.
*/
rc = fun_wait_ready(fdev, fdev->cc_reg & NVME_CC_ENABLE);
if (rc)
return rc;
/* Next, reset the device if EN is currently 1. */
if (fdev->cc_reg & NVME_CC_ENABLE)
rc = fun_disable_ctrl(fdev);
return rc;
}
/* Undo the device initialization of fun_dev_enable(). */
void fun_dev_disable(struct fun_dev *fdev)
{
struct pci_dev *pdev = to_pci_dev(fdev->dev);
pci_set_drvdata(pdev, NULL);
if (fdev->fw_handle != FUN_HCI_ID_INVALID) {
fun_res_destroy(fdev, FUN_ADMIN_OP_SWUPGRADE, 0,
fdev->fw_handle);
fdev->fw_handle = FUN_HCI_ID_INVALID;
}
fun_disable_admin_queue(fdev);
bitmap_free(fdev->irq_map);
pci_free_irq_vectors(pdev);
pci_clear_master(pdev);
pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
fun_unmap_bars(fdev);
}
EXPORT_SYMBOL(fun_dev_disable);
/* Perform basic initialization of a device, including
 * - PCI config space setup and BAR0 mapping
 * - interrupt management initialization
 * - 1 admin queue setup
 * - determination of some device limits, such as number of queues.
 *
 * @fdev: driver state to initialize
 * @pdev: the PCI function being initialized
 * @areq: admin queue geometry, minimum MSI-X count and callbacks
 * @name: name under which the PCI memory regions are requested
 *
 * Returns 0 on success or a negative errno. On failure every step completed
 * so far is undone before returning.
 */
int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev,
const struct fun_dev_params *areq, const char *name)
{
int rc;
fdev->dev = &pdev->dev;
rc = fun_map_bars(fdev, name);
if (rc)
return rc;
rc = fun_set_dma_masks(fdev->dev);
if (rc)
goto unmap;
rc = pci_enable_device_mem(pdev);
if (rc) {
dev_err(&pdev->dev, "Couldn't enable device, err %d\n", rc);
goto unmap;
}
pci_enable_pcie_error_reporting(pdev);
/* get the device to CC.EN = CSTS.RDY = 0; this also caches CAP and CC */
rc = sanitize_dev(fdev);
if (rc)
goto disable_dev;
fdev->fw_handle = FUN_HCI_ID_INVALID;
/* derive queue depth and doorbell layout from the cached CAP register */
fdev->q_depth = NVME_CAP_MQES(fdev->cap_reg) + 1;
fdev->db_stride = 1 << NVME_CAP_STRIDE(fdev->cap_reg);
fdev->dbs = fdev->bar + NVME_REG_DBS;
/* the service task starts out disabled */
INIT_WORK(&fdev->service_task, fun_serv_handler);
fdev->service_flags = FUN_SERV_DISABLED;
fdev->serv_cb = areq->serv_cb;
rc = fun_alloc_irqs(pdev, areq->min_msix + 1); /* +1 for admin CQ */
if (rc < 0)
goto disable_dev;
fdev->num_irqs = rc;
rc = fun_alloc_irq_mgr(fdev);
if (rc)
goto free_irqs;
pci_set_master(pdev);
rc = fun_enable_admin_queue(fdev, areq);
if (rc)
goto free_irq_mgr;
/* uses admin commands, so it must follow admin queue setup */
rc = fun_get_dev_limits(fdev);
if (rc < 0)
goto disable_admin;
pci_save_state(pdev);
pci_set_drvdata(pdev, fdev);
pcie_print_link_status(pdev);
dev_dbg(fdev->dev, "q_depth %u, db_stride %u, max qid %d kern_end_qid %d\n",
fdev->q_depth, fdev->db_stride, fdev->max_qid,
fdev->kern_end_qid);
return 0;
/* unwind in reverse order of setup */
disable_admin:
fun_disable_admin_queue(fdev);
free_irq_mgr:
pci_clear_master(pdev);
bitmap_free(fdev->irq_map);
free_irqs:
pci_free_irq_vectors(pdev);
disable_dev:
pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
unmap:
fun_unmap_bars(fdev);
return rc;
}
EXPORT_SYMBOL(fun_dev_enable);
/* Module metadata. */
MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
MODULE_DESCRIPTION("Core services driver for Fungible devices");
MODULE_LICENSE("Dual BSD/GPL");

View file

@ -0,0 +1,150 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
#ifndef _FUNDEV_H
#define _FUNDEV_H
#include <linux/sbitmap.h>
#include <linux/spinlock_types.h>
#include <linux/workqueue.h>
#include "fun_hci.h"
struct pci_dev;
struct fun_dev;
struct fun_queue;
struct fun_cmd_ctx;
struct fun_queue_alloc_req;
/* Doorbell fields. The _S suffix is a field's bit position, _M the field's
 * unshifted mask, and _F a single-bit flag already shifted into place.
 */
enum {
FUN_DB_QIDX_S = 0,
FUN_DB_INTCOAL_ENTRIES_S = 16,
FUN_DB_INTCOAL_ENTRIES_M = 0x7f,
FUN_DB_INTCOAL_USEC_S = 23,
FUN_DB_INTCOAL_USEC_M = 0x7f,
FUN_DB_IRQ_S = 30,
FUN_DB_IRQ_F = 1 << FUN_DB_IRQ_S,
FUN_DB_IRQ_ARM_S = 31,
/* unsigned constant: shifting a signed 1 into bit 31 would overflow int */
FUN_DB_IRQ_ARM_F = 1U << FUN_DB_IRQ_ARM_S
};
/* Callback for asynchronous admin commands.
 * Invoked on reception of command response with the response and the
 * cb_data supplied at submission. cb_data is NULL if the issuer abandoned
 * the command in the meantime.
 */
typedef void (*fun_admin_callback_t)(struct fun_dev *fdev, void *rsp,
void *cb_data);
/* Callback for events/notifications received by an admin queue.
 * It is passed the CQE carrying the event.
 */
typedef void (*fun_admin_event_cb)(struct fun_dev *fdev, void *cqe);
/* Callback for pending work handled by the service task. */
typedef void (*fun_serv_cb)(struct fun_dev *fd);
/* Service task flags. These are bit numbers within fun_dev.service_flags. */
enum {
FUN_SERV_DISABLED, /* service task is disabled */
FUN_SERV_FIRST_AVAIL /* NOTE(review): presumably the first bit free for users of this library; confirm */
};
/* Driver state associated with a PCI function. */
struct fun_dev {
struct device *dev; /* the struct device of the PCI function */
void __iomem *bar; /* start of BAR0 mapping */
u32 __iomem *dbs; /* start of doorbells in BAR0 mapping */
/* admin queue */
struct fun_queue *admin_q; /* NULL while no admin queue exists */
struct sbitmap_queue admin_sbq; /* allocator of admin command tags */
struct fun_cmd_ctx *cmd_ctx; /* per-tag command contexts, indexed by tag */
fun_admin_event_cb adminq_cb; /* handler for admin queue events, may be NULL */
bool suppress_cmds; /* if set don't write commands to SQ */
/* address increment between consecutive doorbells, in 4B units */
unsigned int db_stride;
/* SW versions of device registers */
u32 cc_reg; /* CC register */
u64 cap_reg; /* CAPability register */
unsigned int q_depth; /* max queue depth supported by device */
unsigned int max_qid; /* = #queues - 1, separately for SQs and CQs */
unsigned int kern_end_qid; /* last qid in the kernel range + 1 */
unsigned int fw_handle; /* FUN_HCI_ID_INVALID while no handle is held */
/* IRQ manager */
unsigned int num_irqs; /* number of IRQ vectors allocated */
unsigned int irqs_avail; /* number of vectors not currently reserved */
spinlock_t irqmgr_lock; /* protects irq_map and irqs_avail */
unsigned long *irq_map; /* bitmap of reserved vectors; bit 0 is the admin queue's */
/* The service task handles work that needs a process context */
struct work_struct service_task;
unsigned long service_flags; /* FUN_SERV_* bits */
fun_serv_cb serv_cb; /* called by the service task, may be NULL */
};
/* Parameters supplied by a driver to fun_dev_enable(). */
struct fun_dev_params {
u8 cqe_size_log2; /* admin q CQE size */
u8 sqe_size_log2; /* admin q SQE size */
/* admin q depths */
u16 cq_depth;
u16 sq_depth;
u16 rq_depth; /* 0 means the admin queue gets no RQ */
u16 min_msix; /* min vectors needed by requesting driver */
fun_admin_event_cb event_cb; /* handler for admin queue events */
fun_serv_cb serv_cb; /* handler run by the service task */
};
/* Return the BAR address of the doorbell with index @db_index. Consecutive
 * doorbells are fdev->db_stride 32-bit words apart.
 */
static inline u32 __iomem *fun_db_addr(const struct fun_dev *fdev,
				       unsigned int db_index)
{
	unsigned int word_off = db_index * fdev->db_stride;

	return fdev->dbs + word_off;
}
/* Return the BAR address of the doorbell of SQ @sqid. SQ and CQ doorbells
 * alternate; SQs occupy the even doorbell indices.
 */
static inline u32 __iomem *fun_sq_db_addr(const struct fun_dev *fdev,
					  unsigned int sqid)
{
	unsigned int db_index = sqid * 2;

	return fun_db_addr(fdev, db_index);
}
/* Return the BAR address of the doorbell of CQ @cqid. CQs occupy the odd
 * doorbell indices.
 */
static inline u32 __iomem *fun_cq_db_addr(const struct fun_dev *fdev,
					  unsigned int cqid)
{
	unsigned int db_index = cqid * 2 + 1;

	return fun_db_addr(fdev, db_index);
}
/* Helpers built on synchronous admin commands. */
int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res);
int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res,
unsigned int flags, u32 id);
int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0,
unsigned int id0, enum fun_admin_bind_type type1,
unsigned int id1);
/* Admin command submission, asynchronous and synchronous. */
int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd,
fun_admin_callback_t cb, void *cb_data, bool wait_ok);
int fun_submit_admin_sync_cmd(struct fun_dev *fdev,
struct fun_admin_req_common *cmd, void *rsp,
size_t rspsize, unsigned int timeout);
/* Device bring-up and teardown. */
int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev,
const struct fun_dev_params *areq, const char *name);
void fun_dev_disable(struct fun_dev *fdev);
/* IRQ vector reservation. */
int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs,
u16 *irq_indices);
void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs,
u16 *irq_indices);
/* Service task control. */
void fun_serv_stop(struct fun_dev *fd);
void fun_serv_restart(struct fun_dev *fd);
void fun_serv_sched(struct fun_dev *fd);
#endif /* _FUNDEV_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,601 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "fun_dev.h"
#include "fun_queue.h"
/* Allocate the memory backing a queue.
 *
 * @dma_dev:    device performing DMA to the ring
 * @depth:      number of ring entries
 * @hw_desc_sz: size of one HW descriptor
 * @sw_desc_sz: size of one SW state entry, 0 for no SW ring
 * @wb:         also reserve a 64b HW write-back area after the HW ring
 * @numa_node:  node to allocate on, NUMA_NO_NODE to use the device's node
 * @dma_addr:   receives the DMA address of the HW ring
 * @sw_va:      receives the VA of the SW ring; written only if @sw_desc_sz
 * @wb_va:      receives the VA of the write-back area; written only if @wb
 *
 * Returns the VA of the HW ring, or NULL if either allocation fails.
 */
void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth,
			 size_t hw_desc_sz, size_t sw_desc_sz, bool wb,
			 int numa_node, dma_addr_t *dma_addr, void **sw_va,
			 volatile __be64 **wb_va)
{
	int orig_node = dev_to_node(dma_dev);
	size_t ring_bytes = hw_desc_sz * depth;
	size_t dma_sz = ring_bytes;
	void *hw_va;

	if (numa_node == NUMA_NO_NODE)
		numa_node = orig_node;

	/* Place optional write-back area at end of descriptor ring. */
	if (wb)
		dma_sz += sizeof(u64);

	/* Temporarily retarget the device's node for this allocation. */
	set_dev_node(dma_dev, numa_node);
	hw_va = dma_alloc_coherent(dma_dev, dma_sz, dma_addr, GFP_KERNEL);
	set_dev_node(dma_dev, orig_node);
	if (!hw_va)
		return NULL;

	if (sw_desc_sz) {
		void *sw_ring = kvzalloc_node(sw_desc_sz * depth, GFP_KERNEL,
					      numa_node);

		*sw_va = sw_ring;
		if (!sw_ring) {
			dma_free_coherent(dma_dev, dma_sz, hw_va, *dma_addr);
			return NULL;
		}
	}

	if (wb)
		*wb_va = hw_va + ring_bytes;

	return hw_va;
}
EXPORT_SYMBOL_GPL(fun_alloc_ring_mem);
/* Free queue memory obtained from fun_alloc_ring_mem(). @depth, @hw_desc_sz
 * and @wb determine the size of the DMA area and must describe the same ring
 * geometry that was allocated. @hw_va and @sw_va may each be NULL.
 */
void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz,
		       bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va)
{
	size_t dma_sz = depth * hw_desc_sz + (wb ? sizeof(u64) : 0);

	if (hw_va)
		dma_free_coherent(dma_dev, dma_sz, hw_va, dma_addr);

	kvfree(sw_va);
}
EXPORT_SYMBOL_GPL(fun_free_ring_mem);
/* Create an SQ on the device through a synchronous admin command.
 *
 * The address passed as the head write-back location is the one immediately
 * following the descriptor ring. For RQs the entry size is forced to the size
 * of struct fun_eprq_rqbuf. On success *@dbp receives the queue's doorbell
 * address and, if the queue ID was auto-allocated by the device
 * (FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR), *@sqidp receives that ID.
 *
 * Returns 0 on success, -EINVAL if @sq_depth exceeds the device's maximum
 * queue depth, or the admin command's error.
 */
int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid,
		  u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr,
		  u8 coal_nentries, u8 coal_usec, u32 irq_num,
		  u32 scan_start_id, u32 scan_end_id,
		  u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp)
{
	union {
		struct fun_admin_epsq_req req;
		struct fun_admin_generic_create_rsp rsp;
	} cmd;
	dma_addr_t wb_addr;
	u32 hw_qid;
	int err;

	if (sq_depth > fdev->q_depth)
		return -EINVAL;

	if (flags & FUN_ADMIN_EPSQ_CREATE_FLAG_RQ)
		sqe_size_log2 = ilog2(sizeof(struct fun_eprq_rqbuf));

	wb_addr = dma_addr + (sq_depth << sqe_size_log2);

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPSQ,
						    sizeof(cmd.req));
	cmd.req.u.create =
		FUN_ADMIN_EPSQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags,
					       sqid, cqid, sqe_size_log2,
					       sq_depth - 1, dma_addr, 0,
					       coal_nentries, coal_usec,
					       irq_num, scan_start_id,
					       scan_end_id, 0,
					       rq_buf_size_log2,
					       ilog2(sizeof(u64)), wb_addr);

	err = fun_submit_admin_sync_cmd(fdev, &cmd.req.common,
					&cmd.rsp, sizeof(cmd.rsp), 0);
	if (err)
		return err;

	hw_qid = be32_to_cpu(cmd.rsp.id);
	*dbp = fun_sq_db_addr(fdev, hw_qid);
	if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR)
		*sqidp = hw_qid;

	return 0;
}
EXPORT_SYMBOL_GPL(fun_sq_create);
/* Create a CQ on the device through a synchronous admin command.
 *
 * On success *@dbp receives the queue's doorbell address and, if the queue ID
 * was auto-allocated by the device (FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR),
 * *@cqidp receives that ID. @headroom is passed to the device halved.
 *
 * Returns 0 on success, -EINVAL if @cq_depth exceeds the device's maximum
 * queue depth, or the admin command's error.
 */
int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid,
		  u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr,
		  u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec,
		  u32 irq_num, u32 scan_start_id, u32 scan_end_id, u32 *cqidp,
		  u32 __iomem **dbp)
{
	union {
		struct fun_admin_epcq_req req;
		struct fun_admin_generic_create_rsp rsp;
	} cmd;
	u32 hw_qid;
	int err;

	if (cq_depth > fdev->q_depth)
		return -EINVAL;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
						    sizeof(cmd.req));
	cmd.req.u.create =
		FUN_ADMIN_EPCQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags,
					       cqid, rqid, cqe_size_log2,
					       cq_depth - 1, dma_addr, tailroom,
					       headroom / 2, 0, coal_nentries,
					       coal_usec, irq_num,
					       scan_start_id, scan_end_id, 0);

	err = fun_submit_admin_sync_cmd(fdev, &cmd.req.common,
					&cmd.rsp, sizeof(cmd.rsp), 0);
	if (err)
		return err;

	hw_qid = be32_to_cpu(cmd.rsp.id);
	*dbp = fun_cq_db_addr(fdev, hw_qid);
	if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR)
		*cqidp = hw_qid;

	return 0;
}
EXPORT_SYMBOL_GPL(fun_cq_create);
/* Return %true if the queue's SQ was created with the head write-back flag. */
static bool fun_sq_is_head_wb(const struct fun_queue *funq)
{
	return (funq->sq_flags &
		FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS) != 0;
}
/* Unmap and release every page currently attached to the queue's RQ. Entries
 * without a page are skipped.
 */
static void fun_clean_rq(struct fun_queue *funq)
{
	struct fun_dev *fdev = funq->fdev;
	unsigned int idx;

	for (idx = 0; idx < funq->rq_depth; idx++) {
		struct fun_rq_info *buf = &funq->rq_info[idx];

		if (!buf->page)
			continue;

		dma_unmap_page(fdev->dev, buf->dma, PAGE_SIZE,
			       DMA_FROM_DEVICE);
		put_page(buf->page);
		buf->page = NULL;
	}
}
/* Give every RQ entry a freshly allocated, DMA-mapped page and write the
 * corresponding RQ descriptor.
 *
 * Returns 0 on success or -ENOMEM if a page cannot be allocated or mapped.
 * Pages attached to earlier entries are left in place on failure.
 */
static int fun_fill_rq(struct fun_queue *funq)
{
	struct device *dma_dev = funq->fdev->dev;
	int node = dev_to_node(dma_dev);
	struct fun_rq_info *buf;
	int idx;

	for (idx = 0; idx < funq->rq_depth; idx++) {
		buf = &funq->rq_info[idx];

		buf->page = alloc_pages_node(node, GFP_KERNEL, 0);
		if (unlikely(!buf->page))
			return -ENOMEM;

		buf->dma = dma_map_page(dma_dev, buf->page, 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		if (unlikely(dma_mapping_error(dma_dev, buf->dma))) {
			/* Leave no unmapped page behind in this entry. */
			put_page(buf->page);
			buf->page = NULL;
			return -ENOMEM;
		}

		funq->rqes[idx] = FUN_EPRQ_RQBUF_INIT(buf->dma);
	}

	funq->rq_tail = funq->rq_depth - 1;
	return 0;
}
static void fun_rq_update_pos(struct fun_queue *funq, int buf_offset)
{
if (buf_offset <= funq->rq_buf_offset) {
struct fun_rq_info *rqinfo = &funq->rq_info[funq->rq_buf_idx];
struct device *dev = funq->fdev->dev;
dma_sync_single_for_device(dev, rqinfo->dma, PAGE_SIZE,
DMA_FROM_DEVICE);
funq->num_rqe_to_fill++;
if (++funq->rq_buf_idx == funq->rq_depth)
funq->rq_buf_idx = 0;
}
funq->rq_buf_offset = buf_offset;
}
/* Given a command response with data scattered across >= 1 RQ buffers return
 * a pointer to a contiguous buffer containing all the data. If the data is in
 * one RQ buffer the start address within that buffer is returned, otherwise a
 * new buffer is allocated and the data is gathered into it.
 *
 * *@need_free is set to false when the returned pointer lies inside an RQ
 * buffer and to true when a gather buffer was allocated. It is left untouched
 * if that allocation fails, in which case NULL is returned after the RQ
 * buffers holding the data have still been consumed.
 */
static void *fun_data_from_rq(struct fun_queue *funq,
const struct fun_rsp_common *rsp, bool *need_free)
{
u32 bufoff, total_len, remaining, fragsize, dataoff;
struct device *dma_dev = funq->fdev->dev;
const struct fun_dataop_rqbuf *databuf;
const struct fun_dataop_hdr *dataop;
const struct fun_rq_info *rqinfo;
void *data;
/* the data operation header sits suboff8 8-byte units into the response */
dataop = (void *)rsp + rsp->suboff8 * 8;
total_len = be32_to_cpu(dataop->total_len);
if (likely(dataop->nsgl == 1)) {
databuf = (struct fun_dataop_rqbuf *)dataop->imm;
bufoff = be32_to_cpu(databuf->bufoff);
/* update the position first; it may advance rq_buf_idx, which is read next */
fun_rq_update_pos(funq, bufoff);
rqinfo = &funq->rq_info[funq->rq_buf_idx];
dma_sync_single_for_cpu(dma_dev, rqinfo->dma + bufoff,
total_len, DMA_FROM_DEVICE);
*need_free = false;
return page_address(rqinfo->page) + bufoff;
}
/* For scattered completions gather the fragments into one buffer. */
data = kmalloc(total_len, GFP_ATOMIC);
/* NULL is OK here. In case of failure we still need to consume the data
 * for proper buffer accounting but indicate an error in the response.
 */
if (likely(data))
*need_free = true;
dataoff = 0;
/* each fragment starts at offset 0 of the next RQ buffer and holds at most a page */
for (remaining = total_len; remaining; remaining -= fragsize) {
fun_rq_update_pos(funq, 0);
fragsize = min_t(unsigned int, PAGE_SIZE, remaining);
if (data) {
rqinfo = &funq->rq_info[funq->rq_buf_idx];
dma_sync_single_for_cpu(dma_dev, rqinfo->dma, fragsize,
DMA_FROM_DEVICE);
memcpy(data + dataoff, page_address(rqinfo->page),
fragsize);
dataoff += fragsize;
}
}
return data;
}
/* Process up to @max new CQEs of @funq, or up to cq_depth - 1 CQEs if @max
 * is 0. For each CQE whose phase bit matches the expected phase the CQ head
 * is advanced and the queue's callback, if one is set, is invoked with the
 * response. Responses flagged FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF are obtained
 * from the RQ through fun_data_from_rq().
 *
 * This function does not write any doorbell.
 *
 * Returns the number of CQEs processed.
 */
unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max)
{
const struct fun_cqe_info *info;
struct fun_rsp_common *rsp;
unsigned int new_cqes;
u16 sf_p, flags;
bool need_free;
void *cqe;
if (!max)
max = funq->cq_depth - 1;
for (new_cqes = 0; new_cqes < max; new_cqes++) {
cqe = funq->cqes + (funq->cq_head << funq->cqe_size_log2);
info = funq_cqe_info(funq, cqe);
sf_p = be16_to_cpu(info->sf_p);
/* Bit 0 of sf_p is the phase tag; a mismatch ends processing. */
if ((sf_p & 1) != funq->cq_phase)
break;
/* ensure the phase tag is read before other CQE fields */
dma_rmb();
/* Advance the head; the expected phase flips on every wrap. */
if (++funq->cq_head == funq->cq_depth) {
funq->cq_head = 0;
funq->cq_phase = !funq->cq_phase;
}
rsp = cqe;
flags = be16_to_cpu(rsp->flags);
/* fun_data_from_rq() leaves need_free untouched on allocation failure */
need_free = false;
if (unlikely(flags & FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF)) {
rsp = fun_data_from_rq(funq, rsp, &need_free);
if (!rsp) {
/* No gather buffer: fall back to the CQE itself, set its len8
 * to 1, and store ENOMEM in ret unless ret is already nonzero.
 */
rsp = cqe;
rsp->len8 = 1;
if (rsp->ret == 0)
rsp->ret = ENOMEM;
}
}
if (funq->cq_cb)
funq->cq_cb(funq, funq->cb_data, rsp, info);
/* Release the gather buffer allocated by fun_data_from_rq(). */
if (need_free)
kfree(rsp);
}
dev_dbg(funq->fdev->dev, "CQ %u, new CQEs %u/%u, head %u, phase %u\n",
funq->cqid, new_cqes, max, funq->cq_head, funq->cq_phase);
return new_cqes;
}
/* Process pending CQEs through __fun_process_cq() and then update the
 * doorbells: if RQ buffers were consumed the RQ tail is advanced by their
 * count, modulo the RQ depth, and written to the RQ doorbell. The CQ doorbell
 * is always written with the current CQ head combined with FUN_DB_IRQ_ARM_F.
 *
 * Returns the value returned by __fun_process_cq().
 */
unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max)
{
	unsigned int ncqes = __fun_process_cq(funq, max);
	unsigned int refill = funq->num_rqe_to_fill;
	u32 cq_db_val;

	if (refill) {
		funq->rq_tail = (funq->rq_tail + refill) % funq->rq_depth;
		funq->num_rqe_to_fill = 0;
		writel(funq->rq_tail, funq->rq_db);
	}

	cq_db_val = funq->cq_head | FUN_DB_IRQ_ARM_F;
	writel(cq_db_val, funq->cq_db);

	return ncqes;
}
static int fun_alloc_sqes(struct fun_queue *funq)
{
funq->sq_cmds = fun_alloc_ring_mem(funq->fdev->dev, funq->sq_depth,
1 << funq->sqe_size_log2, 0,
fun_sq_is_head_wb(funq),
NUMA_NO_NODE, &funq->sq_dma_addr,
NULL, &funq->sq_head);
return funq->sq_cmds ? 0 : -ENOMEM;
}
static int fun_alloc_cqes(struct fun_queue *funq)
{
funq->cqes = fun_alloc_ring_mem(funq->fdev->dev, funq->cq_depth,
1 << funq->cqe_size_log2, 0, false,
NUMA_NO_NODE, &funq->cq_dma_addr, NULL,
NULL);
return funq->cqes ? 0 : -ENOMEM;
}
/* Allocate ring memory for the RQ of @funq: rq_depth descriptors together
 * with one struct fun_rq_info of software state per descriptor. The
 * descriptor ring, its DMA address and the software array are stored in rqes,
 * rq_dma_addr and rq_info.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails.
 */
static int fun_alloc_rqes(struct fun_queue *funq)
{
	struct device *dma_dev = funq->fdev->dev;
	void **sw_state = (void **)&funq->rq_info;

	funq->rqes = fun_alloc_ring_mem(dma_dev, funq->rq_depth,
					sizeof(*funq->rqes),
					sizeof(*funq->rq_info), false,
					NUMA_NO_NODE, &funq->rq_dma_addr,
					sw_state, NULL);
	if (!funq->rqes)
		return -ENOMEM;

	return 0;
}
/* Free a queue's structures. */
void fun_free_queue(struct fun_queue *funq)
{
struct device *dev = funq->fdev->dev;
fun_free_ring_mem(dev, funq->cq_depth, 1 << funq->cqe_size_log2, false,
funq->cqes, funq->cq_dma_addr, NULL);
fun_free_ring_mem(dev, funq->sq_depth, 1 << funq->sqe_size_log2,
fun_sq_is_head_wb(funq), funq->sq_cmds,
funq->sq_dma_addr, NULL);
if (funq->rqes) {
fun_clean_rq(funq);
fun_free_ring_mem(dev, funq->rq_depth, sizeof(*funq->rqes),
false, funq->rqes, funq->rq_dma_addr,
funq->rq_info);
}
kfree(funq);
}
/* Allocate and initialize a funq's structures. */
struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid,
const struct fun_queue_alloc_req *req)
{
struct fun_queue *funq = kzalloc(sizeof(*funq), GFP_KERNEL);
if (!funq)
return NULL;
funq->fdev = fdev;
spin_lock_init(&funq->sq_lock);
funq->qid = qid;
/* Initial CQ/SQ/RQ ids */
if (req->rq_depth) {
funq->cqid = 2 * qid;
if (funq->qid) {
/* I/O Q: use rqid = cqid, sqid = +1 */
funq->rqid = funq->cqid;
funq->sqid = funq->rqid + 1;
} else {
/* Admin Q: sqid is always 0, use ID 1 for RQ */
funq->sqid = 0;
funq->rqid = 1;
}
} else {
funq->cqid = qid;
funq->sqid = qid;
}
funq->cq_flags = req->cq_flags;
funq->sq_flags = req->sq_flags;
funq->cqe_size_log2 = req->cqe_size_log2;
funq->sqe_size_log2 = req->sqe_size_log2;
funq->cq_depth = req->cq_depth;
funq->sq_depth = req->sq_depth;
funq->cq_intcoal_nentries = req->cq_intcoal_nentries;
funq->cq_intcoal_usec = req->cq_intcoal_usec;
funq->sq_intcoal_nentries = req->sq_intcoal_nentries;
funq->sq_intcoal_usec = req->sq_intcoal_usec;
if (fun_alloc_cqes(funq))
goto free_funq;
funq->cq_phase = 1;
if (fun_alloc_sqes(funq))
goto free_funq;
if (req->rq_depth) {
funq->rq_flags = req->rq_flags | FUN_ADMIN_EPSQ_CREATE_FLAG_RQ;
funq->rq_depth = req->rq_depth;
funq->rq_buf_offset = -1;
if (fun_alloc_rqes(funq) || fun_fill_rq(funq))
goto free_funq;
}
funq->cq_vector = -1;
funq->cqe_info_offset = (1 << funq->cqe_size_log2) - sizeof(struct fun_cqe_info);
/* SQ/CQ 0 are implicitly created, assign their doorbells now.
* Other queues are assigned doorbells at their explicit creation.
*/
if (funq->sqid == 0)
funq->sq_db = fun_sq_db_addr(fdev, 0);
if (funq->cqid == 0)
funq->cq_db = fun_cq_db_addr(fdev, 0);
return funq;
free_funq:
fun_free_queue(funq);
return NULL;
}
/* Create a funq's CQ on the device. The CQ is associated with the funq's RQ
 * id only if the CQ flags include FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, otherwise
 * FUN_HCI_ID_INVALID is passed. The CQ id and doorbell address of @funq are
 * passed to fun_cq_create() as output locations.
 *
 * Returns 0 on success or the error returned by fun_cq_create().
 */
static int fun_create_cq(struct fun_queue *funq)
{
	unsigned int rqid = FUN_HCI_ID_INVALID;
	int err;

	if (funq->cq_flags & FUN_ADMIN_EPCQ_CREATE_FLAG_RQ)
		rqid = funq->rqid;

	err = fun_cq_create(funq->fdev, funq->cq_flags, funq->cqid, rqid,
			    funq->cqe_size_log2, funq->cq_depth,
			    funq->cq_dma_addr, 0, 0,
			    funq->cq_intcoal_nentries, funq->cq_intcoal_usec,
			    funq->cq_vector, 0, 0, &funq->cqid,
			    &funq->cq_db);
	if (err)
		return err;

	dev_dbg(funq->fdev->dev, "created CQ %u\n", funq->cqid);
	return 0;
}
/* Create a funq's SQ on the device, bound to the funq's CQ id. The SQ id and
 * doorbell address of @funq are passed to fun_sq_create() as output
 * locations.
 *
 * Returns 0 on success or the error returned by fun_sq_create().
 */
static int fun_create_sq(struct fun_queue *funq)
{
	int err;

	err = fun_sq_create(funq->fdev, funq->sq_flags, funq->sqid,
			    funq->cqid, funq->sqe_size_log2, funq->sq_depth,
			    funq->sq_dma_addr, funq->sq_intcoal_nentries,
			    funq->sq_intcoal_usec, funq->cq_vector, 0, 0, 0,
			    &funq->sqid, &funq->sq_db);
	if (err)
		return err;

	dev_dbg(funq->fdev->dev, "created SQ %u\n", funq->sqid);
	return 0;
}
/* Create a funq's RQ on the device. The RQ is created through
 * fun_sq_create() using the funq's RQ flags, an SQE size log2 of 0 and a
 * buffer size log2 of PAGE_SHIFT. The RQ id and doorbell address of @funq are
 * passed as output locations.
 *
 * Returns 0 on success or the error returned by fun_sq_create().
 */
int fun_create_rq(struct fun_queue *funq)
{
	int err;

	err = fun_sq_create(funq->fdev, funq->rq_flags, funq->rqid,
			    funq->cqid, 0, funq->rq_depth, funq->rq_dma_addr,
			    0, 0, funq->cq_vector, 0, 0, PAGE_SHIFT,
			    &funq->rqid, &funq->rq_db);
	if (err)
		return err;

	dev_dbg(funq->fdev->dev, "created RQ %u\n", funq->rqid);
	return 0;
}
/* Return the IRQ number that pci_irq_vector() reports for the funq's CQ
 * vector on the funq's PCI device.
 */
static unsigned int funq_irq(struct fun_queue *funq)
{
	struct pci_dev *pdev = to_pci_dev(funq->fdev->dev);

	return pci_irq_vector(pdev, funq->cq_vector);
}
/* Request the interrupt of the funq's CQ vector.
 *
 * @funq: queue whose CQ vector is used
 * @devname: prefix for the interrupt name
 * @handler: interrupt handler
 * @data: cookie passed to @handler
 *
 * The interrupt is named "<devname>-adminq" for queue index 0 and
 * "<devname>-q[<qid>]" otherwise. @handler and @data are recorded in @funq;
 * the recorded handler is cleared again if the request fails.
 *
 * Returns -EINVAL if no CQ vector is assigned (cq_vector < 0), otherwise the
 * result of request_irq().
 */
int fun_request_irq(struct fun_queue *funq, const char *devname,
		    irq_handler_t handler, void *data)
{
	int err;

	if (funq->cq_vector < 0)
		return -EINVAL;

	funq->irq_handler = handler;
	funq->irq_data = data;

	if (funq->qid)
		snprintf(funq->irqname, sizeof(funq->irqname), "%s-q[%d]",
			 devname, funq->qid);
	else
		snprintf(funq->irqname, sizeof(funq->irqname), "%s-adminq",
			 devname);

	err = request_irq(funq_irq(funq), handler, 0, funq->irqname, data);
	if (err)
		funq->irq_handler = NULL;

	return err;
}
/* Create all component queues of a funq on the device.
 *
 * The CQ is created first, followed by the RQ if the funq has one
 * (rq_depth != 0), and finally the SQ. If a step fails, the components
 * created before it are destroyed again.
 *
 * Returns 0 on success or the error of the failing creation step.
 */
int fun_create_queue(struct fun_queue *funq)
{
	int rc;

	rc = fun_create_cq(funq);
	if (rc)
		return rc;

	if (funq->rq_depth) {
		rc = fun_create_rq(funq);
		if (rc)
			goto release_cq;
	}

	rc = fun_create_sq(funq);
	if (rc)
		goto release_rq;

	return 0;

release_rq:
	/* Only tear down the RQ if one was created above. Without an RQ,
	 * rqid does not name a queue created by this function, so no destroy
	 * command may be issued for it.
	 */
	if (funq->rq_depth)
		fun_destroy_sq(funq->fdev, funq->rqid);
release_cq:
	fun_destroy_cq(funq->fdev, funq->cqid);
	return rc;
}
/* Free the interrupt of the funq's CQ vector if a handler is recorded in
 * @funq, then clear the recorded handler and its data. Does nothing when no
 * handler is recorded.
 */
void fun_free_irq(struct fun_queue *funq)
{
	if (!funq->irq_handler)
		return;

	free_irq(funq_irq(funq), funq->irq_data);
	funq->irq_handler = NULL;
	funq->irq_data = NULL;
}

View file

@ -0,0 +1,175 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
#ifndef _FUN_QEUEUE_H
#define _FUN_QEUEUE_H
#include <linux/interrupt.h>
#include <linux/io.h>
struct device;
struct fun_dev;
struct fun_queue;
struct fun_cqe_info;
struct fun_rsp_common;
/* Type of the callback a queue invokes for a completion. It receives the
 * queue, the opaque data pointer stored in the queue's cb_data, the response
 * message, and the CQE's struct fun_cqe_info.
 */
typedef void (*cq_callback_t)(struct fun_queue *funq, void *data, void *msg,
const struct fun_cqe_info *info);
/* Software state of one RQ buffer. */
struct fun_rq_info {
/* DMA address of the buffer's page mapping */
dma_addr_t dma;
/* page backing the buffer, NULL if the slot holds no page */
struct page *page;
};
/* A queue group consisting of an SQ, a CQ, and an optional RQ. */
struct fun_queue {
/* device the queue group belongs to */
struct fun_dev *fdev;
/* NOTE(review): presumably serializes SQ submissions; its users are not
 * in this header - confirm.
 */
spinlock_t sq_lock;
/* DMA addresses of the CQ, SQ, and RQ rings */
dma_addr_t cq_dma_addr;
dma_addr_t sq_dma_addr;
dma_addr_t rq_dma_addr;
/* doorbell registers of the CQ, SQ, and RQ */
u32 __iomem *cq_db;
u32 __iomem *sq_db;
u32 __iomem *rq_db;
/* CQ ring, entries are 1 << cqe_size_log2 bytes */
void *cqes;
/* SQ ring, entries are 1 << sqe_size_log2 bytes */
void *sq_cmds;
/* RQ descriptor ring and the per-descriptor software state */
struct fun_eprq_rqbuf *rqes;
struct fun_rq_info *rq_info;
/* ids of the CQ, SQ, and RQ */
u32 cqid;
u32 sqid;
u32 rqid;
/* number of entries in the CQ, SQ, and RQ; rq_depth is 0 without an RQ */
u32 cq_depth;
u32 sq_depth;
u32 rq_depth;
/* index of the next CQE to examine */
u16 cq_head;
/* SQ tail most recently written to the SQ doorbell */
u16 sq_tail;
/* RQ tail value written to the RQ doorbell */
u16 rq_tail;
u8 cqe_size_log2;
u8 sqe_size_log2;
/* byte offset of struct fun_cqe_info within a CQE */
u16 cqe_info_offset;
/* index of the RQ buffer currently being consumed */
u16 rq_buf_idx;
/* most recently recorded data offset within the RQ; starts at -1 */
int rq_buf_offset;
/* consumed RQ buffers not yet returned through the RQ doorbell */
u16 num_rqe_to_fill;
/* NOTE(review): presumably interrupt coalescing settings, handed to the
 * device at queue creation - confirm.
 */
u8 cq_intcoal_usec;
u8 cq_intcoal_nentries;
u8 sq_intcoal_usec;
u8 sq_intcoal_nentries;
/* flags passed to the device when creating the CQ, SQ, and RQ */
u16 cq_flags;
u16 sq_flags;
u16 rq_flags;
/* SQ head writeback */
u16 sq_comp;
volatile __be64 *sq_head;
/* completion callback and the data pointer passed to it */
cq_callback_t cq_cb;
void *cb_data;
/* interrupt handler and cookie recorded for the CQ vector */
irq_handler_t irq_handler;
void *irq_data;
/* interrupt vector index of the CQ, negative if none is assigned */
s16 cq_vector;
/* phase bit value expected in new CQEs */
u8 cq_phase;
/* I/O q index */
u16 qid;
/* name given to the queue's interrupt */
char irqname[24];
};
/* Return the address of the SQ entry at index @pos of @funq's SQ ring. */
static inline void *fun_sqe_at(const struct fun_queue *funq, unsigned int pos)
{
	unsigned int byte_off = pos << funq->sqe_size_log2;

	return funq->sq_cmds + byte_off;
}
/* Advance @tail by one entry, wrapping to 0 at the SQ depth, record the
 * result as the funq's SQ tail and write it to the SQ doorbell.
 */
static inline void funq_sq_post_tail(struct fun_queue *funq, u16 tail)
{
	tail++;
	if (tail == funq->sq_depth)
		tail = 0;

	funq->sq_tail = tail;
	writel(tail, funq->sq_db);
}
/* Return the struct fun_cqe_info of @cqe, located cqe_info_offset bytes from
 * the start of the CQE.
 */
static inline struct fun_cqe_info *funq_cqe_info(const struct fun_queue *funq,
						 void *cqe)
{
	void *info = cqe + funq->cqe_info_offset;

	return info;
}
static inline void funq_rq_post(struct fun_queue *funq)
{
writel(funq->rq_tail, funq->rq_db);
}
/* Parameters for fun_alloc_queue(). */
struct fun_queue_alloc_req {
/* log2 of the CQE and SQE sizes in bytes */
u8 cqe_size_log2;
u8 sqe_size_log2;
/* flags for the CQ, SQ, and RQ */
u16 cq_flags;
u16 sq_flags;
u16 rq_flags;
/* number of entries in the CQ, SQ, and RQ; an rq_depth of 0 means no RQ */
u32 cq_depth;
u32 sq_depth;
u32 rq_depth;
/* NOTE(review): presumably interrupt coalescing settings of the CQ and
 * SQ - confirm.
 */
u8 cq_intcoal_usec;
u8 cq_intcoal_nentries;
u8 sq_intcoal_usec;
u8 sq_intcoal_nentries;
};
int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid,
u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr,
u8 coal_nentries, u8 coal_usec, u32 irq_num,
u32 scan_start_id, u32 scan_end_id,
u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp);
int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid,
u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr,
u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec,
u32 irq_num, u32 scan_start_id, u32 scan_end_id,
u32 *cqidp, u32 __iomem **dbp);
void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth,
size_t hw_desc_sz, size_t sw_desc_size, bool wb,
int numa_node, dma_addr_t *dma_addr, void **sw_va,
volatile __be64 **wb_va);
void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz,
bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va);
/* Destroy the SQ with id @sqid through fun_res_destroy() using
 * FUN_ADMIN_OP_EPSQ.
 */
#define fun_destroy_sq(fdev, sqid) \
fun_res_destroy((fdev), FUN_ADMIN_OP_EPSQ, 0, (sqid))
/* Destroy the CQ with id @cqid through fun_res_destroy() using
 * FUN_ADMIN_OP_EPCQ.
 */
#define fun_destroy_cq(fdev, cqid) \
fun_res_destroy((fdev), FUN_ADMIN_OP_EPCQ, 0, (cqid))
struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid,
const struct fun_queue_alloc_req *req);
void fun_free_queue(struct fun_queue *funq);
/* Record @cb as the completion callback of @funq and @cb_data as the opaque
 * pointer stored alongside it.
 */
static inline void fun_set_cq_callback(struct fun_queue *funq, cq_callback_t cb,
				       void *cb_data)
{
	funq->cb_data = cb_data;
	funq->cq_cb = cb;
}
int fun_create_rq(struct fun_queue *funq);
int fun_create_queue(struct fun_queue *funq);
void fun_free_irq(struct fun_queue *funq);
int fun_request_irq(struct fun_queue *funq, const char *devname,
irq_handler_t handler, void *data);
unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max);
unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max);
#endif /* _FUN_QEUEUE_H */