linux-stable/drivers/bus/mhi/pci_generic.c
Loic Poulain 026c5b1ec2 bus: mhi: pci_generic: Increase num of elements in hw event ring
We met some sporadic modem crashes during high throughput testing, this
has been root caused to a lack of elements in the event ring. Indeed,
the modem is simply crashing when event ring becomes empty.

It appears that the total number event ring elements is too low given
the performances of the modem (IPA hardware accelerator). This change
increases the number of elements in the hardware event ring to 2048,
which is aligned with what is defined in downstream version:
https://source.codeaurora.org/quic/la/kernel/msm-4.14/tree/arch/arm64/boot/dts/qcom/sm8150-mhi.dtsi?h=msm-4.14#n482

With this change, modem coes not crash anymore.

Note: An event ring element is 16-Byte, so the total memory usage of
a hardware event ring is now 32KB.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/1612514195-8257-1-git-send-email-loic.poulain@linaro.org
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
2021-02-05 17:59:29 +05:30

678 lines
17 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MHI PCI driver - MHI over PCI controller driver
*
* This module is a generic driver for registering MHI-over-PCI devices,
* such as PCIe QCOM modems.
*
* Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
*/
#include <linux/aer.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/mhi.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#define MHI_PCI_DEFAULT_BAR_NUM 0
#define MHI_POST_RESET_DELAY_MS 500
#define HEALTH_CHECK_PERIOD (HZ * 2)
/**
* struct mhi_pci_dev_info - MHI PCI device specific information
* @config: MHI controller configuration
* @name: name of the PCI module
* @fw: firmware path (if any)
* @edl: emergency download mode firmware path (if any)
* @bar_num: PCI base address register to use for MHI MMIO register space
* @dma_data_width: DMA transfer word size (32 or 64 bits)
*/
struct mhi_pci_dev_info {
const struct mhi_controller_config *config;
const char *name;
const char *fw;
const char *edl;
unsigned int bar_num;
unsigned int dma_data_width;
};
#define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
{ \
.num = ch_num, \
.name = ch_name, \
.num_elements = el_count, \
.event_ring = ev_ring, \
.dir = DMA_TO_DEVICE, \
.ee_mask = BIT(MHI_EE_AMSS), \
.pollcfg = 0, \
.doorbell = MHI_DB_BRST_DISABLE, \
.lpm_notify = false, \
.offload_channel = false, \
.doorbell_mode_switch = false, \
} \
#define MHI_CHANNEL_CONFIG_DL(ch_num, ch_name, el_count, ev_ring) \
{ \
.num = ch_num, \
.name = ch_name, \
.num_elements = el_count, \
.event_ring = ev_ring, \
.dir = DMA_FROM_DEVICE, \
.ee_mask = BIT(MHI_EE_AMSS), \
.pollcfg = 0, \
.doorbell = MHI_DB_BRST_DISABLE, \
.lpm_notify = false, \
.offload_channel = false, \
.doorbell_mode_switch = false, \
}
#define MHI_EVENT_CONFIG_CTRL(ev_ring) \
{ \
.num_elements = 64, \
.irq_moderation_ms = 0, \
.irq = (ev_ring) + 1, \
.priority = 1, \
.mode = MHI_DB_BRST_DISABLE, \
.data_type = MHI_ER_CTRL, \
.hardware_event = false, \
.client_managed = false, \
.offload_channel = false, \
}
#define MHI_CHANNEL_CONFIG_HW_UL(ch_num, ch_name, el_count, ev_ring) \
{ \
.num = ch_num, \
.name = ch_name, \
.num_elements = el_count, \
.event_ring = ev_ring, \
.dir = DMA_TO_DEVICE, \
.ee_mask = BIT(MHI_EE_AMSS), \
.pollcfg = 0, \
.doorbell = MHI_DB_BRST_ENABLE, \
.lpm_notify = false, \
.offload_channel = false, \
.doorbell_mode_switch = true, \
} \
#define MHI_CHANNEL_CONFIG_HW_DL(ch_num, ch_name, el_count, ev_ring) \
{ \
.num = ch_num, \
.name = ch_name, \
.num_elements = el_count, \
.event_ring = ev_ring, \
.dir = DMA_FROM_DEVICE, \
.ee_mask = BIT(MHI_EE_AMSS), \
.pollcfg = 0, \
.doorbell = MHI_DB_BRST_ENABLE, \
.lpm_notify = false, \
.offload_channel = false, \
.doorbell_mode_switch = true, \
}
#define MHI_EVENT_CONFIG_DATA(ev_ring) \
{ \
.num_elements = 128, \
.irq_moderation_ms = 5, \
.irq = (ev_ring) + 1, \
.priority = 1, \
.mode = MHI_DB_BRST_DISABLE, \
.data_type = MHI_ER_DATA, \
.hardware_event = false, \
.client_managed = false, \
.offload_channel = false, \
}
#define MHI_EVENT_CONFIG_HW_DATA(ev_ring, ch_num) \
{ \
.num_elements = 2048, \
.irq_moderation_ms = 1, \
.irq = (ev_ring) + 1, \
.priority = 1, \
.mode = MHI_DB_BRST_DISABLE, \
.data_type = MHI_ER_DATA, \
.hardware_event = true, \
.client_managed = false, \
.offload_channel = false, \
.channel = ch_num, \
}
static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = {
MHI_CHANNEL_CONFIG_UL(4, "DIAG", 16, 1),
MHI_CHANNEL_CONFIG_DL(5, "DIAG", 16, 1),
MHI_CHANNEL_CONFIG_UL(12, "MBIM", 4, 0),
MHI_CHANNEL_CONFIG_DL(13, "MBIM", 4, 0),
MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0),
MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0),
MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0),
MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0),
MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2),
MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0", 128, 3),
};
static struct mhi_event_config modem_qcom_v1_mhi_events[] = {
/* first ring is control+data ring */
MHI_EVENT_CONFIG_CTRL(0),
/* DIAG dedicated event ring */
MHI_EVENT_CONFIG_DATA(1),
/* Hardware channels request dedicated hardware event rings */
MHI_EVENT_CONFIG_HW_DATA(2, 100),
MHI_EVENT_CONFIG_HW_DATA(3, 101)
};
static struct mhi_controller_config modem_qcom_v1_mhiv_config = {
.max_channels = 128,
.timeout_ms = 8000,
.num_channels = ARRAY_SIZE(modem_qcom_v1_mhi_channels),
.ch_cfg = modem_qcom_v1_mhi_channels,
.num_events = ARRAY_SIZE(modem_qcom_v1_mhi_events),
.event_cfg = modem_qcom_v1_mhi_events,
};
static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
.name = "qcom-sdx55m",
.fw = "qcom/sdx55m/sbl1.mbn",
.edl = "qcom/sdx55m/edl.mbn",
.config = &modem_qcom_v1_mhiv_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
.dma_data_width = 32
};
static const struct pci_device_id mhi_pci_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0306),
.driver_data = (kernel_ulong_t) &mhi_qcom_sdx55_info },
{ }
};
MODULE_DEVICE_TABLE(pci, mhi_pci_id_table);
enum mhi_pci_device_status {
MHI_PCI_DEV_STARTED,
};
struct mhi_pci_device {
struct mhi_controller mhi_cntrl;
struct pci_saved_state *pci_state;
struct work_struct recovery_work;
struct timer_list health_check_timer;
unsigned long status;
};
static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
void __iomem *addr, u32 *out)
{
*out = readl(addr);
return 0;
}
static void mhi_pci_write_reg(struct mhi_controller *mhi_cntrl,
void __iomem *addr, u32 val)
{
writel(val, addr);
}
static void mhi_pci_status_cb(struct mhi_controller *mhi_cntrl,
enum mhi_callback cb)
{
struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
/* Nothing to do for now */
switch (cb) {
case MHI_CB_FATAL_ERROR:
case MHI_CB_SYS_ERROR:
dev_warn(&pdev->dev, "firmware crashed (%u)\n", cb);
break;
default:
break;
}
}
static bool mhi_pci_is_alive(struct mhi_controller *mhi_cntrl)
{
struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
u16 vendor = 0;
if (pci_read_config_word(pdev, PCI_VENDOR_ID, &vendor))
return false;
if (vendor == (u16) ~0 || vendor == 0)
return false;
return true;
}
static int mhi_pci_claim(struct mhi_controller *mhi_cntrl,
unsigned int bar_num, u64 dma_mask)
{
struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
int err;
err = pci_assign_resource(pdev, bar_num);
if (err)
return err;
err = pcim_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "failed to enable pci device: %d\n", err);
return err;
}
err = pcim_iomap_regions(pdev, 1 << bar_num, pci_name(pdev));
if (err) {
dev_err(&pdev->dev, "failed to map pci region: %d\n", err);
return err;
}
mhi_cntrl->regs = pcim_iomap_table(pdev)[bar_num];
err = pci_set_dma_mask(pdev, dma_mask);
if (err) {
dev_err(&pdev->dev, "Cannot set proper DMA mask\n");
return err;
}
err = pci_set_consistent_dma_mask(pdev, dma_mask);
if (err) {
dev_err(&pdev->dev, "set consistent dma mask failed\n");
return err;
}
pci_set_master(pdev);
return 0;
}
static int mhi_pci_get_irqs(struct mhi_controller *mhi_cntrl,
const struct mhi_controller_config *mhi_cntrl_config)
{
struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
int nr_vectors, i;
int *irq;
/*
* Alloc one MSI vector for BHI + one vector per event ring, ideally...
* No explicit pci_free_irq_vectors required, done by pcim_release.
*/
mhi_cntrl->nr_irqs = 1 + mhi_cntrl_config->num_events;
nr_vectors = pci_alloc_irq_vectors(pdev, 1, mhi_cntrl->nr_irqs, PCI_IRQ_MSI);
if (nr_vectors < 0) {
dev_err(&pdev->dev, "Error allocating MSI vectors %d\n",
nr_vectors);
return nr_vectors;
}
if (nr_vectors < mhi_cntrl->nr_irqs) {
dev_warn(&pdev->dev, "using shared MSI\n");
/* Patch msi vectors, use only one (shared) */
for (i = 0; i < mhi_cntrl_config->num_events; i++)
mhi_cntrl_config->event_cfg[i].irq = 0;
mhi_cntrl->nr_irqs = 1;
}
irq = devm_kcalloc(&pdev->dev, mhi_cntrl->nr_irqs, sizeof(int), GFP_KERNEL);
if (!irq)
return -ENOMEM;
for (i = 0; i < mhi_cntrl->nr_irqs; i++) {
int vector = i >= nr_vectors ? (nr_vectors - 1) : i;
irq[i] = pci_irq_vector(pdev, vector);
}
mhi_cntrl->irq = irq;
return 0;
}
static int mhi_pci_runtime_get(struct mhi_controller *mhi_cntrl)
{
/* no PM for now */
return 0;
}
static void mhi_pci_runtime_put(struct mhi_controller *mhi_cntrl)
{
/* no PM for now */
}
static void mhi_pci_recovery_work(struct work_struct *work)
{
struct mhi_pci_device *mhi_pdev = container_of(work, struct mhi_pci_device,
recovery_work);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
int err;
dev_warn(&pdev->dev, "device recovery started\n");
del_timer(&mhi_pdev->health_check_timer);
/* Clean up MHI state */
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, false);
mhi_unprepare_after_power_down(mhi_cntrl);
}
/* Check if we can recover without full reset */
pci_set_power_state(pdev, PCI_D0);
pci_load_saved_state(pdev, mhi_pdev->pci_state);
pci_restore_state(pdev);
if (!mhi_pci_is_alive(mhi_cntrl))
goto err_try_reset;
err = mhi_prepare_for_power_up(mhi_cntrl);
if (err)
goto err_try_reset;
err = mhi_sync_power_up(mhi_cntrl);
if (err)
goto err_unprepare;
dev_dbg(&pdev->dev, "Recovery completed\n");
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
return;
err_unprepare:
mhi_unprepare_after_power_down(mhi_cntrl);
err_try_reset:
if (pci_reset_function(pdev))
dev_err(&pdev->dev, "Recovery failed\n");
}
static void health_check(struct timer_list *t)
{
struct mhi_pci_device *mhi_pdev = from_timer(mhi_pdev, t, health_check_timer);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
if (!mhi_pci_is_alive(mhi_cntrl)) {
dev_err(mhi_cntrl->cntrl_dev, "Device died\n");
queue_work(system_long_wq, &mhi_pdev->recovery_work);
return;
}
/* reschedule in two seconds */
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
}
static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data;
const struct mhi_controller_config *mhi_cntrl_config;
struct mhi_pci_device *mhi_pdev;
struct mhi_controller *mhi_cntrl;
int err;
dev_dbg(&pdev->dev, "MHI PCI device found: %s\n", info->name);
/* mhi_pdev.mhi_cntrl must be zero-initialized */
mhi_pdev = devm_kzalloc(&pdev->dev, sizeof(*mhi_pdev), GFP_KERNEL);
if (!mhi_pdev)
return -ENOMEM;
INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work);
timer_setup(&mhi_pdev->health_check_timer, health_check, 0);
mhi_cntrl_config = info->config;
mhi_cntrl = &mhi_pdev->mhi_cntrl;
mhi_cntrl->cntrl_dev = &pdev->dev;
mhi_cntrl->iova_start = 0;
mhi_cntrl->iova_stop = (dma_addr_t)DMA_BIT_MASK(info->dma_data_width);
mhi_cntrl->fw_image = info->fw;
mhi_cntrl->edl_image = info->edl;
mhi_cntrl->read_reg = mhi_pci_read_reg;
mhi_cntrl->write_reg = mhi_pci_write_reg;
mhi_cntrl->status_cb = mhi_pci_status_cb;
mhi_cntrl->runtime_get = mhi_pci_runtime_get;
mhi_cntrl->runtime_put = mhi_pci_runtime_put;
err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width));
if (err)
return err;
err = mhi_pci_get_irqs(mhi_cntrl, mhi_cntrl_config);
if (err)
return err;
pci_set_drvdata(pdev, mhi_pdev);
/* Have stored pci confspace at hand for restore in sudden PCI error */
pci_save_state(pdev);
mhi_pdev->pci_state = pci_store_saved_state(pdev);
pci_enable_pcie_error_reporting(pdev);
err = mhi_register_controller(mhi_cntrl, mhi_cntrl_config);
if (err)
return err;
/* MHI bus does not power up the controller by default */
err = mhi_prepare_for_power_up(mhi_cntrl);
if (err) {
dev_err(&pdev->dev, "failed to prepare MHI controller\n");
goto err_unregister;
}
err = mhi_sync_power_up(mhi_cntrl);
if (err) {
dev_err(&pdev->dev, "failed to power up MHI controller\n");
goto err_unprepare;
}
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
/* start health check */
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
return 0;
err_unprepare:
mhi_unprepare_after_power_down(mhi_cntrl);
err_unregister:
mhi_unregister_controller(mhi_cntrl);
return err;
}
static void mhi_pci_remove(struct pci_dev *pdev)
{
struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
del_timer(&mhi_pdev->health_check_timer);
cancel_work_sync(&mhi_pdev->recovery_work);
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, true);
mhi_unprepare_after_power_down(mhi_cntrl);
}
mhi_unregister_controller(mhi_cntrl);
}
static void mhi_pci_reset_prepare(struct pci_dev *pdev)
{
struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
dev_info(&pdev->dev, "reset\n");
del_timer(&mhi_pdev->health_check_timer);
/* Clean up MHI state */
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, false);
mhi_unprepare_after_power_down(mhi_cntrl);
}
/* cause internal device reset */
mhi_soc_reset(mhi_cntrl);
/* Be sure device reset has been executed */
msleep(MHI_POST_RESET_DELAY_MS);
}
static void mhi_pci_reset_done(struct pci_dev *pdev)
{
struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
int err;
/* Restore initial known working PCI state */
pci_load_saved_state(pdev, mhi_pdev->pci_state);
pci_restore_state(pdev);
/* Is device status available ? */
if (!mhi_pci_is_alive(mhi_cntrl)) {
dev_err(&pdev->dev, "reset failed\n");
return;
}
err = mhi_prepare_for_power_up(mhi_cntrl);
if (err) {
dev_err(&pdev->dev, "failed to prepare MHI controller\n");
return;
}
err = mhi_sync_power_up(mhi_cntrl);
if (err) {
dev_err(&pdev->dev, "failed to power up MHI controller\n");
mhi_unprepare_after_power_down(mhi_cntrl);
return;
}
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
}
static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
dev_err(&pdev->dev, "PCI error detected, state = %u\n", state);
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
/* Clean up MHI state */
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, false);
mhi_unprepare_after_power_down(mhi_cntrl);
} else {
/* Nothing to do */
return PCI_ERS_RESULT_RECOVERED;
}
pci_disable_device(pdev);
return PCI_ERS_RESULT_NEED_RESET;
}
static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)
{
if (pci_enable_device(pdev)) {
dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");
return PCI_ERS_RESULT_DISCONNECT;
}
return PCI_ERS_RESULT_RECOVERED;
}
static void mhi_pci_io_resume(struct pci_dev *pdev)
{
struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
dev_err(&pdev->dev, "PCI slot reset done\n");
queue_work(system_long_wq, &mhi_pdev->recovery_work);
}
static const struct pci_error_handlers mhi_pci_err_handler = {
.error_detected = mhi_pci_error_detected,
.slot_reset = mhi_pci_slot_reset,
.resume = mhi_pci_io_resume,
.reset_prepare = mhi_pci_reset_prepare,
.reset_done = mhi_pci_reset_done,
};
static int __maybe_unused mhi_pci_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
del_timer(&mhi_pdev->health_check_timer);
cancel_work_sync(&mhi_pdev->recovery_work);
/* Transition to M3 state */
mhi_pm_suspend(mhi_cntrl);
pci_save_state(pdev);
pci_disable_device(pdev);
pci_wake_from_d3(pdev, true);
pci_set_power_state(pdev, PCI_D3hot);
return 0;
}
static int __maybe_unused mhi_pci_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
int err;
pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
pci_set_master(pdev);
err = pci_enable_device(pdev);
if (err)
goto err_recovery;
/* Exit M3, transition to M0 state */
err = mhi_pm_resume(mhi_cntrl);
if (err) {
dev_err(&pdev->dev, "failed to resume device: %d\n", err);
goto err_recovery;
}
/* Resume health check */
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
return 0;
err_recovery:
/* The device may have loose power or crashed, try recovering it */
queue_work(system_long_wq, &mhi_pdev->recovery_work);
return err;
}
static const struct dev_pm_ops mhi_pci_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(mhi_pci_suspend, mhi_pci_resume)
};
static struct pci_driver mhi_pci_driver = {
.name = "mhi-pci-generic",
.id_table = mhi_pci_id_table,
.probe = mhi_pci_probe,
.remove = mhi_pci_remove,
.err_handler = &mhi_pci_err_handler,
.driver.pm = &mhi_pci_pm_ops
};
module_pci_driver(mhi_pci_driver);
MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
MODULE_DESCRIPTION("Modem Host Interface (MHI) PCI controller driver");
MODULE_LICENSE("GPL");