From 319ec8b3a5633bfbb83d7da895233c91827e86ce Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Fri, 18 Aug 2017 14:59:26 +0200 Subject: [PATCH 01/53] irqchip/armada-370-xp: Enable MSI-X support Armada XP does not only support MSI, but also MSI-X. This patch sets the MSI_FLAG_PCI_MSIX flag in the interrupt controller driver which is the only change necessary to enable MSI-X support on this SoC, as the Linux PCI MSI-X infrastructure takes care of writing the data and address structures into the BAR specified by the MSI-X controller. Signed-off-by: Stefan Roese Reviewed-by: Thomas Petazzoni Cc: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: Bjorn Helgaas Cc: Gregory CLEMENT Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-armada-370-xp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index b207b2c3aa55..9d42a8f2aec2 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -203,7 +203,7 @@ static struct irq_chip armada_370_xp_msi_irq_chip = { static struct msi_domain_info armada_370_xp_msi_domain_info = { .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_MULTI_PCI_MSI), + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), .chip = &armada_370_xp_msi_irq_chip, }; From 9bdd8b1cdeb6a873acb1d1e915d372e3440a4179 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 19 Aug 2017 10:16:02 +0100 Subject: [PATCH 02/53] irqchip/gic-v3-its: Properly handle command queue wrapping wait_for_range_completion() is nicely busted when handling wrapping of the command queue, leading to an early exit instead of waiting for the command to have been executed. Fortunately, the impact is pretty minor, as it only impairs the detection of an ITS that doesn't make any forward progress for a whole second. And an ITS should *never* lock up. 
Reported-by: Yang Yingliang Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 68932873eebc..350a959da6dd 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -453,7 +453,13 @@ static void its_wait_for_range_completion(struct its_node *its, while (1) { rd_idx = readl_relaxed(its->base + GITS_CREADR); - if (rd_idx >= to_idx || rd_idx < from_idx) + + /* Direct case */ + if (from_idx < to_idx && rd_idx >= to_idx) + break; + + /* Wrapped case */ + if (from_idx >= to_idx && rd_idx >= to_idx && rd_idx < from_idx) break; count--; From 5ed34d3a4387c8967801688f66b90ce0c7facda0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 23 Aug 2017 10:31:47 +0900 Subject: [PATCH 03/53] irqchip: Add UniPhier AIDET irqchip driver UniPhier SoCs contain AIDET (ARM Interrupt Detector). This is intended to provide additional features that are not covered by GIC. The main purpose is to provide a logic inverter to support low level and falling edge trigger types for interrupt lines from on-board devices. 
Acked-by: Rob Herring Signed-off-by: Masahiro Yamada Signed-off-by: Marc Zyngier --- .../socionext,uniphier-aidet.txt | 32 +++ MAINTAINERS | 1 + drivers/irqchip/Kconfig | 8 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-uniphier-aidet.c | 261 ++++++++++++++++++ 5 files changed, 303 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt create mode 100644 drivers/irqchip/irq-uniphier-aidet.c diff --git a/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt new file mode 100644 index 000000000000..48e71d3ac2ad --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt @@ -0,0 +1,32 @@ +UniPhier AIDET + +UniPhier AIDET (ARM Interrupt Detector) is an add-on block for ARM GIC (Generic +Interrupt Controller). GIC itself can handle only high level and rising edge +interrupts. The AIDET provides logic inverter to support low level and falling +edge interrupts. + +Required properties: +- compatible: Should be one of the following: + "socionext,uniphier-ld4-aidet" - for LD4 SoC + "socionext,uniphier-pro4-aidet" - for Pro4 SoC + "socionext,uniphier-sld8-aidet" - for sLD8 SoC + "socionext,uniphier-pro5-aidet" - for Pro5 SoC + "socionext,uniphier-pxs2-aidet" - for PXs2/LD6b SoC + "socionext,uniphier-ld11-aidet" - for LD11 SoC + "socionext,uniphier-ld20-aidet" - for LD20 SoC + "socionext,uniphier-pxs3-aidet" - for PXs3 SoC +- reg: Specifies offset and length of the register set for the device. +- interrupt-controller: Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode an interrupt + source. The value should be 2. The first cell defines the interrupt number + (corresponds to the SPI interrupt number of GIC). 
The second cell specifies + the trigger type as defined in interrupts.txt in this directory. + +Example: + + aidet: aidet@5fc20000 { + compatible = "socionext,uniphier-pro4-aidet"; + reg = <0x5fc20000 0x200>; + interrupt-controller; + #interrupt-cells = <2>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 6f7721d1634c..d55eed96a22f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1993,6 +1993,7 @@ F: arch/arm64/boot/dts/socionext/ F: drivers/bus/uniphier-system-bus.c F: drivers/clk/uniphier/ F: drivers/i2c/busses/i2c-uniphier* +F: drivers/irqchip/irq-uniphier-aidet.c F: drivers/pinctrl/uniphier/ F: drivers/reset/reset-uniphier.c F: drivers/tty/serial/8250/8250_uniphier.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index f1fd5f44d1d4..d1f43cb92e4d 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -306,3 +306,11 @@ config QCOM_IRQ_COMBINER help Say yes here to add support for the IRQ combiner devices embedded in Qualcomm Technologies chips. + +config IRQ_UNIPHIER_AIDET + bool "UniPhier AIDET support" if COMPILE_TEST + depends on ARCH_UNIPHIER || COMPILE_TEST + default ARCH_UNIPHIER + select IRQ_DOMAIN_HIERARCHY + help + Support for the UniPhier AIDET (ARM Interrupt Detector). diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index e88d856cc09c..2c630574986f 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -78,3 +78,4 @@ obj-$(CONFIG_EZNPS_GIC) += irq-eznps.o obj-$(CONFIG_ARCH_ASPEED) += irq-aspeed-vic.o irq-aspeed-i2c-ic.o obj-$(CONFIG_STM32_EXTI) += irq-stm32-exti.o obj-$(CONFIG_QCOM_IRQ_COMBINER) += qcom-irq-combiner.o +obj-$(CONFIG_IRQ_UNIPHIER_AIDET) += irq-uniphier-aidet.o diff --git a/drivers/irqchip/irq-uniphier-aidet.c b/drivers/irqchip/irq-uniphier-aidet.c new file mode 100644 index 000000000000..7ba7f253470e --- /dev/null +++ b/drivers/irqchip/irq-uniphier-aidet.c @@ -0,0 +1,261 @@ +/* + * Driver for UniPhier AIDET (ARM Interrupt Detector) + * + * Copyright (C) 2017 Socionext Inc. 
+ * Author: Masahiro Yamada + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UNIPHIER_AIDET_NR_IRQS 256 + +#define UNIPHIER_AIDET_DETCONF 0x04 /* inverter register base */ + +struct uniphier_aidet_priv { + struct irq_domain *domain; + void __iomem *reg_base; + spinlock_t lock; + u32 saved_vals[UNIPHIER_AIDET_NR_IRQS / 32]; +}; + +static void uniphier_aidet_reg_update(struct uniphier_aidet_priv *priv, + unsigned int reg, u32 mask, u32 val) +{ + unsigned long flags; + u32 tmp; + + spin_lock_irqsave(&priv->lock, flags); + tmp = readl_relaxed(priv->reg_base + reg); + tmp &= ~mask; + tmp |= mask & val; + writel_relaxed(tmp, priv->reg_base + reg); + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void uniphier_aidet_detconf_update(struct uniphier_aidet_priv *priv, + unsigned long index, unsigned int val) +{ + unsigned int reg; + u32 mask; + + reg = UNIPHIER_AIDET_DETCONF + index / 32 * 4; + mask = BIT(index % 32); + + uniphier_aidet_reg_update(priv, reg, mask, val ? 
mask : 0); +} + +static int uniphier_aidet_irq_set_type(struct irq_data *data, unsigned int type) +{ + struct uniphier_aidet_priv *priv = data->chip_data; + unsigned int val; + + /* enable inverter for active low triggers */ + switch (type) { + case IRQ_TYPE_EDGE_RISING: + case IRQ_TYPE_LEVEL_HIGH: + val = 0; + break; + case IRQ_TYPE_EDGE_FALLING: + val = 1; + type = IRQ_TYPE_EDGE_RISING; + break; + case IRQ_TYPE_LEVEL_LOW: + val = 1; + type = IRQ_TYPE_LEVEL_HIGH; + break; + default: + return -EINVAL; + } + + uniphier_aidet_detconf_update(priv, data->hwirq, val); + + return irq_chip_set_type_parent(data, type); +} + +static struct irq_chip uniphier_aidet_irq_chip = { + .name = "AIDET", + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_set_type = uniphier_aidet_irq_set_type, +}; + +static int uniphier_aidet_domain_translate(struct irq_domain *domain, + struct irq_fwspec *fwspec, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + if (WARN_ON(fwspec->param_count < 2)) + return -EINVAL; + + *out_hwirq = fwspec->param[0]; + *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; + + return 0; +} + +static int uniphier_aidet_domain_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + struct irq_fwspec parent_fwspec; + irq_hw_number_t hwirq; + unsigned int type; + int ret; + + if (nr_irqs != 1) + return -EINVAL; + + ret = uniphier_aidet_domain_translate(domain, arg, &hwirq, &type); + if (ret) + return ret; + + switch (type) { + case IRQ_TYPE_EDGE_RISING: + case IRQ_TYPE_LEVEL_HIGH: + break; + case IRQ_TYPE_EDGE_FALLING: + type = IRQ_TYPE_EDGE_RISING; + break; + case IRQ_TYPE_LEVEL_LOW: + type = IRQ_TYPE_LEVEL_HIGH; + break; + default: + return -EINVAL; + } + + if (hwirq >= UNIPHIER_AIDET_NR_IRQS) + return -ENXIO; + + ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + &uniphier_aidet_irq_chip, + 
domain->host_data); + if (ret) + return ret; + + /* parent is GIC */ + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 3; + parent_fwspec.param[0] = 0; /* SPI */ + parent_fwspec.param[1] = hwirq; + parent_fwspec.param[2] = type; + + return irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); +} + +static const struct irq_domain_ops uniphier_aidet_domain_ops = { + .alloc = uniphier_aidet_domain_alloc, + .free = irq_domain_free_irqs_common, + .translate = uniphier_aidet_domain_translate, +}; + +static int uniphier_aidet_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *parent_np; + struct irq_domain *parent_domain; + struct uniphier_aidet_priv *priv; + struct resource *res; + + parent_np = of_irq_find_parent(dev->of_node); + if (!parent_np) + return -ENXIO; + + parent_domain = irq_find_host(parent_np); + of_node_put(parent_np); + if (!parent_domain) + return -EPROBE_DEFER; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->reg_base = devm_ioremap_resource(dev, res); + if (IS_ERR(priv->reg_base)) + return PTR_ERR(priv->reg_base); + + spin_lock_init(&priv->lock); + + priv->domain = irq_domain_create_hierarchy( + parent_domain, 0, + UNIPHIER_AIDET_NR_IRQS, + of_node_to_fwnode(dev->of_node), + &uniphier_aidet_domain_ops, priv); + if (!priv->domain) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + return 0; +} + +static int __maybe_unused uniphier_aidet_suspend(struct device *dev) +{ + struct uniphier_aidet_priv *priv = dev_get_drvdata(dev); + int i; + + for (i = 0; i < ARRAY_SIZE(priv->saved_vals); i++) + priv->saved_vals[i] = readl_relaxed( + priv->reg_base + UNIPHIER_AIDET_DETCONF + i * 4); + + return 0; +} + +static int __maybe_unused uniphier_aidet_resume(struct device *dev) +{ + struct uniphier_aidet_priv *priv = dev_get_drvdata(dev); + int i; + + for (i = 0; i < 
ARRAY_SIZE(priv->saved_vals); i++) + writel_relaxed(priv->saved_vals[i], + priv->reg_base + UNIPHIER_AIDET_DETCONF + i * 4); + + return 0; +} + +static const struct dev_pm_ops uniphier_aidet_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(uniphier_aidet_suspend, + uniphier_aidet_resume) +}; + +static const struct of_device_id uniphier_aidet_match[] = { + { .compatible = "socionext,uniphier-ld4-aidet" }, + { .compatible = "socionext,uniphier-pro4-aidet" }, + { .compatible = "socionext,uniphier-sld8-aidet" }, + { .compatible = "socionext,uniphier-pro5-aidet" }, + { .compatible = "socionext,uniphier-pxs2-aidet" }, + { .compatible = "socionext,uniphier-ld11-aidet" }, + { .compatible = "socionext,uniphier-ld20-aidet" }, + { .compatible = "socionext,uniphier-pxs3-aidet" }, + { /* sentinel */ } +}; + +static struct platform_driver uniphier_aidet_driver = { + .probe = uniphier_aidet_probe, + .driver = { + .name = "uniphier-aidet", + .of_match_table = uniphier_aidet_match, + .pm = &uniphier_aidet_pm_ops, + }, +}; +builtin_platform_driver(uniphier_aidet_driver); From e81f54c668d89e50bad38f3fc4c5ea6e4be3a96e Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Jul 2017 16:43:10 -0500 Subject: [PATCH 04/53] irqchip: Convert to using %pOF instead of full_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have a custom printf format specifier, convert users of full_name to use %pOF instead. This is preparation to remove storing of the full path string for each node. 
Cc: Thomas Gleixner Cc: Jason Cooper Cc: Lee Jones Cc: Stefan Wahren Cc: Florian Fainelli Cc: Ray Jui Cc: Scott Branden Cc: bcm-kernel-feedback-list@broadcom.com Cc: Sylvain Lemieux Cc: Maxime Coquelin Cc: Chen-Yu Tsai Cc: Thierry Reding Cc: Jonathan Hunter Cc: Michal Simek Cc: "Sören Brinkmann" Cc: linux-rpi-kernel@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Cc: linux-tegra@vger.kernel.org Acked-by: Eric Anholt Acked-by: Baruch Siach Acked-by: Vladimir Zapolskiy Acked-by: Matthias Brugger Acked-by: Alexandre Torgue Acked-by: Maxime Ripard Signed-off-by: Rob Herring Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-bcm2835.c | 9 ++++----- drivers/irqchip/irq-bcm2836.c | 5 ++--- drivers/irqchip/irq-crossbar.c | 6 +++--- drivers/irqchip/irq-digicolor.c | 8 ++++---- drivers/irqchip/irq-dw-apb-ictl.c | 12 ++++++------ drivers/irqchip/irq-gic-v3-its-pci-msi.c | 2 +- drivers/irqchip/irq-gic-v3-its.c | 6 +++--- drivers/irqchip/irq-gic-v3.c | 11 ++++------- drivers/irqchip/irq-imx-gpcv2.c | 4 ++-- drivers/irqchip/irq-lpc32xx.c | 2 +- drivers/irqchip/irq-mtk-sysirq.c | 3 +-- drivers/irqchip/irq-mxs.c | 4 ++-- drivers/irqchip/irq-stm32-exti.c | 8 ++++---- drivers/irqchip/irq-sun4i.c | 6 +++--- drivers/irqchip/irq-tegra.c | 16 ++++++++-------- drivers/irqchip/irq-xilinx-intc.c | 4 ++-- 16 files changed, 50 insertions(+), 56 deletions(-) diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c index 44d7c38dde47..d2da8a1e6b1b 100644 --- a/drivers/irqchip/irq-bcm2835.c +++ b/drivers/irqchip/irq-bcm2835.c @@ -147,13 +147,12 @@ static int __init armctrl_of_init(struct device_node *node, base = of_iomap(node, 0); if (!base) - panic("%s: unable to map IC registers\n", - node->full_name); + panic("%pOF: unable to map IC registers\n", node); intc.domain = irq_domain_add_linear(node, MAKE_HWIRQ(NR_BANKS, 0), &armctrl_ops, NULL); if (!intc.domain) - panic("%s: unable to create IRQ domain\n", node->full_name); + 
panic("%pOF: unable to create IRQ domain\n", node); for (b = 0; b < NR_BANKS; b++) { intc.pending[b] = base + reg_pending[b]; @@ -173,8 +172,8 @@ static int __init armctrl_of_init(struct device_node *node, int parent_irq = irq_of_parse_and_map(node, 0); if (!parent_irq) { - panic("%s: unable to get parent interrupt.\n", - node->full_name); + panic("%pOF: unable to get parent interrupt.\n", + node); } irq_set_chained_handler(parent_irq, bcm2836_chained_handle_irq); } else { diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index e7463e3c0814..dc8c1e3eafe7 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -282,8 +282,7 @@ static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node, { intc.base = of_iomap(node, 0); if (!intc.base) { - panic("%s: unable to map local interrupt registers\n", - node->full_name); + panic("%pOF: unable to map local interrupt registers\n", node); } bcm2835_init_local_timer_frequency(); @@ -292,7 +291,7 @@ static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node, &bcm2836_arm_irqchip_intc_ops, NULL); if (!intc.domain) - panic("%s: unable to create IRQ domain\n", node->full_name); + panic("%pOF: unable to create IRQ domain\n", node); bcm2836_arm_irqchip_register_irq(LOCAL_IRQ_CNTPSIRQ, &bcm2836_arm_irqchip_timer); diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index f96601268f71..99d97d7e3fd7 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -341,13 +341,13 @@ static int __init irqcrossbar_init(struct device_node *node, int err; if (!parent) { - pr_err("%s: no parent, giving up\n", node->full_name); + pr_err("%pOF: no parent, giving up\n", node); return -ENODEV; } parent_domain = irq_find_host(parent); if (!parent_domain) { - pr_err("%s: unable to obtain parent domain\n", node->full_name); + pr_err("%pOF: unable to obtain parent domain\n", node); return -ENXIO; } @@ -360,7 +360,7 @@ 
static int __init irqcrossbar_init(struct device_node *node, node, &crossbar_domain_ops, NULL); if (!domain) { - pr_err("%s: failed to allocated domain\n", node->full_name); + pr_err("%pOF: failed to allocated domain\n", node); return -ENOMEM; } diff --git a/drivers/irqchip/irq-digicolor.c b/drivers/irqchip/irq-digicolor.c index 3aae015469a5..fc38d2da11b9 100644 --- a/drivers/irqchip/irq-digicolor.c +++ b/drivers/irqchip/irq-digicolor.c @@ -78,7 +78,7 @@ static int __init digicolor_of_init(struct device_node *node, reg_base = of_iomap(node, 0); if (!reg_base) { - pr_err("%s: unable to map IC registers\n", node->full_name); + pr_err("%pOF: unable to map IC registers\n", node); return -ENXIO; } @@ -88,7 +88,7 @@ static int __init digicolor_of_init(struct device_node *node, ucregs = syscon_regmap_lookup_by_phandle(node, "syscon"); if (IS_ERR(ucregs)) { - pr_err("%s: unable to map UC registers\n", node->full_name); + pr_err("%pOF: unable to map UC registers\n", node); return PTR_ERR(ucregs); } /* channel 1, regular IRQs */ @@ -97,7 +97,7 @@ static int __init digicolor_of_init(struct device_node *node, digicolor_irq_domain = irq_domain_add_linear(node, 64, &irq_generic_chip_ops, NULL); if (!digicolor_irq_domain) { - pr_err("%s: unable to create IRQ domain\n", node->full_name); + pr_err("%pOF: unable to create IRQ domain\n", node); return -ENOMEM; } @@ -105,7 +105,7 @@ static int __init digicolor_of_init(struct device_node *node, "digicolor_irq", handle_level_irq, clr, 0, 0); if (ret) { - pr_err("%s: unable to allocate IRQ gc\n", node->full_name); + pr_err("%pOF: unable to allocate IRQ gc\n", node); return ret; } diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c index 052f266364c0..0a19618ce2c8 100644 --- a/drivers/irqchip/irq-dw-apb-ictl.c +++ b/drivers/irqchip/irq-dw-apb-ictl.c @@ -79,24 +79,24 @@ static int __init dw_apb_ictl_init(struct device_node *np, /* Map the parent interrupt for the chained handler */ irq = 
irq_of_parse_and_map(np, 0); if (irq <= 0) { - pr_err("%s: unable to parse irq\n", np->full_name); + pr_err("%pOF: unable to parse irq\n", np); return -EINVAL; } ret = of_address_to_resource(np, 0, &r); if (ret) { - pr_err("%s: unable to get resource\n", np->full_name); + pr_err("%pOF: unable to get resource\n", np); return ret; } if (!request_mem_region(r.start, resource_size(&r), np->full_name)) { - pr_err("%s: unable to request mem region\n", np->full_name); + pr_err("%pOF: unable to request mem region\n", np); return -ENOMEM; } iobase = ioremap(r.start, resource_size(&r)); if (!iobase) { - pr_err("%s: unable to map resource\n", np->full_name); + pr_err("%pOF: unable to map resource\n", np); ret = -ENOMEM; goto err_release; } @@ -123,7 +123,7 @@ static int __init dw_apb_ictl_init(struct device_node *np, domain = irq_domain_add_linear(np, nrirqs, &irq_generic_chip_ops, NULL); if (!domain) { - pr_err("%s: unable to add irq domain\n", np->full_name); + pr_err("%pOF: unable to add irq domain\n", np); ret = -ENOMEM; goto err_unmap; } @@ -132,7 +132,7 @@ static int __init dw_apb_ictl_init(struct device_node *np, handle_level_irq, clr, 0, IRQ_GC_INIT_MASK_CACHE); if (ret) { - pr_err("%s: unable to alloc irq domain gc\n", np->full_name); + pr_err("%pOF: unable to alloc irq domain gc\n", np); goto err_unmap; } diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index 77931214d954..14a8c0a7e095 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -138,7 +138,7 @@ static int __init its_pci_of_msi_init(void) if (its_pci_msi_init_one(of_node_to_fwnode(np), np->full_name)) continue; - pr_info("PCI/MSI: %s domain created\n", np->full_name); + pr_info("PCI/MSI: %pOF domain created\n", np); } return 0; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 350a959da6dd..b93bd5a91e92 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ 
b/drivers/irqchip/irq-gic-v3-its.c @@ -1822,13 +1822,13 @@ static int __init its_of_probe(struct device_node *node) for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { if (!of_property_read_bool(np, "msi-controller")) { - pr_warn("%s: no msi-controller property, ITS ignored\n", - np->full_name); + pr_warn("%pOF: no msi-controller property, ITS ignored\n", + np); continue; } if (of_address_to_resource(np, 0, &res)) { - pr_warn("%s: no regs?\n", np->full_name); + pr_warn("%pOF: no regs?\n", np); continue; } diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index dbffb7ab6203..afae0a9d0df9 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1057,7 +1057,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) if (WARN_ON(cpu == -1)) continue; - pr_cont("%s[%d] ", cpu_node->full_name, cpu); + pr_cont("%pOF[%d] ", cpu_node, cpu); cpumask_set_cpu(cpu, &part->mask); } @@ -1125,15 +1125,13 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare dist_base = of_iomap(node, 0); if (!dist_base) { - pr_err("%s: unable to map gic dist registers\n", - node->full_name); + pr_err("%pOF: unable to map gic dist registers\n", node); return -ENXIO; } err = gic_validate_dist_version(dist_base); if (err) { - pr_err("%s: no distributor detected, giving up\n", - node->full_name); + pr_err("%pOF: no distributor detected, giving up\n", node); goto out_unmap_dist; } @@ -1153,8 +1151,7 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare ret = of_address_to_resource(node, 1 + i, &res); rdist_regs[i].redist_base = of_iomap(node, 1 + i); if (ret || !rdist_regs[i].redist_base) { - pr_err("%s: couldn't map region %d\n", - node->full_name, i); + pr_err("%pOF: couldn't map region %d\n", node, i); err = -ENODEV; goto out_unmap_rdist; } diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c 
index bb36f572e322..675eda5ff2b8 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -214,13 +214,13 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node, int i; if (!parent) { - pr_err("%s: no parent, giving up\n", node->full_name); + pr_err("%pOF: no parent, giving up\n", node); return -ENODEV; } parent_domain = irq_find_host(parent); if (!parent_domain) { - pr_err("%s: unable to get parent domain\n", node->full_name); + pr_err("%pOF: unable to get parent domain\n", node); return -ENXIO; } diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c index 1034aeb2e98a..a48357d369b5 100644 --- a/drivers/irqchip/irq-lpc32xx.c +++ b/drivers/irqchip/irq-lpc32xx.c @@ -191,7 +191,7 @@ static int __init lpc32xx_of_ic_init(struct device_node *node, irqc->base = of_iomap(node, 0); if (!irqc->base) { - pr_err("%s: unable to map registers\n", node->full_name); + pr_err("%pOF: unable to map registers\n", node); kfree(irqc); return -EINVAL; } diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c index eeac512ec5a8..90aaf190157f 100644 --- a/drivers/irqchip/irq-mtk-sysirq.c +++ b/drivers/irqchip/irq-mtk-sysirq.c @@ -178,8 +178,7 @@ static int __init mtk_sysirq_of_init(struct device_node *node, chip_data->intpol_words[i] = size / 4; chip_data->intpol_bases[i] = of_iomap(node, i); if (ret || !chip_data->intpol_bases[i]) { - pr_err("%s: couldn't map region %d\n", - node->full_name, i); + pr_err("%pOF: couldn't map region %d\n", node, i); ret = -ENODEV; goto out_free_intpol; } diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index 05fa9f7af53c..e8b31f52e071 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -179,7 +179,7 @@ static void __init icoll_add_domain(struct device_node *np, &icoll_irq_domain_ops, NULL); if (!icoll_domain) - panic("%s: unable to create irq domain", np->full_name); + panic("%pOF: unable to create irq domain", np); } static void 
__iomem * __init icoll_init_iobase(struct device_node *np) @@ -188,7 +188,7 @@ static void __iomem * __init icoll_init_iobase(struct device_node *np) icoll_base = of_io_request_and_map(np, 0, np->name); if (IS_ERR(icoll_base)) - panic("%s: unable to map resource", np->full_name); + panic("%pOF: unable to map resource", np); return icoll_base; } diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index 491568c95aa5..45363ff8d06f 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -140,7 +140,7 @@ static int __init stm32_exti_init(struct device_node *node, base = of_iomap(node, 0); if (!base) { - pr_err("%s: Unable to map registers\n", node->full_name); + pr_err("%pOF: Unable to map registers\n", node); return -ENOMEM; } @@ -149,7 +149,7 @@ static int __init stm32_exti_init(struct device_node *node, nr_exti = fls(readl_relaxed(base + EXTI_RTSR)); writel_relaxed(0, base + EXTI_RTSR); - pr_info("%s: %d External IRQs detected\n", node->full_name, nr_exti); + pr_info("%pOF: %d External IRQs detected\n", node, nr_exti); domain = irq_domain_add_linear(node, nr_exti, &irq_exti_domain_ops, NULL); @@ -163,8 +163,8 @@ static int __init stm32_exti_init(struct device_node *node, ret = irq_alloc_domain_generic_chips(domain, nr_exti, 1, "exti", handle_edge_irq, clr, 0, 0); if (ret) { - pr_err("%s: Could not allocate generic interrupt chip.\n", - node->full_name); + pr_err("%pOF: Could not allocate generic interrupt chip.\n", + node); goto out_free_domain; } diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 376b28074e0d..e3e5b9132b75 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -97,8 +97,8 @@ static int __init sun4i_of_init(struct device_node *node, { sun4i_irq_base = of_iomap(node, 0); if (!sun4i_irq_base) - panic("%s: unable to map IC registers\n", - node->full_name); + panic("%pOF: unable to map IC registers\n", + node); /* Disable all interrupts */ 
writel(0, sun4i_irq_base + SUN4I_IRQ_ENABLE_REG(0)); @@ -124,7 +124,7 @@ static int __init sun4i_of_init(struct device_node *node, sun4i_irq_domain = irq_domain_add_linear(node, 3 * 32, &sun4i_irq_ops, NULL); if (!sun4i_irq_domain) - panic("%s: unable to create IRQ domain\n", node->full_name); + panic("%pOF: unable to create IRQ domain\n", node); set_handle_irq(sun4i_handle_irq); diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c index 3973a14bb15b..0abc0cd1c32e 100644 --- a/drivers/irqchip/irq-tegra.c +++ b/drivers/irqchip/irq-tegra.c @@ -291,13 +291,13 @@ static int __init tegra_ictlr_init(struct device_node *node, int err; if (!parent) { - pr_err("%s: no parent, giving up\n", node->full_name); + pr_err("%pOF: no parent, giving up\n", node); return -ENODEV; } parent_domain = irq_find_host(parent); if (!parent_domain) { - pr_err("%s: unable to obtain parent domain\n", node->full_name); + pr_err("%pOF: unable to obtain parent domain\n", node); return -ENXIO; } @@ -329,29 +329,29 @@ static int __init tegra_ictlr_init(struct device_node *node, } if (!num_ictlrs) { - pr_err("%s: no valid regions, giving up\n", node->full_name); + pr_err("%pOF: no valid regions, giving up\n", node); err = -ENOMEM; goto out_free; } WARN(num_ictlrs != soc->num_ictlrs, - "%s: Found %u interrupt controllers in DT; expected %u.\n", - node->full_name, num_ictlrs, soc->num_ictlrs); + "%pOF: Found %u interrupt controllers in DT; expected %u.\n", + node, num_ictlrs, soc->num_ictlrs); domain = irq_domain_add_hierarchy(parent_domain, 0, num_ictlrs * 32, node, &tegra_ictlr_domain_ops, lic); if (!domain) { - pr_err("%s: failed to allocated domain\n", node->full_name); + pr_err("%pOF: failed to allocated domain\n", node); err = -ENOMEM; goto out_unmap; } tegra_ictlr_syscore_init(); - pr_info("%s: %d interrupts forwarded to %s\n", - node->full_name, num_ictlrs * 32, parent->full_name); + pr_info("%pOF: %d interrupts forwarded to %pOF\n", + node, num_ictlrs * 32, parent); return 0; 
diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 3db7ab1c9741..e3043ded8973 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -186,8 +186,8 @@ static int __init xilinx_intc_of_init(struct device_node *intc, if (irqc->intr_mask >> nr_irq) pr_warn("irq-xilinx: mismatch in kind-of-intr param\n"); - pr_info("irq-xilinx: %s: num_irq=%d, edge=0x%x\n", - intc->full_name, nr_irq, irqc->intr_mask); + pr_info("irq-xilinx: %pOF: num_irq=%d, edge=0x%x\n", + intc, nr_irq, irqc->intr_mask); /* From 0abce64a55ae44d39b92f8e672736f4f324e610f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Jun 2017 21:42:57 +0100 Subject: [PATCH 05/53] genirq: Let irq_set_vcpu_affinity() iterate over hierarchy When assigning an interrupt to a vcpu, it is not unlikely that the level of the hierarchy implementing irq_set_vcpu_affinity is not the top level (think a generic MSI domain on top of a virtualization aware interrupt controller). In such a case, let's iterate over the hierarchy until we find an irqchip implementing it. 
Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- kernel/irq/manage.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1d1a5b945ab4..573dc52b0806 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -400,8 +400,18 @@ int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info) return -EINVAL; data = irq_desc_get_irq_data(desc); - chip = irq_data_get_irq_chip(data); - if (chip && chip->irq_set_vcpu_affinity) + do { + chip = irq_data_get_irq_chip(data); + if (chip && chip->irq_set_vcpu_affinity) + break; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + data = data->parent_data; +#else + data = NULL; +#endif + } while (data); + + if (data) ret = chip->irq_set_vcpu_affinity(data, vcpu_info); irq_put_desc_unlock(desc, flags); From 0d94ded2689dd836f12ba1c64520377549facbde Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 17:00:38 +0000 Subject: [PATCH 06/53] irqchip/gic-v3: Add redistributor iterator In order to discover the VLPI properties, we need to iterate over the redistributor regions. As we already have code that does this, let's factor it out and make it slightly more generic. 
Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 69 ++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index afae0a9d0df9..fba5f668be59 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -421,24 +421,14 @@ static void __init gic_dist_init(void) gic_write_irouter(affinity, base + GICD_IROUTER + i * 8); } -static int gic_populate_rdist(void) +static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)) { - unsigned long mpidr = cpu_logical_map(smp_processor_id()); - u64 typer; - u32 aff; + int ret = -ENODEV; int i; - /* - * Convert affinity to a 32bit value that can be matched to - * GICR_TYPER bits [63:32]. - */ - aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | - MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 0)); - for (i = 0; i < gic_data.nr_redist_regions; i++) { void __iomem *ptr = gic_data.redist_regions[i].redist_base; + u64 typer; u32 reg; reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK; @@ -450,15 +440,9 @@ static int gic_populate_rdist(void) do { typer = gic_read_typer(ptr + GICR_TYPER); - if ((typer >> 32) == aff) { - u64 offset = ptr - gic_data.redist_regions[i].redist_base; - gic_data_rdist_rd_base() = ptr; - gic_data_rdist()->phys_base = gic_data.redist_regions[i].phys_base + offset; - pr_info("CPU%d: found redistributor %lx region %d:%pa\n", - smp_processor_id(), mpidr, i, - &gic_data_rdist()->phys_base); + ret = fn(gic_data.redist_regions + i, ptr); + if (!ret) return 0; - } if (gic_data.redist_regions[i].single_redist) break; @@ -473,9 +457,50 @@ static int gic_populate_rdist(void) } while (!(typer & GICR_TYPER_LAST)); } + return ret ? 
-ENODEV : 0; +} + +static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr) +{ + unsigned long mpidr = cpu_logical_map(smp_processor_id()); + u64 typer; + u32 aff; + + /* + * Convert affinity to a 32bit value that can be matched to + * GICR_TYPER bits [63:32]. + */ + aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | + MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + + typer = gic_read_typer(ptr + GICR_TYPER); + if ((typer >> 32) == aff) { + u64 offset = ptr - region->redist_base; + gic_data_rdist_rd_base() = ptr; + gic_data_rdist()->phys_base = region->phys_base + offset; + + pr_info("CPU%d: found redistributor %lx region %d:%pa\n", + smp_processor_id(), mpidr, + (int)(region - gic_data.redist_regions), + &gic_data_rdist()->phys_base); + return 0; + } + + /* Try next one */ + return 1; +} + +static int gic_populate_rdist(void) +{ + if (gic_iterate_rdists(__gic_populate_rdist) == 0) + return 0; + /* We couldn't even deal with ourselves... */ WARN(true, "CPU%d: mpidr %lx has no re-distributor!\n", - smp_processor_id(), mpidr); + smp_processor_id(), + (unsigned long)cpu_logical_map(smp_processor_id())); return -ENODEV; } From 0edc23ea2692fe75e941ec00867e661eb15f67fa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 17:01:52 +0000 Subject: [PATCH 07/53] irqchip/gic-v3: Add VLPI/DirectLPI discovery Add helper functions that probe for VLPI and DirectLPI properties. Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 24 +++++++++++++++++++++++- include/linux/irqchip/arm-gic-v3.h | 3 +++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index fba5f668be59..65fabd5f2ec6 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. 
+ * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved. * Author: Marc Zyngier * * This program is free software; you can redistribute it and/or modify @@ -504,6 +504,24 @@ static int gic_populate_rdist(void) return -ENODEV; } +static int __gic_update_vlpi_properties(struct redist_region *region, + void __iomem *ptr) +{ + u64 typer = gic_read_typer(ptr + GICR_TYPER); + gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS); + gic_data.rdists.has_direct_lpi &= !!(typer & GICR_TYPER_DirectLPIS); + + return 1; +} + +static void gic_update_vlpi_properties(void) +{ + gic_iterate_rdists(__gic_update_vlpi_properties); + pr_info("%sVLPI support, %sdirect LPI support\n", + !gic_data.rdists.has_vlpis ? "no " : "", + !gic_data.rdists.has_direct_lpi ? "no " : ""); +} + static void gic_cpu_sys_reg_init(void) { /* @@ -968,6 +986,8 @@ static int __init gic_init_bases(void __iomem *dist_base, gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops, &gic_data); gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist)); + gic_data.rdists.has_vlpis = true; + gic_data.rdists.has_direct_lpi = true; if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { err = -ENOMEM; @@ -976,6 +996,8 @@ static int __init gic_init_bases(void __iomem *dist_base, set_handle_irq(gic_handle_irq); + gic_update_vlpi_properties(); + if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis()) its_init(handle, &gic_data.rdists, gic_data.domain); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 6a1f87ff94e2..20a553423ac7 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -204,6 +204,7 @@ #define GICR_TYPER_PLPIS (1U << 0) #define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DirectLPIS (1U << 3) #define GICR_TYPER_LAST (1U << 4) #define GIC_V3_REDIST_SIZE 0x20000 @@ -487,6 +488,8 @@ struct rdists { struct page *prop_page; int id_bits; u64 flags; + bool has_vlpis; + bool 
has_direct_lpi; }; struct irq_domain; From a13b040408bbe6d9fe42c405bbdc48a074a3d299 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 17:15:24 +0000 Subject: [PATCH 08/53] irqchip/gic-v3-its: Move LPI definitions around The various LPI definitions are in the middle of the code, and would be better placed at the beginning, given that we're going to use some of them much earlier. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index b93bd5a91e92..2232250f1442 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -48,6 +48,19 @@ #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) +static u32 lpi_id_bits; + +/* + * We allocate memory for PROPBASE to cover 2 ^ lpi_id_bits LPIs to + * deal with (one configuration byte per interrupt). PENDBASE has to + * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI). + */ +#define LPI_NRBITS lpi_id_bits +#define LPI_PROPBASE_SZ ALIGN(BIT(LPI_NRBITS), SZ_64K) +#define LPI_PENDBASE_SZ ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K) + +#define LPI_PROP_DEFAULT_PRIO 0xa0 + /* * Collection structure - just an ID, and a redistributor address to * ping. We use one per CPU as a bag of interrupts assigned to this @@ -701,7 +714,6 @@ static struct irq_chip its_irq_chip = { static unsigned long *lpi_bitmap; static u32 lpi_chunks; -static u32 lpi_id_bits; static DEFINE_SPINLOCK(lpi_lock); static int its_lpi_to_chunk(int lpi) @@ -796,17 +808,6 @@ static void its_lpi_free(struct event_lpi_map *map) kfree(map->col_map); } -/* - * We allocate memory for PROPBASE to cover 2 ^ lpi_id_bits LPIs to - * deal with (one configuration byte per interrupt). PENDBASE has to - * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI). 
- */ -#define LPI_NRBITS lpi_id_bits -#define LPI_PROPBASE_SZ ALIGN(BIT(LPI_NRBITS), SZ_64K) -#define LPI_PENDBASE_SZ ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K) - -#define LPI_PROP_DEFAULT_PRIO 0xa0 - static int __init its_alloc_lpi_tables(void) { phys_addr_t paddr; From 3dfa576bfb453482314b596931a59a4951428058 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 17:25:54 +0000 Subject: [PATCH 09/53] irqchip/gic-v3-its: Add probing for VLPI properties Add the probing code for the ITS VLPI support. This includes configuring the ITS number if not supporting the single VMOVP command feature. Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 71 ++++++++++++++++++++++++++++-- include/linux/irqchip/arm-gic-v3.h | 5 +++ 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 2232250f1442..89da961e949e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -101,6 +101,7 @@ struct its_node { u32 ite_size; u32 device_ids; int numa_node; + bool is_v4; }; #define ITS_ITT_ALIGN SZ_256 @@ -133,6 +134,14 @@ static DEFINE_SPINLOCK(its_lock); static struct rdists *gic_rdists; static struct irq_domain *its_parent; +/* + * We have a maximum number of 16 ITSs in the whole system if we're + * using the ITSList mechanism + */ +#define ITS_LIST_MAX 16 + +static unsigned long its_list_map; + #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) @@ -1679,13 +1688,51 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) return 0; } +static int __init its_compute_its_list_map(struct resource *res, + void __iomem *its_base) +{ + int its_number; + u32 ctlr; + + /* + * This is assumed to be done early enough that we're + * guaranteed to be single-threaded, hence no + * locking. Should this change, we should address + * this. 
+ */ + its_number = find_first_zero_bit(&its_list_map, ITS_LIST_MAX); + if (its_number >= ITS_LIST_MAX) { + pr_err("ITS@%pa: No ITSList entry available!\n", + &res->start); + return -EINVAL; + } + + ctlr = readl_relaxed(its_base + GITS_CTLR); + ctlr &= ~GITS_CTLR_ITS_NUMBER; + ctlr |= its_number << GITS_CTLR_ITS_NUMBER_SHIFT; + writel_relaxed(ctlr, its_base + GITS_CTLR); + ctlr = readl_relaxed(its_base + GITS_CTLR); + if ((ctlr & GITS_CTLR_ITS_NUMBER) != (its_number << GITS_CTLR_ITS_NUMBER_SHIFT)) { + its_number = ctlr & GITS_CTLR_ITS_NUMBER; + its_number >>= GITS_CTLR_ITS_NUMBER_SHIFT; + } + + if (test_and_set_bit(its_number, &its_list_map)) { + pr_err("ITS@%pa: Duplicate ITSList entry %d\n", + &res->start, its_number); + return -EINVAL; + } + + return its_number; +} + static int __init its_probe_one(struct resource *res, struct fwnode_handle *handle, int numa_node) { struct its_node *its; void __iomem *its_base; - u32 val; - u64 baser, tmp; + u32 val, ctlr; + u64 baser, tmp, typer; int err; its_base = ioremap(res->start, resource_size(res)); @@ -1718,9 +1765,24 @@ static int __init its_probe_one(struct resource *res, raw_spin_lock_init(&its->lock); INIT_LIST_HEAD(&its->entry); INIT_LIST_HEAD(&its->its_device_list); + typer = gic_read_typer(its_base + GITS_TYPER); its->base = its_base; its->phys_base = res->start; - its->ite_size = ((gic_read_typer(its_base + GITS_TYPER) >> 4) & 0xf) + 1; + its->ite_size = GITS_TYPER_ITT_ENTRY_SIZE(typer); + its->is_v4 = !!(typer & GITS_TYPER_VLPIS); + if (its->is_v4) { + if (!(typer & GITS_TYPER_VMOVP)) { + err = its_compute_its_list_map(res, its_base); + if (err < 0) + goto out_free_its; + + pr_info("ITS@%pa: Using ITS number %d\n", + &res->start, err); + } else { + pr_info("ITS@%pa: Single VMOVP capable\n", &res->start); + } + } + its->numa_node = numa_node; its->cmd_base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, @@ -1767,7 +1829,8 @@ static int __init its_probe_one(struct resource *res, } gits_write_cwriter(0, 
its->base + GITS_CWRITER); - writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR); + ctlr = readl_relaxed(its->base + GITS_CTLR); + writel_relaxed(ctlr | GITS_CTLR_ENABLE, its->base + GITS_CTLR); err = its_init_domain(handle, its); if (err) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 20a553423ac7..af8c55105fc2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -235,15 +235,20 @@ #define GITS_TRANSLATER 0x10040 #define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ITS_NUMBER_SHIFT 4 +#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) #define GITS_CTLR_QUIESCENT (1U << 31) #define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) #define GITS_TYPER_PTA (1UL << 19) #define GITS_TYPER_HWCOLLCNT_SHIFT 24 +#define GITS_TYPER_VMOVP (1ULL << 37) #define GITS_IIDR_REV_SHIFT 12 #define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) From e4f9094b54882fa404cfd33609fc5c62b0964082 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 17:56:32 +0000 Subject: [PATCH 10/53] irqchip/gic-v3-its: Macro-ize its_send_single_command Most ITS commands do operate on a collection object, and require a SYNC command to be performed on that collection in order to guarantee the execution of the first command. With GICv4 ITS, another set of commands perform similar operations on a VPE object, and a VSYNC operations must be executed to guarantee their execution. Given the similarities (post a command, perform a synchronization operation on a sync object), it makes sense to reuse the same mechanism for both class of commands.
Let's start with turning its_send_single_command into a huge macro that performs the bulk of the work, and a set of helpers that make this macro usable for the GICv3 ITS commands. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 82 ++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 89da961e949e..7ad0fc8d2e23 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -494,44 +494,54 @@ static void its_wait_for_range_completion(struct its_node *its, } } -static void its_send_single_command(struct its_node *its, - its_cmd_builder_t builder, - struct its_cmd_desc *desc) -{ - struct its_cmd_block *cmd, *sync_cmd, *next_cmd; - struct its_collection *sync_col; - unsigned long flags; - - raw_spin_lock_irqsave(&its->lock, flags); - - cmd = its_allocate_entry(its); - if (!cmd) { /* We're soooooo screewed... */ - pr_err_ratelimited("ITS can't allocate, dropping command\n"); - raw_spin_unlock_irqrestore(&its->lock, flags); - return; - } - sync_col = builder(cmd, desc); - its_flush_cmd(its, cmd); - - if (sync_col) { - sync_cmd = its_allocate_entry(its); - if (!sync_cmd) { - pr_err_ratelimited("ITS can't SYNC, skipping\n"); - goto post; - } - its_encode_cmd(sync_cmd, GITS_CMD_SYNC); - its_encode_target(sync_cmd, sync_col->target_address); - its_fixup_cmd(sync_cmd); - its_flush_cmd(its, sync_cmd); - } - -post: - next_cmd = its_post_commands(its); - raw_spin_unlock_irqrestore(&its->lock, flags); - - its_wait_for_range_completion(its, cmd, next_cmd); +/* Warning, macro hell follows */ +#define BUILD_SINGLE_CMD_FUNC(name, buildtype, synctype, buildfn) \ +void name(struct its_node *its, \ + buildtype builder, \ + struct its_cmd_desc *desc) \ +{ \ + struct its_cmd_block *cmd, *sync_cmd, *next_cmd; \ + synctype *sync_obj; \ + unsigned long flags; \ + \ + raw_spin_lock_irqsave(&its->lock, flags); \ + \ + cmd = 
its_allocate_entry(its); \ + if (!cmd) { /* We're soooooo screewed... */ \ + raw_spin_unlock_irqrestore(&its->lock, flags); \ + return; \ + } \ + sync_obj = builder(cmd, desc); \ + its_flush_cmd(its, cmd); \ + \ + if (sync_obj) { \ + sync_cmd = its_allocate_entry(its); \ + if (!sync_cmd) \ + goto post; \ + \ + buildfn(sync_cmd, sync_obj); \ + its_flush_cmd(its, sync_cmd); \ + } \ + \ +post: \ + next_cmd = its_post_commands(its); \ + raw_spin_unlock_irqrestore(&its->lock, flags); \ + \ + its_wait_for_range_completion(its, cmd, next_cmd); \ } +static void its_build_sync_cmd(struct its_cmd_block *sync_cmd, + struct its_collection *sync_col) +{ + its_encode_cmd(sync_cmd, GITS_CMD_SYNC); + its_encode_target(sync_cmd, sync_col->target_address); + + its_fixup_cmd(sync_cmd); +} + +static BUILD_SINGLE_CMD_FUNC(its_send_single_command, its_cmd_builder_t, + struct its_collection, its_build_sync_cmd) + static void its_send_inv(struct its_device *dev, u32 event_id) { struct its_cmd_desc desc; From 8d85dcedc4663f7cee840fbdd27f0204e3989ea2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 18:02:13 +0000 Subject: [PATCH 11/53] irqchip/gic-v3-its: Implement irq_set_irqchip_state for pending state Allow the pending state of an LPI to be set or cleared via irq_set_irqchip_state. 
Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 78 ++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7ad0fc8d2e23..6a33cdb63636 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -164,6 +164,11 @@ struct its_cmd_desc { u32 event_id; } its_inv_cmd; + struct { + struct its_device *dev; + u32 event_id; + } its_clear_cmd; + struct { struct its_device *dev; u32 event_id; @@ -380,6 +385,40 @@ static struct its_collection *its_build_inv_cmd(struct its_cmd_block *cmd, return col; } +static struct its_collection *its_build_int_cmd(struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_collection *col; + + col = dev_event_to_col(desc->its_int_cmd.dev, + desc->its_int_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_INT); + its_encode_devid(cmd, desc->its_int_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_int_cmd.event_id); + + its_fixup_cmd(cmd); + + return col; +} + +static struct its_collection *its_build_clear_cmd(struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_collection *col; + + col = dev_event_to_col(desc->its_clear_cmd.dev, + desc->its_clear_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_CLEAR); + its_encode_devid(cmd, desc->its_clear_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_clear_cmd.event_id); + + its_fixup_cmd(cmd); + + return col; +} + static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { @@ -542,6 +581,26 @@ static void its_build_sync_cmd(struct its_cmd_block *sync_cmd, static BUILD_SINGLE_CMD_FUNC(its_send_single_command, its_cmd_builder_t, struct its_collection, its_build_sync_cmd) +static void its_send_int(struct its_device *dev, u32 event_id) +{ + struct its_cmd_desc desc; + + desc.its_int_cmd.dev = dev; + 
desc.its_int_cmd.event_id = event_id; + + its_send_single_command(dev->its, its_build_int_cmd, &desc); +} + +static void its_send_clear(struct its_device *dev, u32 event_id) +{ + struct its_cmd_desc desc; + + desc.its_clear_cmd.dev = dev; + desc.its_clear_cmd.event_id = event_id; + + its_send_single_command(dev->its, its_build_clear_cmd, &desc); +} + static void its_send_inv(struct its_device *dev, u32 event_id) { struct its_cmd_desc desc; @@ -708,6 +767,24 @@ static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) iommu_dma_map_msi_msg(d->irq, msg); } +static int its_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool state) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + + if (which != IRQCHIP_STATE_PENDING) + return -EINVAL; + + if (state) + its_send_int(its_dev, event); + else + its_send_clear(its_dev, event); + + return 0; +} + static struct irq_chip its_irq_chip = { .name = "ITS", .irq_mask = its_mask_irq, @@ -715,6 +792,7 @@ static struct irq_chip its_irq_chip = { .irq_eoi = irq_chip_eoi_parent, .irq_set_affinity = its_set_affinity, .irq_compose_msi_msg = its_irq_compose_msi_msg, + .irq_set_irqchip_state = its_irq_set_irqchip_state, }; /* From 0e5ccf91e02d3b47ab6a3f5588679f1032370099 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 18:15:05 +0000 Subject: [PATCH 12/53] irqchip/gic-v3-its: Split out property table allocation Move the LPI property table allocation into its own function, as this is going to be required for those associated with VMs in the future. 
Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 6a33cdb63636..9d8d39320f62 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -905,13 +905,32 @@ static void its_lpi_free(struct event_lpi_map *map) kfree(map->col_map); } +static struct page *its_allocate_prop_table(gfp_t gfp_flags) +{ + struct page *prop_page; + + prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); + if (!prop_page) + return NULL; + + /* Priority 0xa0, Group-1, disabled */ + memset(page_address(prop_page), + LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, + LPI_PROPBASE_SZ); + + /* Make sure the GIC will observe the written configuration */ + gic_flush_dcache_to_poc(page_address(prop_page), LPI_PROPBASE_SZ); + + return prop_page; +} + + static int __init its_alloc_lpi_tables(void) { phys_addr_t paddr; lpi_id_bits = min_t(u32, gic_rdists->id_bits, ITS_MAX_LPI_NRBITS); - gic_rdists->prop_page = alloc_pages(GFP_NOWAIT, - get_order(LPI_PROPBASE_SZ)); + gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT); if (!gic_rdists->prop_page) { pr_err("Failed to allocate PROPBASE\n"); return -ENOMEM; @@ -920,14 +939,6 @@ static int __init its_alloc_lpi_tables(void) paddr = page_to_phys(gic_rdists->prop_page); pr_info("GIC: using LPI property table @%pa\n", &paddr); - /* Priority 0xa0, Group-1, disabled */ - memset(page_address(gic_rdists->prop_page), - LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, - LPI_PROPBASE_SZ); - - /* Make sure the GIC will observe the written configuration */ - gic_flush_dcache_to_poc(page_address(gic_rdists->prop_page), LPI_PROPBASE_SZ); - return its_lpi_init(lpi_id_bits); } From 4cacac5744d57f321ae17cb15456ec97d775860e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 18:18:34 +0000 
Subject: [PATCH 13/53] irqchip/gic-v3-its: Allow use of indirect VCPU tables The VCPU tables can be quite sparse as well, and it makes sense to use indirect tables as well if possible. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9d8d39320f62..6ef6b0d5fe72 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1077,10 +1077,13 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, return 0; } -static bool its_parse_baser_device(struct its_node *its, struct its_baser *baser, - u32 psz, u32 *order) +static bool its_parse_indirect_baser(struct its_node *its, + struct its_baser *baser, + u32 psz, u32 *order) { - u64 esz = GITS_BASER_ENTRY_SIZE(its_read_baser(its, baser)); + u64 tmp = its_read_baser(its, baser); + u64 type = GITS_BASER_TYPE(tmp); + u64 esz = GITS_BASER_ENTRY_SIZE(tmp); u64 val = GITS_BASER_InnerShareable | GITS_BASER_RaWaWb; u32 ids = its->device_ids; u32 new_order = *order; @@ -1119,8 +1122,9 @@ static bool its_parse_baser_device(struct its_node *its, struct its_baser *baser if (new_order >= MAX_ORDER) { new_order = MAX_ORDER - 1; ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz); - pr_warn("ITS@%pa: Device Table too large, reduce ids %u->%u\n", - &its->phys_base, its->device_ids, ids); + pr_warn("ITS@%pa: %s Table too large, reduce ids %u->%u\n", + &its->phys_base, its_base_type_string[type], + its->device_ids, ids); } *order = new_order; @@ -1168,11 +1172,16 @@ static int its_alloc_tables(struct its_node *its) u32 order = get_order(psz); bool indirect = false; - if (type == GITS_BASER_TYPE_NONE) + switch (type) { + case GITS_BASER_TYPE_NONE: continue; - if (type == GITS_BASER_TYPE_DEVICE) - indirect = its_parse_baser_device(its, baser, psz, &order); + case 
GITS_BASER_TYPE_DEVICE: + case GITS_BASER_TYPE_VCPU: + indirect = its_parse_indirect_baser(its, baser, + psz, &order); + break; + } err = its_setup_baser(its, baser, cache, shr, psz, order, indirect); if (err < 0) { From 7c297a2d5c08cec6444175594a09ccc2035dcc68 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 18:34:38 +0000 Subject: [PATCH 14/53] irqchip/gic-v3-its: Split out pending table allocation Just as for the property table, let's move the pending table allocation to a separate function. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 6ef6b0d5fe72..022dc3e94dbd 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1208,6 +1208,24 @@ static int its_alloc_collections(struct its_node *its) return 0; } +static struct page *its_allocate_pending_table(gfp_t gfp_flags) +{ + struct page *pend_page; + /* + * The pending pages have to be at least 64kB aligned, + * hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below. + */ + pend_page = alloc_pages(gfp_flags | __GFP_ZERO, + get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K))); + if (!pend_page) + return NULL; + + /* Make sure the GIC will observe the zero-ed page */ + gic_flush_dcache_to_poc(page_address(pend_page), LPI_PENDBASE_SZ); + + return pend_page; +} + static void its_cpu_init_lpis(void) { void __iomem *rbase = gic_data_rdist_rd_base(); @@ -1218,21 +1236,14 @@ static void its_cpu_init_lpis(void) pend_page = gic_data_rdist()->pend_page; if (!pend_page) { phys_addr_t paddr; - /* - * The pending pages have to be at least 64kB aligned, - * hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below. 
- */ - pend_page = alloc_pages(GFP_NOWAIT | __GFP_ZERO, - get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K))); + + pend_page = its_allocate_pending_table(GFP_NOWAIT); if (!pend_page) { pr_err("Failed to allocate PENDBASE for CPU%d\n", smp_processor_id()); return; } - /* Make sure the GIC will observe the zero-ed page */ - gic_flush_dcache_to_poc(page_address(pend_page), LPI_PENDBASE_SZ); - paddr = page_to_phys(pend_page); pr_info("CPU%d: using LPI pending table @%pa\n", smp_processor_id(), &paddr); From cf2be8ba6f264b75eb30ebfe74899e3462669a75 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 18:49:59 +0000 Subject: [PATCH 15/53] irqchip/gic-v3-its: Rework LPI freeing Rework LPI deallocation so that it can be reused by the v4 support code. Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 022dc3e94dbd..71d71f757a17 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -881,16 +881,15 @@ static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) return bitmap; } -static void its_lpi_free(struct event_lpi_map *map) +static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids) { - int base = map->lpi_base; - int nr_ids = map->nr_lpis; int lpi; spin_lock(&lpi_lock); for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) { int chunk = its_lpi_to_chunk(lpi); + BUG_ON(chunk > lpi_chunks); if (test_bit(chunk, lpi_bitmap)) { clear_bit(chunk, lpi_bitmap); @@ -901,8 +900,7 @@ static void its_lpi_free(struct event_lpi_map *map) spin_unlock(&lpi_lock); - kfree(map->lpi_map); - kfree(map->col_map); + kfree(bitmap); } static struct page *its_allocate_prop_table(gfp_t gfp_flags) @@ -1674,7 +1672,10 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int 
virq, /* If all interrupts have been freed, start mopping the floor */ if (bitmap_empty(its_dev->event_map.lpi_map, its_dev->event_map.nr_lpis)) { - its_lpi_free(&its_dev->event_map); + its_lpi_free_chunks(its_dev->event_map.lpi_map, + its_dev->event_map.lpi_base, + its_dev->event_map.nr_lpis); + kfree(its_dev->event_map.col_map); /* Unmap device/itt */ its_send_mapd(its_dev, 0); From 70cc81edc08d2ae42414d609dac87e7db8da456d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 18:53:02 +0000 Subject: [PATCH 16/53] irqchip/gic-v3-its: Generalize device table allocation As we want to use 2-level tables for VCPUs, let's hack the device table allocator in order to make it slightly more generic. It will get reused in subsequent patches. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 71d71f757a17..15a007f98253 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1392,26 +1392,19 @@ static struct its_baser *its_get_baser(struct its_node *its, u32 type) return NULL; } -static bool its_alloc_device_table(struct its_node *its, u32 dev_id) +static bool its_alloc_table_entry(struct its_baser *baser, u32 id) { - struct its_baser *baser; struct page *page; u32 esz, idx; __le64 *table; - baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE); - - /* Don't allow device id that exceeds ITS hardware limit */ - if (!baser) - return (ilog2(dev_id) < its->device_ids); - /* Don't allow device id that exceeds single, flat table limit */ esz = GITS_BASER_ENTRY_SIZE(baser->val); if (!(baser->val & GITS_BASER_INDIRECT)) - return (dev_id < (PAGE_ORDER_TO_SIZE(baser->order) / esz)); + return (id < (PAGE_ORDER_TO_SIZE(baser->order) / esz)); /* Compute 1st level table index & check if that exceeds table limit */ - 
idx = dev_id >> ilog2(baser->psz / esz); + idx = id >> ilog2(baser->psz / esz); if (idx >= (PAGE_ORDER_TO_SIZE(baser->order) / GITS_LVL1_ENTRY_SIZE)) return false; @@ -1440,6 +1433,19 @@ static bool its_alloc_device_table(struct its_node *its, u32 dev_id) return true; } +static bool its_alloc_device_table(struct its_node *its, u32 dev_id) +{ + struct its_baser *baser; + + baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE); + + /* Don't allow device id that exceeds ITS hardware limit */ + if (!baser) + return (ilog2(dev_id) < its->device_ids); + + return its_alloc_table_entry(baser, dev_id); +} + static struct its_device *its_create_device(struct its_node *its, u32 dev_id, int nvecs) { From adcdb94e3180f6b9ffa16c3def6095b5cf41e06d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 19:18:13 +0000 Subject: [PATCH 17/53] irqchip/gic-v3-its: Generalize LPI configuration We are going to need to change a bit more than just the enable bit in the LPI property table in the future. So let's change the LPI configuration function to take a set of bits to be cleared, and a set of bits to be set. This way, we'll be able to use it when a guest updates an LPI property (priority, for example).
Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 15a007f98253..19ce99b78042 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -684,17 +684,18 @@ static inline u32 its_get_event_id(struct irq_data *d) return d->hwirq - its_dev->event_map.lpi_base; } -static void lpi_set_config(struct irq_data *d, bool enable) +static void lpi_update_config(struct irq_data *d, u8 clr, u8 set) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); irq_hw_number_t hwirq = d->hwirq; - u32 id = its_get_event_id(d); - u8 *cfg = page_address(gic_rdists->prop_page) + hwirq - 8192; + struct page *prop_page; + u8 *cfg; - if (enable) - *cfg |= LPI_PROP_ENABLED; - else - *cfg &= ~LPI_PROP_ENABLED; + prop_page = gic_rdists->prop_page; + + cfg = page_address(prop_page) + hwirq - 8192; + *cfg &= ~clr; + *cfg |= set; /* * Make the above write visible to the redistributors. @@ -705,17 +706,17 @@ static void lpi_set_config(struct irq_data *d, bool enable) gic_flush_dcache_to_poc(cfg, sizeof(*cfg)); else dsb(ishst); - its_send_inv(its_dev, id); + its_send_inv(its_dev, its_get_event_id(d)); } static void its_mask_irq(struct irq_data *d) { - lpi_set_config(d, false); + lpi_update_config(d, LPI_PROP_ENABLED, 0); } static void its_unmask_irq(struct irq_data *d) { - lpi_set_config(d, true); + lpi_update_config(d, 0, LPI_PROP_ENABLED); } static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, From de29faa0d8ac925534749bc56d539bf936ce122b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 19:25:00 +0000 Subject: [PATCH 18/53] irqchip/gic-v4: Add management structure definitions Add a bunch of GICv4-specific data structures that will get used in subsequent patches. 
Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 92 ++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 include/linux/irqchip/arm-gic-v4.h diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h new file mode 100644 index 000000000000..d499538dd86f --- /dev/null +++ b/include/linux/irqchip/arm-gic-v4.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016,2017 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __LINUX_IRQCHIP_ARM_GIC_V4_H +#define __LINUX_IRQCHIP_ARM_GIC_V4_H + +struct its_vpe; + +/* Embedded in kvm.arch */ +struct its_vm { + struct fwnode_handle *fwnode; + struct irq_domain *domain; + struct page *vprop_page; + struct its_vpe **vpes; + int nr_vpes; + irq_hw_number_t db_lpi_base; + unsigned long *db_bitmap; + int nr_db_lpis; +}; + +/* Embedded in kvm_vcpu.arch */ +struct its_vpe { + struct page *vpt_page; + struct its_vm *its_vm; + /* Doorbell interrupt */ + int irq; + irq_hw_number_t vpe_db_lpi; + /* + * This collection ID is used to indirect the target + * redistributor for this VPE. The ID itself isn't involved in + * programming of the ITS. + */ + u16 col_idx; + /* Unique (system-wide) VPE identifier */ + u16 vpe_id; + /* Implementation Defined Area Invalid */ + bool idai; + /* Pending VLPIs on schedule out? 
*/ + bool pending_last; +}; + +/* + * struct its_vlpi_map: structure describing the mapping of a + * VLPI. Only to be interpreted in the context of a physical interrupt + * it complements. To be used as the vcpu_info passed to + * irq_set_vcpu_affinity(). + * + * @vm: Pointer to the GICv4 notion of a VM + * @vpe: Pointer to the GICv4 notion of a virtual CPU (VPE) + * @vintid: Virtual LPI number + * @db_enabled: Is the VPE doorbell to be generated? + */ +struct its_vlpi_map { + struct its_vm *vm; + struct its_vpe *vpe; + u32 vintid; + bool db_enabled; +}; + +enum its_vcpu_info_cmd_type { + MAP_VLPI, + GET_VLPI, + PROP_UPDATE_VLPI, + PROP_UPDATE_AND_INV_VLPI, + SCHEDULE_VPE, + DESCHEDULE_VPE, + INVALL_VPE, +}; + +struct its_cmd_info { + enum its_vcpu_info_cmd_type cmd_type; + union { + struct its_vlpi_map *map; + u8 config; + }; +}; + +#endif From d7276b80e752acaaf30f04e62d0e986b5bced2df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:11:47 +0000 Subject: [PATCH 19/53] irqchip/gic-v3-its: Add GICv4 ITS command definitions Add the new GICv4 ITS command definitions, most of them, being defined in terms of their physical counterparts. Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 2 +- include/linux/irqchip/arm-gic-v3.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 19ce99b78042..dd92a8a4572a 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved. 
* Author: Marc Zyngier * * This program is free software; you can redistribute it and/or modify diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index af8c55105fc2..7c6fd8f3e36c 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -347,6 +347,18 @@ #define GITS_CMD_CLEAR 0x04 #define GITS_CMD_SYNC 0x05 +/* + * GICv4 ITS specific commands + */ +#define GITS_CMD_GICv4(x) ((x) | 0x20) +#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL) +#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC) +#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) +#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) +#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) +/* VMOVP is the odd one, as it doesn't have a physical counterpart */ +#define GITS_CMD_VMOVP GITS_CMD_GICv4(2) + /* * ITS error numbers */ From c808eea8f763a996409656df83c41b8ec628060b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 09:31:20 +0000 Subject: [PATCH 20/53] irqchip/gic-v3-its: Add VLPI configuration hook Add the skeleton irq_set_vcpu_affinity method that will be used to configure VLPIs. 
Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index dd92a8a4572a..ead06d443c3b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -36,6 +36,7 @@ #include #include +#include #include #include @@ -786,6 +787,28 @@ static int its_irq_set_irqchip_state(struct irq_data *d, return 0; } +static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_cmd_info *info = vcpu_info; + + /* Need a v4 ITS */ + if (!its_dev->its->is_v4 || !info) + return -EINVAL; + + switch (info->cmd_type) { + case MAP_VLPI: + + case GET_VLPI: + + case PROP_UPDATE_VLPI: + case PROP_UPDATE_AND_INV_VLPI: + + default: + return -EINVAL; + } +} + static struct irq_chip its_irq_chip = { .name = "ITS", .irq_mask = its_mask_irq, @@ -794,6 +817,7 @@ static struct irq_chip its_irq_chip = { .irq_set_affinity = its_set_affinity, .irq_compose_msi_msg = its_irq_compose_msi_msg, .irq_set_irqchip_state = its_irq_set_irqchip_state, + .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity, }; /* From d011e4e654d7f519bf892995b9f4b453dbf8a3e5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 09:44:41 +0000 Subject: [PATCH 21/53] irqchip/gic-v3-its: Add VLPI map/unmap operations In order to let a VLPI being injected into a guest, the VLPI must be mapped using the VMAPTI command. When moved to a different vcpu, it must be moved with the VMOVI command. These commands are issued via the irq_set_vcpu_affinity method, making sure we unmap the corresponding host LPI first. The reverse is also done when the VLPI is unmapped from the guest. 
Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 250 ++++++++++++++++++++++++++++++- 1 file changed, 247 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index ead06d443c3b..5187c4b116d4 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -115,11 +115,17 @@ struct event_lpi_map { u16 *col_map; irq_hw_number_t lpi_base; int nr_lpis; + struct mutex vlpi_lock; + struct its_vm *vm; + struct its_vlpi_map *vlpi_maps; + int nr_vlpis; }; /* - * The ITS view of a device - belongs to an ITS, a collection, owns an - * interrupt translation table, and a list of interrupts. + * The ITS view of a device - belongs to an ITS, owns an interrupt + * translation table, and a list of interrupts. If it some of its + * LPIs are injected into a guest (GICv4), the event_map.vm field + * indicates which one. */ struct its_device { struct list_head entry; @@ -205,6 +211,21 @@ struct its_cmd_desc { struct { struct its_collection *col; } its_invall_cmd; + + struct { + struct its_vpe *vpe; + struct its_device *dev; + u32 virt_id; + u32 event_id; + bool db_enabled; + } its_vmapti_cmd; + + struct { + struct its_vpe *vpe; + struct its_device *dev; + u32 event_id; + bool db_enabled; + } its_vmovi_cmd; }; }; @@ -221,6 +242,9 @@ struct its_cmd_block { typedef struct its_collection *(*its_cmd_builder_t)(struct its_cmd_block *, struct its_cmd_desc *); +typedef struct its_vpe *(*its_cmd_vbuilder_t)(struct its_cmd_block *, + struct its_cmd_desc *); + static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) { u64 mask = GENMASK_ULL(h, l); @@ -273,6 +297,26 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col) its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); } +static void its_encode_vpeid(struct its_cmd_block *cmd, u16 vpeid) +{ + its_mask_encode(&cmd->raw_cmd[1], vpeid, 47, 32); +} + +static void its_encode_virt_id(struct its_cmd_block *cmd, u32 virt_id) +{ + 
its_mask_encode(&cmd->raw_cmd[2], virt_id, 31, 0); +} + +static void its_encode_db_phys_id(struct its_cmd_block *cmd, u32 db_phys_id) +{ + its_mask_encode(&cmd->raw_cmd[2], db_phys_id, 63, 32); +} + +static void its_encode_db_valid(struct its_cmd_block *cmd, bool db_valid) +{ + its_mask_encode(&cmd->raw_cmd[2], db_valid, 0, 0); +} + static inline void its_fixup_cmd(struct its_cmd_block *cmd) { /* Let's fixup BE commands */ @@ -431,6 +475,50 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, return NULL; } +static struct its_vpe *its_build_vmapti_cmd(struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + u32 db; + + if (desc->its_vmapti_cmd.db_enabled) + db = desc->its_vmapti_cmd.vpe->vpe_db_lpi; + else + db = 1023; + + its_encode_cmd(cmd, GITS_CMD_VMAPTI); + its_encode_devid(cmd, desc->its_vmapti_cmd.dev->device_id); + its_encode_vpeid(cmd, desc->its_vmapti_cmd.vpe->vpe_id); + its_encode_event_id(cmd, desc->its_vmapti_cmd.event_id); + its_encode_db_phys_id(cmd, db); + its_encode_virt_id(cmd, desc->its_vmapti_cmd.virt_id); + + its_fixup_cmd(cmd); + + return desc->its_vmapti_cmd.vpe; +} + +static struct its_vpe *its_build_vmovi_cmd(struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + u32 db; + + if (desc->its_vmovi_cmd.db_enabled) + db = desc->its_vmovi_cmd.vpe->vpe_db_lpi; + else + db = 1023; + + its_encode_cmd(cmd, GITS_CMD_VMOVI); + its_encode_devid(cmd, desc->its_vmovi_cmd.dev->device_id); + its_encode_vpeid(cmd, desc->its_vmovi_cmd.vpe->vpe_id); + its_encode_event_id(cmd, desc->its_vmovi_cmd.event_id); + its_encode_db_phys_id(cmd, db); + its_encode_db_valid(cmd, true); + + its_fixup_cmd(cmd); + + return desc->its_vmovi_cmd.vpe; +} + static u64 its_cmd_ptr_to_offset(struct its_node *its, struct its_cmd_block *ptr) { @@ -582,6 +670,18 @@ static void its_build_sync_cmd(struct its_cmd_block *sync_cmd, static BUILD_SINGLE_CMD_FUNC(its_send_single_command, its_cmd_builder_t, struct its_collection, its_build_sync_cmd) +static 
void its_build_vsync_cmd(struct its_cmd_block *sync_cmd, + struct its_vpe *sync_vpe) +{ + its_encode_cmd(sync_cmd, GITS_CMD_VSYNC); + its_encode_vpeid(sync_cmd, sync_vpe->vpe_id); + + its_fixup_cmd(sync_cmd); +} + +static BUILD_SINGLE_CMD_FUNC(its_send_single_vcommand, its_cmd_vbuilder_t, + struct its_vpe, its_build_vsync_cmd) + static void its_send_int(struct its_device *dev, u32 event_id) { struct its_cmd_desc desc; @@ -675,6 +775,33 @@ static void its_send_invall(struct its_node *its, struct its_collection *col) its_send_single_command(its, its_build_invall_cmd, &desc); } +static void its_send_vmapti(struct its_device *dev, u32 id) +{ + struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id]; + struct its_cmd_desc desc; + + desc.its_vmapti_cmd.vpe = map->vpe; + desc.its_vmapti_cmd.dev = dev; + desc.its_vmapti_cmd.virt_id = map->vintid; + desc.its_vmapti_cmd.event_id = id; + desc.its_vmapti_cmd.db_enabled = map->db_enabled; + + its_send_single_vcommand(dev->its, its_build_vmapti_cmd, &desc); +} + +static void its_send_vmovi(struct its_device *dev, u32 id) +{ + struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id]; + struct its_cmd_desc desc; + + desc.its_vmovi_cmd.vpe = map->vpe; + desc.its_vmovi_cmd.dev = dev; + desc.its_vmovi_cmd.event_id = id; + desc.its_vmovi_cmd.db_enabled = map->db_enabled; + + its_send_single_vcommand(dev->its, its_build_vmovi_cmd, &desc); +} + /* * irqchip functions - assumes MSI, mostly. 
*/ @@ -787,19 +914,135 @@ static int its_irq_set_irqchip_state(struct irq_data *d, return 0; } +static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + int ret = 0; + + if (!info->map) + return -EINVAL; + + mutex_lock(&its_dev->event_map.vlpi_lock); + + if (!its_dev->event_map.vm) { + struct its_vlpi_map *maps; + + maps = kzalloc(sizeof(*maps) * its_dev->event_map.nr_lpis, + GFP_KERNEL); + if (!maps) { + ret = -ENOMEM; + goto out; + } + + its_dev->event_map.vm = info->map->vm; + its_dev->event_map.vlpi_maps = maps; + } else if (its_dev->event_map.vm != info->map->vm) { + ret = -EINVAL; + goto out; + } + + /* Get our private copy of the mapping information */ + its_dev->event_map.vlpi_maps[event] = *info->map; + + if (irqd_is_forwarded_to_vcpu(d)) { + /* Already mapped, move it around */ + its_send_vmovi(its_dev, event); + } else { + /* Drop the physical mapping */ + its_send_discard(its_dev, event); + + /* and install the virtual one */ + its_send_vmapti(its_dev, event); + irqd_set_forwarded_to_vcpu(d); + + /* Increment the number of VLPIs */ + its_dev->event_map.nr_vlpis++; + } + +out: + mutex_unlock(&its_dev->event_map.vlpi_lock); + return ret; +} + +static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + int ret = 0; + + mutex_lock(&its_dev->event_map.vlpi_lock); + + if (!its_dev->event_map.vm || + !its_dev->event_map.vlpi_maps[event].vm) { + ret = -EINVAL; + goto out; + } + + /* Copy our mapping information to the incoming request */ + *info->map = its_dev->event_map.vlpi_maps[event]; + +out: + mutex_unlock(&its_dev->event_map.vlpi_lock); + return ret; +} + +static int its_vlpi_unmap(struct irq_data *d) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + int ret = 0; + + 
mutex_lock(&its_dev->event_map.vlpi_lock); + + if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) { + ret = -EINVAL; + goto out; + } + + /* Drop the virtual mapping */ + its_send_discard(its_dev, event); + + /* and restore the physical one */ + irqd_clr_forwarded_to_vcpu(d); + its_send_mapti(its_dev, d->hwirq, event); + lpi_update_config(d, 0xff, (LPI_PROP_DEFAULT_PRIO | + LPI_PROP_ENABLED | + LPI_PROP_GROUP1)); + + /* + * Drop the refcount and make the device available again if + * this was the last VLPI. + */ + if (!--its_dev->event_map.nr_vlpis) { + its_dev->event_map.vm = NULL; + kfree(its_dev->event_map.vlpi_maps); + } + +out: + mutex_unlock(&its_dev->event_map.vlpi_lock); + return ret; +} + static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); struct its_cmd_info *info = vcpu_info; /* Need a v4 ITS */ - if (!its_dev->its->is_v4 || !info) + if (!its_dev->its->is_v4) return -EINVAL; + /* Unmap request? */ + if (!info) + return its_vlpi_unmap(d); + switch (info->cmd_type) { case MAP_VLPI: + return its_vlpi_map(d, info); case GET_VLPI: + return its_vlpi_get(d, info); case PROP_UPDATE_VLPI: case PROP_UPDATE_AND_INV_VLPI: @@ -1518,6 +1761,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, dev->event_map.col_map = col_map; dev->event_map.lpi_base = lpi_base; dev->event_map.nr_lpis = nr_lpis; + mutex_init(&dev->event_map.vlpi_lock); dev->device_id = dev_id; INIT_LIST_HEAD(&dev->entry); From 015ec0386ab69a5b6b21f6c1559b242f4540de9f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 09:54:57 +0000 Subject: [PATCH 22/53] irqchip/gic-v3-its: Add VLPI configuration handling When a VLPI is reconfigured (enabled, disabled, change in priority), the full configuration byte must be written, and the caches invalidated. 
Also, when using the irq_mask/irq_unmask methods, it is necessary to disable the doorbell for that particular interrupt (by mapping it to 1023) on top of clearing the Enable bit. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 75 +++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 5187c4b116d4..0aa4d609a8e9 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -812,18 +812,26 @@ static inline u32 its_get_event_id(struct irq_data *d) return d->hwirq - its_dev->event_map.lpi_base; } -static void lpi_update_config(struct irq_data *d, u8 clr, u8 set) +static void lpi_write_config(struct irq_data *d, u8 clr, u8 set) { - struct its_device *its_dev = irq_data_get_irq_chip_data(d); - irq_hw_number_t hwirq = d->hwirq; + irq_hw_number_t hwirq; struct page *prop_page; u8 *cfg; - prop_page = gic_rdists->prop_page; + if (irqd_is_forwarded_to_vcpu(d)) { + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + + prop_page = its_dev->event_map.vm->vprop_page; + hwirq = its_dev->event_map.vlpi_maps[event].vintid; + } else { + prop_page = gic_rdists->prop_page; + hwirq = d->hwirq; + } cfg = page_address(prop_page) + hwirq - 8192; *cfg &= ~clr; - *cfg |= set; + *cfg |= set | LPI_PROP_GROUP1; /* * Make the above write visible to the redistributors. 
@@ -834,16 +842,52 @@ static void lpi_update_config(struct irq_data *d, u8 clr, u8 set) gic_flush_dcache_to_poc(cfg, sizeof(*cfg)); else dsb(ishst); +} + +static void lpi_update_config(struct irq_data *d, u8 clr, u8 set) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + + lpi_write_config(d, clr, set); its_send_inv(its_dev, its_get_event_id(d)); } +static void its_vlpi_set_doorbell(struct irq_data *d, bool enable) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + + if (its_dev->event_map.vlpi_maps[event].db_enabled == enable) + return; + + its_dev->event_map.vlpi_maps[event].db_enabled = enable; + + /* + * More fun with the architecture: + * + * Ideally, we'd issue a VMAPTI to set the doorbell to its LPI + * value or to 1023, depending on the enable bit. But that + * would be issueing a mapping for an /existing/ DevID+EventID + * pair, which is UNPREDICTABLE. Instead, let's issue a VMOVI + * to the /same/ vPE, using this opportunity to adjust the + * doorbell. Mouahahahaha. We loves it, Precious. 
+ */ + its_send_vmovi(its_dev, event); +} + static void its_mask_irq(struct irq_data *d) { + if (irqd_is_forwarded_to_vcpu(d)) + its_vlpi_set_doorbell(d, false); + lpi_update_config(d, LPI_PROP_ENABLED, 0); } static void its_unmask_irq(struct irq_data *d) { + if (irqd_is_forwarded_to_vcpu(d)) + its_vlpi_set_doorbell(d, true); + lpi_update_config(d, 0, LPI_PROP_ENABLED); } @@ -856,6 +900,10 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, struct its_collection *target_col; u32 id = its_get_event_id(d); + /* A forwarded interrupt should use irq_set_vcpu_affinity */ + if (irqd_is_forwarded_to_vcpu(d)) + return -EINVAL; + /* lpi cannot be routed to a redistributor that is on a foreign node */ if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) { if (its_dev->its->numa_node >= 0) { @@ -1024,6 +1072,22 @@ static int its_vlpi_unmap(struct irq_data *d) return ret; } +static int its_vlpi_prop_update(struct irq_data *d, struct its_cmd_info *info) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + + if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) + return -EINVAL; + + if (info->cmd_type == PROP_UPDATE_AND_INV_VLPI) + lpi_update_config(d, 0xff, info->config); + else + lpi_write_config(d, 0xff, info->config); + its_vlpi_set_doorbell(d, !!(info->config & LPI_PROP_ENABLED)); + + return 0; +} + static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); @@ -1046,6 +1110,7 @@ static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) case PROP_UPDATE_VLPI: case PROP_UPDATE_AND_INV_VLPI: + return its_vlpi_prop_update(d, info); default: return -EINVAL; From 8fff27aec182ed4d6fe15d1373fc6b35cdfcda0f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 13:41:55 +0000 Subject: [PATCH 23/53] irqchip/gic-v3-its: Add VPE domain infrastructure Add the basic GICv4 VPE (vcpu in GICv4 parlance) infrastructure (irqchip, 
irq domain) that is going to be populated in the following patches. Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0aa4d609a8e9..0de4a54485e8 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2032,6 +2032,13 @@ static const struct irq_domain_ops its_domain_ops = { .deactivate = its_irq_domain_deactivate, }; +static struct irq_chip its_vpe_irq_chip = { + .name = "GICv4-vpe", +}; + +static const struct irq_domain_ops its_vpe_domain_ops = { +}; + static int its_force_quiescent(void __iomem *base) { u32 count = 1000000; /* 1s */ @@ -2148,6 +2155,11 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) return 0; } +static int its_init_vpe_domain(void) +{ + return 0; +} + static int __init its_compute_its_list_map(struct resource *res, void __iomem *its_base) { @@ -2490,6 +2502,9 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *parent_domain) { struct device_node *of_node; + struct its_node *its; + bool has_v4 = false; + int err; its_parent = parent_domain; of_node = to_of_node(handle); @@ -2504,5 +2519,19 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, } gic_rdists = rdists; - return its_alloc_lpi_tables(); + err = its_alloc_lpi_tables(); + if (err) + return err; + + list_for_each_entry(its, &its_nodes, entry) + has_v4 |= its->is_v4; + + if (has_v4 & rdists->has_vlpis) { + if (its_init_vpe_domain()) { + rdists->has_vlpis = false; + pr_err("ITS: Disabling GICv4 support\n"); + } + } + + return 0; } From 7d75bbb4bc1ad90386776459d37e4ddfe605671e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 13:55:54 +0000 Subject: [PATCH 24/53] irqchip/gic-v3-its: Add VPE irq domain allocation/teardown When creating a VM, 
the low level GICv4 code is responsible for: - allocating each VPE a unique VPEID - allocating a doorbell interrupt for each VPE - allocating the pending tables for each VPE - allocating the property table for the VM This of course has to be reversed when the VM is brought down. All of this is wired into the irq domain alloc/free methods. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 169 +++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0de4a54485e8..06ec47f004f9 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -148,6 +148,7 @@ static struct irq_domain *its_parent; #define ITS_LIST_MAX 16 static unsigned long its_list_map; +static DEFINE_IDA(its_vpeid_ida); #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) @@ -1255,6 +1256,11 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags) return prop_page; } +static void its_free_prop_table(struct page *prop_page) +{ + free_pages((unsigned long)page_address(prop_page), + get_order(LPI_PROPBASE_SZ)); +} static int __init its_alloc_lpi_tables(void) { @@ -1557,6 +1563,12 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags) return pend_page; } +static void its_free_pending_table(struct page *pt) +{ + free_pages((unsigned long)page_address(pt), + get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K))); +} + static void its_cpu_init_lpis(void) { void __iomem *rbase = gic_data_rdist_rd_base(); @@ -1779,6 +1791,34 @@ static bool its_alloc_device_table(struct its_node *its, u32 dev_id) return its_alloc_table_entry(baser, dev_id); } +static bool its_alloc_vpe_table(u32 vpe_id) +{ + struct its_node *its; + + /* + * Make sure the L2 tables are allocated on *all* v4 ITSs. 
We + * could try and only do it on ITSs corresponding to devices + * that have interrupts targeted at this VPE, but the + * complexity becomes crazy (and you have tons of memory + * anyway, right?). + */ + list_for_each_entry(its, &its_nodes, entry) { + struct its_baser *baser; + + if (!its->is_v4) + continue; + + baser = its_get_baser(its, GITS_BASER_TYPE_VCPU); + if (!baser) + return false; + + if (!its_alloc_table_entry(baser, vpe_id)) + return false; + } + + return true; +} + static struct its_device *its_create_device(struct its_node *its, u32 dev_id, int nvecs) { @@ -2036,7 +2076,136 @@ static struct irq_chip its_vpe_irq_chip = { .name = "GICv4-vpe", }; +static int its_vpe_id_alloc(void) +{ + return ida_simple_get(&its_vpeid_ida, 0, 1 << 16, GFP_KERNEL); +} + +static void its_vpe_id_free(u16 id) +{ + ida_simple_remove(&its_vpeid_ida, id); +} + +static int its_vpe_init(struct its_vpe *vpe) +{ + struct page *vpt_page; + int vpe_id; + + /* Allocate vpe_id */ + vpe_id = its_vpe_id_alloc(); + if (vpe_id < 0) + return vpe_id; + + /* Allocate VPT */ + vpt_page = its_allocate_pending_table(GFP_KERNEL); + if (!vpt_page) { + its_vpe_id_free(vpe_id); + return -ENOMEM; + } + + if (!its_alloc_vpe_table(vpe_id)) { + its_vpe_id_free(vpe_id); + its_free_pending_table(vpt_page); + return -ENOMEM; + } + + vpe->vpe_id = vpe_id; + vpe->vpt_page = vpt_page; + + return 0; +} + +static void its_vpe_teardown(struct its_vpe *vpe) +{ + its_vpe_id_free(vpe->vpe_id); + its_free_pending_table(vpe->vpt_page); +} + +static void its_vpe_irq_domain_free(struct irq_domain *domain, + unsigned int virq, + unsigned int nr_irqs) +{ + struct its_vm *vm = domain->host_data; + int i; + + irq_domain_free_irqs_parent(domain, virq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + struct irq_data *data = irq_domain_get_irq_data(domain, + virq + i); + struct its_vpe *vpe = irq_data_get_irq_chip_data(data); + + BUG_ON(vm != vpe->its_vm); + + clear_bit(data->hwirq, vm->db_bitmap); + 
its_vpe_teardown(vpe); + irq_domain_reset_irq_data(data); + } + + if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) { + its_lpi_free_chunks(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); + its_free_prop_table(vm->vprop_page); + } +} + +static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct its_vm *vm = args; + unsigned long *bitmap; + struct page *vprop_page; + int base, nr_ids, i, err = 0; + + BUG_ON(!vm); + + bitmap = its_lpi_alloc_chunks(nr_irqs, &base, &nr_ids); + if (!bitmap) + return -ENOMEM; + + if (nr_ids < nr_irqs) { + its_lpi_free_chunks(bitmap, base, nr_ids); + return -ENOMEM; + } + + vprop_page = its_allocate_prop_table(GFP_KERNEL); + if (!vprop_page) { + its_lpi_free_chunks(bitmap, base, nr_ids); + return -ENOMEM; + } + + vm->db_bitmap = bitmap; + vm->db_lpi_base = base; + vm->nr_db_lpis = nr_ids; + vm->vprop_page = vprop_page; + + for (i = 0; i < nr_irqs; i++) { + vm->vpes[i]->vpe_db_lpi = base + i; + err = its_vpe_init(vm->vpes[i]); + if (err) + break; + err = its_irq_gic_domain_alloc(domain, virq + i, + vm->vpes[i]->vpe_db_lpi); + if (err) + break; + irq_domain_set_hwirq_and_chip(domain, virq + i, i, + &its_vpe_irq_chip, vm->vpes[i]); + set_bit(i, bitmap); + } + + if (err) { + if (i > 0) + its_vpe_irq_domain_free(domain, virq, i - 1); + + its_lpi_free_chunks(bitmap, base, nr_ids); + its_free_prop_table(vprop_page); + } + + return err; +} + static const struct irq_domain_ops its_vpe_domain_ops = { + .alloc = its_vpe_irq_domain_alloc, + .free = its_vpe_irq_domain_free, }; static int its_force_quiescent(void __iomem *base) From eb78192be2833484b0f7b506d217a40621215251 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 14:47:05 +0000 Subject: [PATCH 25/53] irqchip/gic-v3-its: Add VPE irq domain [de]activation On activation, a VPE is mapped using the VMAPP command, followed by a VINVALL for a good measure. On deactivation, the VPE is simply unmapped. 
Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 102 +++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 06ec47f004f9..128740b87806 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -213,6 +213,16 @@ struct its_cmd_desc { struct its_collection *col; } its_invall_cmd; + struct { + struct its_vpe *vpe; + } its_vinvall_cmd; + + struct { + struct its_vpe *vpe; + struct its_collection *col; + bool valid; + } its_vmapp_cmd; + struct { struct its_vpe *vpe; struct its_device *dev; @@ -318,6 +328,16 @@ static void its_encode_db_valid(struct its_cmd_block *cmd, bool db_valid) its_mask_encode(&cmd->raw_cmd[2], db_valid, 0, 0); } +static void its_encode_vpt_addr(struct its_cmd_block *cmd, u64 vpt_pa) +{ + its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 51, 16); +} + +static void its_encode_vpt_size(struct its_cmd_block *cmd, u8 vpt_size) +{ + its_mask_encode(&cmd->raw_cmd[3], vpt_size, 4, 0); +} + static inline void its_fixup_cmd(struct its_cmd_block *cmd) { /* Let's fixup BE commands */ @@ -476,6 +496,36 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, return NULL; } +static struct its_vpe *its_build_vinvall_cmd(struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + its_encode_cmd(cmd, GITS_CMD_VINVALL); + its_encode_vpeid(cmd, desc->its_vinvall_cmd.vpe->vpe_id); + + its_fixup_cmd(cmd); + + return desc->its_vinvall_cmd.vpe; +} + +static struct its_vpe *its_build_vmapp_cmd(struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + unsigned long vpt_addr; + + vpt_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->vpt_page)); + + its_encode_cmd(cmd, GITS_CMD_VMAPP); + its_encode_vpeid(cmd, desc->its_vmapp_cmd.vpe->vpe_id); + its_encode_valid(cmd, desc->its_vmapp_cmd.valid); + its_encode_target(cmd, desc->its_vmapp_cmd.col->target_address); + 
its_encode_vpt_addr(cmd, vpt_addr); + its_encode_vpt_size(cmd, LPI_NRBITS - 1); + + its_fixup_cmd(cmd); + + return desc->its_vmapp_cmd.vpe; +} + static struct its_vpe *its_build_vmapti_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { @@ -803,6 +853,37 @@ static void its_send_vmovi(struct its_device *dev, u32 id) its_send_single_vcommand(dev->its, its_build_vmovi_cmd, &desc); } +static void its_send_vmapp(struct its_vpe *vpe, bool valid) +{ + struct its_cmd_desc desc; + struct its_node *its; + + desc.its_vmapp_cmd.vpe = vpe; + desc.its_vmapp_cmd.valid = valid; + + list_for_each_entry(its, &its_nodes, entry) { + if (!its->is_v4) + continue; + + desc.its_vmapp_cmd.col = &its->collections[vpe->col_idx]; + its_send_single_vcommand(its, its_build_vmapp_cmd, &desc); + } +} + +static void its_send_vinvall(struct its_vpe *vpe) +{ + struct its_cmd_desc desc; + struct its_node *its; + + desc.its_vinvall_cmd.vpe = vpe; + + list_for_each_entry(its, &its_nodes, entry) { + if (!its->is_v4) + continue; + its_send_single_vcommand(its, its_build_vinvall_cmd, &desc); + } +} + /* * irqchip functions - assumes MSI, mostly. 
*/ @@ -2203,9 +2284,30 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq return err; } +static void its_vpe_irq_domain_activate(struct irq_domain *domain, + struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + + /* Map the VPE to the first possible CPU */ + vpe->col_idx = cpumask_first(cpu_online_mask); + its_send_vmapp(vpe, true); + its_send_vinvall(vpe); +} + +static void its_vpe_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + + its_send_vmapp(vpe, false); +} + static const struct irq_domain_ops its_vpe_domain_ops = { .alloc = its_vpe_irq_domain_alloc, .free = its_vpe_irq_domain_free, + .activate = its_vpe_irq_domain_activate, + .deactivate = its_vpe_irq_domain_deactivate, }; static int its_force_quiescent(void __iomem *base) From 3ca63f363f3f8fe457482c53d5c86d83bff21e64 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jan 2017 13:39:52 +0000 Subject: [PATCH 26/53] irqchip/gic-v3-its: Add VPENDBASER/VPROPBASER accessors V{PEND,PROP}BASER being 64bit registers, they need some ad-hoc accessors on 32bit, specially given that VPENDBASER contains a Valid bit, making the access a bit convoluted. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- arch/arm/include/asm/arch_gicv3.h | 28 ++++++++++++++++++++++++++++ arch/arm64/include/asm/arch_gicv3.h | 5 +++++ include/linux/irqchip/arm-gic-v3.h | 5 +++++ 3 files changed, 38 insertions(+) diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 27475904e096..8d45e88feac9 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -291,5 +291,33 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr) */ #define gits_write_cwriter(v, c) __gic_writeq_nonatomic(v, c) +/* + * GITS_VPROPBASER - hi and lo bits may be accessed independently. 
+ */ +#define gits_write_vpropbaser(v, c) __gic_writeq_nonatomic(v, c) + +/* + * GITS_VPENDBASER - the Valid bit must be cleared before changing + * anything else. + */ +static inline void gits_write_vpendbaser(u64 val, void * __iomem addr) +{ + u32 tmp; + + tmp = readl_relaxed(addr + 4); + if (tmp & (GICR_VPENDBASER_Valid >> 32)) { + tmp &= ~(GICR_VPENDBASER_Valid >> 32); + writel_relaxed(tmp, addr + 4); + } + + /* + * Use the fact that __gic_writeq_nonatomic writes the second + * half of the 64bit quantity after the first. + */ + __gic_writeq_nonatomic(val, addr); +} + +#define gits_read_vpendbaser(c) __gic_readq_nonatomic(c) + #endif /* !__ASSEMBLY__ */ #endif /* !__ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 8cef47fa2218..0d2a53457c30 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -133,5 +133,10 @@ static inline void gic_write_bpr1(u32 val) #define gicr_write_pendbaser(v, c) writeq_relaxed(v, c) #define gicr_read_pendbaser(c) readq_relaxed(c) +#define gits_write_vpropbaser(v, c) writeq_relaxed(v, c) + +#define gits_write_vpendbaser(v, c) writeq_relaxed(v, c) +#define gits_read_vpendbaser(c) readq_relaxed(c) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARCH_GICV3_H */ diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 7c6fd8f3e36c..17ba0d732f12 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -212,6 +212,11 @@ #define LPI_PROP_GROUP1 (1 << 1) #define LPI_PROP_ENABLED (1 << 0) +#define GICR_VPENDBASER_Dirty (1ULL << 60) +#define GICR_VPENDBASER_PendingLast (1ULL << 61) +#define GICR_VPENDBASER_IDAI (1ULL << 62) +#define GICR_VPENDBASER_Valid (1ULL << 63) + /* * ITS registers, offsets from ITS_base */ From e643d80340363c9d172abfbe437537196cfc1643 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:09:31 +0000 Subject: [PATCH 27/53] irqchip/gic-v3-its: 
Add VPE scheduling When a VPE is scheduled to run, the corresponding redistributor must be told so, by setting VPROPBASER to the VM's property table, and VPENDBASER to the vcpu's pending table. When scheduled out, we preserve the IDAI and PendingLast bits. The latter is specially important, as it tells the hypervisor that there are pending interrupts for this vcpu. Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 85 ++++++++++++++++++++++++++++++ include/linux/irqchip/arm-gic-v3.h | 58 ++++++++++++++++++++ 2 files changed, 143 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 128740b87806..f4827040a788 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -152,6 +152,7 @@ static DEFINE_IDA(its_vpeid_ida); #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) +#define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K) static struct its_collection *dev_event_to_col(struct its_device *its_dev, u32 event) @@ -2153,8 +2154,92 @@ static const struct irq_domain_ops its_domain_ops = { .deactivate = its_irq_domain_deactivate, }; +static void its_vpe_schedule(struct its_vpe *vpe) +{ + void * __iomem vlpi_base = gic_data_rdist_vlpi_base(); + u64 val; + + /* Schedule the VPE */ + val = virt_to_phys(page_address(vpe->its_vm->vprop_page)) & + GENMASK_ULL(51, 12); + val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; + val |= GICR_VPROPBASER_RaWb; + val |= GICR_VPROPBASER_InnerShareable; + gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + + val = virt_to_phys(page_address(vpe->vpt_page)) & + GENMASK_ULL(51, 16); + val |= GICR_VPENDBASER_RaWaWb; + val |= GICR_VPENDBASER_NonShareable; + /* + * There is no good way of finding out if the pending table is + * empty as we can race against the doorbell interrupt very + * easily. 
So in the end, vpe->pending_last is only an + * indication that the vcpu has something pending, not one + * that the pending table is empty. A good implementation + * would be able to read its coarse map pretty quickly anyway, + * making this a tolerable issue. + */ + val |= GICR_VPENDBASER_PendingLast; + val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0; + val |= GICR_VPENDBASER_Valid; + gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); +} + +static void its_vpe_deschedule(struct its_vpe *vpe) +{ + void * __iomem vlpi_base = gic_data_rdist_vlpi_base(); + u32 count = 1000000; /* 1s! */ + bool clean; + u64 val; + + /* We're being scheduled out */ + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + val &= ~GICR_VPENDBASER_Valid; + gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + do { + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + clean = !(val & GICR_VPENDBASER_Dirty); + if (!clean) { + count--; + cpu_relax(); + udelay(1); + } + } while (!clean && count); + + if (unlikely(!clean && !count)) { + pr_err_ratelimited("ITS virtual pending table not cleaning\n"); + vpe->idai = false; + vpe->pending_last = true; + } else { + vpe->idai = !!(val & GICR_VPENDBASER_IDAI); + vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); + } +} + +static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_cmd_info *info = vcpu_info; + + switch (info->cmd_type) { + case SCHEDULE_VPE: + its_vpe_schedule(vpe); + return 0; + + case DESCHEDULE_VPE: + its_vpe_deschedule(vpe); + return 0; + + default: + return -EINVAL; + } +} + static struct irq_chip its_vpe_irq_chip = { .name = "GICv4-vpe", + .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity, }; static int its_vpe_id_alloc(void) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 17ba0d732f12..6bc142cfa616 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ 
b/include/linux/irqchip/arm-gic-v3.h @@ -212,6 +212,64 @@ #define LPI_PROP_GROUP1 (1 << 1) #define LPI_PROP_ENABLED (1 << 0) +/* + * Re-Distributor registers, offsets from VLPI_base + */ +#define GICR_VPROPBASER 0x0070 + +#define GICR_VPROPBASER_IDBITS_MASK 0x1f + +#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56) + +#define GICR_VPROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK) +#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK) +#define GICR_VPROPBASER_CACHEABILITY_MASK \ + GICR_VPROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable) + +#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) +#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) +#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) +#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) +#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) +#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb) + +#define GICR_VPENDBASER 0x0078 + +#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_VPENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK) +#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, 
INNER, MASK) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK) +#define GICR_VPENDBASER_CACHEABILITY_MASK \ + GICR_VPENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPENDBASER_NonShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) + +#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) +#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) +#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) +#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) +#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) +#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb) + #define GICR_VPENDBASER_Dirty (1ULL << 60) #define GICR_VPENDBASER_PendingLast (1ULL << 61) #define GICR_VPENDBASER_IDAI (1ULL << 62) From 5e2f764234bd0ef9542902fb608e525c41e0d7ee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:10:50 +0000 Subject: [PATCH 28/53] irqchip/gic-v3-its: Add VPE invalidation hook When a guest issues an INVALL command targeting a collection, it must be translated into a VINVALL for the VPE that has this collection. This patch implements a hook that offers this functionality to the hypervisor.
Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index f4827040a788..21b728df7544 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2232,6 +2232,10 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_deschedule(vpe); return 0; + case INVALL_VPE: + its_send_vinvall(vpe); + return 0; + default: return -EINVAL; } From 3171a47a22eedfb6e97803fb193fc98cdb61d90f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:17:28 +0000 Subject: [PATCH 29/53] irqchip/gic-v3-its: Add VPE affinity changes When we're about to run a vcpu, it is crucial that the redistributor associated with the physical CPU is being told about the new residency. This is abstracted by hijacking the irq_set_affinity method for the doorbell interrupt associated with the VPE. It is expected that the hypervisor will call this method before scheduling the VPE. 
Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 96 ++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 21b728df7544..ab25b57866cd 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -148,6 +148,9 @@ static struct irq_domain *its_parent; #define ITS_LIST_MAX 16 static unsigned long its_list_map; +static u16 vmovp_seq_num; +static DEFINE_RAW_SPINLOCK(vmovp_lock); + static DEFINE_IDA(its_vpeid_ida); #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) @@ -238,6 +241,13 @@ struct its_cmd_desc { u32 event_id; bool db_enabled; } its_vmovi_cmd; + + struct { + struct its_vpe *vpe; + struct its_collection *col; + u16 seq_num; + u16 its_list; + } its_vmovp_cmd; }; }; @@ -329,6 +339,16 @@ static void its_encode_db_valid(struct its_cmd_block *cmd, bool db_valid) its_mask_encode(&cmd->raw_cmd[2], db_valid, 0, 0); } +static void its_encode_seq_num(struct its_cmd_block *cmd, u16 seq_num) +{ + its_mask_encode(&cmd->raw_cmd[0], seq_num, 47, 32); +} + +static void its_encode_its_list(struct its_cmd_block *cmd, u16 its_list) +{ + its_mask_encode(&cmd->raw_cmd[1], its_list, 15, 0); +} + static void its_encode_vpt_addr(struct its_cmd_block *cmd, u64 vpt_pa) { its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 50, 16); @@ -571,6 +591,20 @@ static struct its_vpe *its_build_vmovi_cmd(struct its_cmd_block *cmd, return desc->its_vmovi_cmd.vpe; } +static struct its_vpe *its_build_vmovp_cmd(struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + its_encode_cmd(cmd, GITS_CMD_VMOVP); + its_encode_seq_num(cmd, desc->its_vmovp_cmd.seq_num); + its_encode_its_list(cmd, desc->its_vmovp_cmd.its_list); + its_encode_vpeid(cmd, desc->its_vmovp_cmd.vpe->vpe_id); + its_encode_target(cmd, desc->its_vmovp_cmd.col->target_address); + + its_fixup_cmd(cmd); + + return desc->its_vmovp_cmd.vpe; +} + static u64 
its_cmd_ptr_to_offset(struct its_node *its, struct its_cmd_block *ptr) { @@ -871,6 +905,48 @@ static void its_send_vmapp(struct its_vpe *vpe, bool valid) } } +static void its_send_vmovp(struct its_vpe *vpe) +{ + struct its_cmd_desc desc; + struct its_node *its; + unsigned long flags; + int col_id = vpe->col_idx; + + desc.its_vmovp_cmd.vpe = vpe; + desc.its_vmovp_cmd.its_list = (u16)its_list_map; + + if (!its_list_map) { + its = list_first_entry(&its_nodes, struct its_node, entry); + desc.its_vmovp_cmd.seq_num = 0; + desc.its_vmovp_cmd.col = &its->collections[col_id]; + its_send_single_vcommand(its, its_build_vmovp_cmd, &desc); + return; + } + + /* + * Yet another marvel of the architecture. If using the + * its_list "feature", we need to make sure that all ITSs + * receive all VMOVP commands in the same order. The only way + * to guarantee this is to make vmovp a serialization point. + * + * Wall <-- Head. + */ + raw_spin_lock_irqsave(&vmovp_lock, flags); + + desc.its_vmovp_cmd.seq_num = vmovp_seq_num++; + + /* Emit VMOVPs */ + list_for_each_entry(its, &its_nodes, entry) { + if (!its->is_v4) + continue; + + desc.its_vmovp_cmd.col = &its->collections[col_id]; + its_send_single_vcommand(its, its_build_vmovp_cmd, &desc); + } + + raw_spin_unlock_irqrestore(&vmovp_lock, flags); +} + static void its_send_vinvall(struct its_vpe *vpe) { struct its_cmd_desc desc; @@ -2154,6 +2230,25 @@ static const struct irq_domain_ops its_domain_ops = { .deactivate = its_irq_domain_deactivate, }; +static int its_vpe_set_affinity(struct irq_data *d, + const struct cpumask *mask_val, + bool force) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + int cpu = cpumask_first(mask_val); + + /* + * Changing affinity is mega expensive, so let's be as lazy as + * we can and only do it if we really have to. 
+ */ + if (vpe->col_idx != cpu) { + vpe->col_idx = cpu; + its_send_vmovp(vpe); + } + + return IRQ_SET_MASK_OK_DONE; +} + static void its_vpe_schedule(struct its_vpe *vpe) { void * __iomem vlpi_base = gic_data_rdist_vlpi_base(); @@ -2243,6 +2338,7 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) static struct irq_chip its_vpe_irq_chip = { .name = "GICv4-vpe", + .irq_set_affinity = its_vpe_set_affinity, .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity, }; From f6a91da7c788ac7345305ee291d0b205d2f5a70f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:20:38 +0000 Subject: [PATCH 30/53] irqchip/gic-v3-its: Add VPE interrupt masking When masking/unmasking a doorbell interrupt, it is necessary to issue an invalidation to the corresponding redistributor. We use the DirectLPI feature by writing directly to the corresponding redistributor. Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- arch/arm/include/asm/arch_gicv3.h | 6 ++++++ arch/arm64/include/asm/arch_gicv3.h | 2 ++ drivers/irqchip/irq-gic-v3-its.c | 33 +++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 8d45e88feac9..eee269321923 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -275,6 +275,12 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr) #define gicr_read_pendbaser(c) __gic_readq_nonatomic(c) #define gicr_write_pendbaser(v, c) __gic_writeq_nonatomic(v, c) +/* + * GICR_xLPIR - only the lower bits are significant + */ +#define gic_read_lpir(c) readl_relaxed(c) +#define gic_write_lpir(v, c) writel_relaxed(lower_32_bits(v), c) + /* * GITS_TYPER is an ID register and doesn't need atomicity.
*/ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 0d2a53457c30..b7e3f74822da 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -116,6 +116,8 @@ static inline void gic_write_bpr1(u32 val) #define gic_read_typer(c) readq_relaxed(c) #define gic_write_irouter(v, c) writeq_relaxed(v, c) +#define gic_read_lpir(c) readq_relaxed(c) +#define gic_write_lpir(v, c) writeq_relaxed(v, c) #define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index ab25b57866cd..9040fb41d446 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2336,8 +2336,41 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) } } +static void its_vpe_send_inv(struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; + gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR); + while (gic_read_lpir(rdbase + GICR_SYNCR) & 1) + cpu_relax(); +} + +static void its_vpe_mask_irq(struct irq_data *d) +{ + /* + * We need to unmask the LPI, which is described by the parent + * irq_data. Instead of calling into the parent (which won't + * exactly do the right thing, let's simply use the + * parent_data pointer. Yes, I'm naughty. + */ + lpi_write_config(d->parent_data, LPI_PROP_ENABLED, 0); + its_vpe_send_inv(d); +} + +static void its_vpe_unmask_irq(struct irq_data *d) +{ + /* Same hack as above... 
*/ + lpi_write_config(d->parent_data, 0, LPI_PROP_ENABLED); + its_vpe_send_inv(d); +} + static struct irq_chip its_vpe_irq_chip = { .name = "GICv4-vpe", + .irq_mask = its_vpe_mask_irq, + .irq_unmask = its_vpe_unmask_irq, + .irq_eoi = irq_chip_eoi_parent, .irq_set_affinity = its_vpe_set_affinity, .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity, }; From 93f94ea0548c2628efe7d57e0989f06a14cedb53 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Aug 2017 18:37:09 +0100 Subject: [PATCH 31/53] irqchip/gic-v3-its: Make LPI allocation optional on device creation The normal course of action when allocating the ITS' view of a device is to allocate the corresponding LPIs. But we're about to introduce devices that borrow their interrupts from some other entities. So let's make the allocation optional. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9040fb41d446..017a6efa3747 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1978,10 +1978,10 @@ static bool its_alloc_vpe_table(u32 vpe_id) } static struct its_device *its_create_device(struct its_node *its, u32 dev_id, - int nvecs) + int nvecs, bool alloc_lpis) { struct its_device *dev; - unsigned long *lpi_map; + unsigned long *lpi_map = NULL; unsigned long flags; u16 *col_map = NULL; void *itt; @@ -2003,11 +2003,18 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, sz = nr_ites * its->ite_size; sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; itt = kzalloc(sz, GFP_KERNEL); - lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); - if (lpi_map) - col_map = kzalloc(sizeof(*col_map) * nr_lpis, GFP_KERNEL); + if (alloc_lpis) { + lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); + if (lpi_map) + col_map = kzalloc(sizeof(*col_map) * nr_lpis, + GFP_KERNEL); + } else { 
+ col_map = kzalloc(sizeof(*col_map) * nr_ites, GFP_KERNEL); + nr_lpis = 0; + lpi_base = 0; + } - if (!dev || !itt || !lpi_map || !col_map) { + if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) { kfree(dev); kfree(itt); kfree(lpi_map); @@ -2094,7 +2101,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, goto out; } - its_dev = its_create_device(its, dev_id, nvec); + its_dev = its_create_device(its, dev_id, nvec, true); if (!its_dev) return -ENOMEM; From 20b3d54ecba51c5fe476eea94ffdc463559c5c85 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:23:22 +0000 Subject: [PATCH 32/53] irqchip/gic-v3-its: Add device proxy for VPE management if !DirectLpi When we don't have the DirectLPI feature, we must work around the architecture shortcomings to be able to perform the required maintenance (interrupt masking, clearing and injection). For this, we create a fake device whose sole purpose is to provide a way to issue commands as if we were dealing with LPIs coming from that device (while they actually originate from the ITS). This fake device doesn't have LPIs allocated to it, but instead uses the VPE LPIs. Of course, this could be a real bottleneck, and a naive implementation would require 6 commands to issue an invalidation. Instead, let's allocate at least one event per physical CPU (rounded up to the next power of 2), and opportunistically map the VPE doorbell to an event. This doorbell will be mapped until we roll over and need to reallocate this slot. This ensures that most of the time, we only need 2 commands to issue an INV, INT or CLEAR, making the performance a lot better, given that we always issue a CLEAR on entry, and an INV on each side of a trapped WFI. 
Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 151 +++++++++++++++++++++++++++-- include/linux/irqchip/arm-gic-v4.h | 2 + 2 files changed, 147 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 017a6efa3747..06f14f20368b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -136,6 +136,13 @@ struct its_device { u32 device_id; }; +static struct { + raw_spinlock_t lock; + struct its_device *dev; + struct its_vpe **vpes; + int next_victim; +} vpe_proxy; + static LIST_HEAD(its_nodes); static DEFINE_SPINLOCK(its_lock); static struct rdists *gic_rdists; @@ -2090,6 +2097,16 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, msi_info = msi_get_domain_info(domain); its = msi_info->data; + if (!gic_rdists->has_direct_lpi && + vpe_proxy.dev && + vpe_proxy.dev->its == its && + dev_id == vpe_proxy.dev->device_id) { + /* Bad luck. Get yourself a better implementation */ + WARN_ONCE(1, "DevId %x clashes with GICv4 VPE proxy device\n", + dev_id); + return -EINVAL; + } + its_dev = its_find_device(its, dev_id); if (its_dev) { /* @@ -2237,6 +2254,70 @@ static const struct irq_domain_ops its_domain_ops = { .deactivate = its_irq_domain_deactivate, }; +/* + * This is insane. + * + * If a GICv4 doesn't implement Direct LPIs (which is extremely + * likely), the only way to perform an invalidate is to use a fake + * device to issue an INV command, implying that the LPI has first + * been mapped to some event on that device. Since this is not exactly + * cheap, we try to keep that mapping around as long as possible, and + * only issue an UNMAP if we're short on available slots. + * + * Broken by design(tm). + */ +static void its_vpe_db_proxy_unmap_locked(struct its_vpe *vpe) +{ + /* Already unmapped? 
*/ + if (vpe->vpe_proxy_event == -1) + return; + + its_send_discard(vpe_proxy.dev, vpe->vpe_proxy_event); + vpe_proxy.vpes[vpe->vpe_proxy_event] = NULL; + + /* + * We don't track empty slots at all, so let's move the + * next_victim pointer if we can quickly reuse that slot + * instead of nuking an existing entry. Not clear that this is + * always a win though, and this might just generate a ripple + * effect... Let's just hope VPEs don't migrate too often. + */ + if (vpe_proxy.vpes[vpe_proxy.next_victim]) + vpe_proxy.next_victim = vpe->vpe_proxy_event; + + vpe->vpe_proxy_event = -1; +} + +static void its_vpe_db_proxy_unmap(struct its_vpe *vpe) +{ + if (!gic_rdists->has_direct_lpi) { + unsigned long flags; + + raw_spin_lock_irqsave(&vpe_proxy.lock, flags); + its_vpe_db_proxy_unmap_locked(vpe); + raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags); + } +} + +static void its_vpe_db_proxy_map_locked(struct its_vpe *vpe) +{ + /* Already mapped? */ + if (vpe->vpe_proxy_event != -1) + return; + + /* This slot was already allocated. Kick the other VPE out. */ + if (vpe_proxy.vpes[vpe_proxy.next_victim]) + its_vpe_db_proxy_unmap_locked(vpe_proxy.vpes[vpe_proxy.next_victim]); + + /* Map the new VPE instead */ + vpe_proxy.vpes[vpe_proxy.next_victim] = vpe; + vpe->vpe_proxy_event = vpe_proxy.next_victim; + vpe_proxy.next_victim = (vpe_proxy.next_victim + 1) % vpe_proxy.dev->nr_ites; + + vpe_proxy.dev->event_map.col_map[vpe->vpe_proxy_event] = vpe->col_idx; + its_send_mapti(vpe_proxy.dev, vpe->vpe_db_lpi, vpe->vpe_proxy_event); +} + static int its_vpe_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) @@ -2246,9 +2327,11 @@ static int its_vpe_set_affinity(struct irq_data *d, /* * Changing affinity is mega expensive, so let's be as lazy as - * we can and only do it if we really have to. + * we can and only do it if we really have to. Also, if mapped + * into the proxy device, we need to nuke that mapping. 
*/ if (vpe->col_idx != cpu) { + its_vpe_db_proxy_unmap(vpe); vpe->col_idx = cpu; its_send_vmovp(vpe); } @@ -2343,15 +2426,33 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) } } +static void its_vpe_send_cmd(struct its_vpe *vpe, + void (*cmd)(struct its_device *, u32)) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&vpe_proxy.lock, flags); + + its_vpe_db_proxy_map_locked(vpe); + cmd(vpe_proxy.dev, vpe->vpe_proxy_event); + + raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags); +} + static void its_vpe_send_inv(struct irq_data *d) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - void __iomem *rdbase; - rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; - gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR); - while (gic_read_lpir(rdbase + GICR_SYNCR) & 1) - cpu_relax(); + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; + gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR); + while (gic_read_lpir(rdbase + GICR_SYNCR) & 1) + cpu_relax(); + } else { + its_vpe_send_cmd(vpe, its_send_inv); + } } static void its_vpe_mask_irq(struct irq_data *d) @@ -2417,12 +2518,14 @@ static int its_vpe_init(struct its_vpe *vpe) vpe->vpe_id = vpe_id; vpe->vpt_page = vpt_page; + vpe->vpe_proxy_event = -1; return 0; } static void its_vpe_teardown(struct its_vpe *vpe) { + its_vpe_db_proxy_unmap(vpe); its_vpe_id_free(vpe->vpe_id); its_free_pending_table(vpe->vpt_page); } @@ -2653,6 +2756,42 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) static int its_init_vpe_domain(void) { + struct its_node *its; + u32 devid; + int entries; + + if (gic_rdists->has_direct_lpi) { + pr_info("ITS: Using DirectLPI for VPE invalidation\n"); + return 0; + } + + /* Any ITS will do, even if not v4 */ + its = list_first_entry(&its_nodes, struct its_node, entry); + + entries = roundup_pow_of_two(nr_cpu_ids); + vpe_proxy.vpes = 
kzalloc(sizeof(*vpe_proxy.vpes) * entries, + GFP_KERNEL); + if (!vpe_proxy.vpes) { + pr_err("ITS: Can't allocate GICv4 proxy device array\n"); + return -ENOMEM; + } + + /* Use the last possible DevID */ + devid = GENMASK(its->device_ids - 1, 0); + vpe_proxy.dev = its_create_device(its, devid, entries, false); + if (!vpe_proxy.dev) { + kfree(vpe_proxy.vpes); + pr_err("ITS: Can't allocate GICv4 proxy device\n"); + return -ENOMEM; + } + + BUG_ON(entries != vpe_proxy.dev->nr_ites); + + raw_spin_lock_init(&vpe_proxy.lock); + vpe_proxy.next_victim = 0; + pr_info("ITS: Allocated DevID %x as GICv4 proxy device (%d slots)\n", + devid, vpe_proxy.dev->nr_ites); + return 0; } diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index d499538dd86f..e7a93ad4fe97 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -39,6 +39,8 @@ struct its_vpe { /* Doorbell interrupt */ int irq; irq_hw_number_t vpe_db_lpi; + /* VPE proxy mapping */ + int vpe_proxy_event; /* * This collection ID is used to indirect the target * redistributor for this VPE. The ID itself isn't involved in From 958b90d162b4ad40a7a809af5f56f3744d73ab84 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Aug 2017 16:14:17 +0100 Subject: [PATCH 33/53] irqchip/gic-v3-its: Move pending doorbell after VMOVP After moving a VPE from a redistributor to another, we're still left with a potential pending doorbell interrupt on the old redistributor. That interrupt should be moved to the new one to be either cleared or taken, depending on what the hypervisor wishes to do. So let's move it right after having executed VMOVP. This doesn't add much cost in the !DirectLPI case (we trade a DISCARD for a MOVI), and the cost of the DIRECTLPI case should be minimal (two extra MMIO accesses).
Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 34 ++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 06f14f20368b..f5068a95d3c4 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2318,6 +2318,33 @@ static void its_vpe_db_proxy_map_locked(struct its_vpe *vpe) its_send_mapti(vpe_proxy.dev, vpe->vpe_db_lpi, vpe->vpe_proxy_event); } +static void its_vpe_db_proxy_move(struct its_vpe *vpe, int from, int to) +{ + unsigned long flags; + struct its_collection *target_col; + + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, from)->rd_base; + gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR); + while (gic_read_lpir(rdbase + GICR_SYNCR) & 1) + cpu_relax(); + + return; + } + + raw_spin_lock_irqsave(&vpe_proxy.lock, flags); + + its_vpe_db_proxy_map_locked(vpe); + + target_col = &vpe_proxy.dev->its->collections[to]; + its_send_movi(vpe_proxy.dev, target_col, vpe->vpe_proxy_event); + vpe_proxy.dev->event_map.col_map[vpe->vpe_proxy_event] = to; + + raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags); +} + static int its_vpe_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) @@ -2328,12 +2355,15 @@ static int its_vpe_set_affinity(struct irq_data *d, /* * Changing affinity is mega expensive, so let's be as lazy as * we can and only do it if we really have to. Also, if mapped - * into the proxy device, we need to nuke that mapping. + * into the proxy device, we need to move the doorbell + * interrupt to its new location. 
*/ if (vpe->col_idx != cpu) { - its_vpe_db_proxy_unmap(vpe); + int from = vpe->col_idx; + vpe->col_idx = cpu; its_send_vmovp(vpe); + its_vpe_db_proxy_move(vpe, from, cpu); } return IRQ_SET_MASK_OK_DONE; From e57a3e2847ad74bd3285287b0effc1cfcc3d8806 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 31 Jul 2017 14:47:24 +0100 Subject: [PATCH 34/53] irqchip/gic-v3-its: Allow doorbell interrupts to be injected/cleared While the doorbell interrupts are usually driven by the HW itself, having a way to trigger them independently has proved to be a really useful debug feature. As it is actually very little code, let's add it to the VPE irqchip operations. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index f5068a95d3c4..21ee33e81c74 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2504,12 +2504,43 @@ static void its_vpe_unmask_irq(struct irq_data *d) its_vpe_send_inv(d); } +static int its_vpe_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool state) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + + if (which != IRQCHIP_STATE_PENDING) + return -EINVAL; + + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; + if (state) { + gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_SETLPIR); + } else { + gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR); + while (gic_read_lpir(rdbase + GICR_SYNCR) & 1) + cpu_relax(); + } + } else { + if (state) + its_vpe_send_cmd(vpe, its_send_int); + else + its_vpe_send_cmd(vpe, its_send_clear); + } + + return 0; +} + static struct irq_chip its_vpe_irq_chip = { .name = "GICv4-vpe", .irq_mask = its_vpe_mask_irq, .irq_unmask = its_vpe_unmask_irq, .irq_eoi = irq_chip_eoi_parent, .irq_set_affinity = its_vpe_set_affinity, + 
.irq_set_irqchip_state = its_vpe_set_irqchip_state, .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity, }; From d51c4b4da7f8fae8c884e3b89fdab906f66da28a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Jun 2017 21:24:25 +0100 Subject: [PATCH 35/53] irqchip/gic-v3-its: Set implementation defined bit to enable VLPIs A long time ago, GITS_CTLR[1] used to be called GITS_CTLR.EnableVLPI. It has been subsequently deprecated and is now an "Implementation Defined" bit that may or may not be set for GICv4. Brilliant. And the current crop of the FastModel requires that bit for VLPIs to be enabled. Oh well... Let's set it and find out what breaks. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 7 +++++-- include/linux/irqchip/arm-gic-v3.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 21ee33e81c74..d79bfdbd506b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2714,7 +2714,7 @@ static int its_force_quiescent(void __iomem *base) return 0; /* Disable the generation of all interrupts to this ITS */ - val &= ~GITS_CTLR_ENABLE; + val &= ~(GITS_CTLR_ENABLE | GITS_CTLR_ImDe); writel_relaxed(val, base + GITS_CTLR); /* Poll GITS_CTLR and wait until ITS becomes quiescent */ @@ -2998,7 +2998,10 @@ static int __init its_probe_one(struct resource *res, gits_write_cwriter(0, its->base + GITS_CWRITER); ctlr = readl_relaxed(its->base + GITS_CTLR); - writel_relaxed(ctlr | GITS_CTLR_ENABLE, its->base + GITS_CTLR); + ctlr |= GITS_CTLR_ENABLE; + if (its->is_v4) + ctlr |= GITS_CTLR_ImDe; + writel_relaxed(ctlr, its->base + GITS_CTLR); err = its_init_domain(handle, its); if (err) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 6bc142cfa616..1ea576c8126f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -298,6 +298,7 @@ #define GITS_TRANSLATER 0x10040
#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ImDe (1U << 1) #define GITS_CTLR_ITS_NUMBER_SHIFT 4 #define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) #define GITS_CTLR_QUIESCENT (1U << 31) From 7de5c0af9c7c717f9052e6d75b24f90050e6a56e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:27:52 +0000 Subject: [PATCH 36/53] irqchip/gic-v4: Add per-VM VPE domain creation When creating a VM, it is very convenient to have an irq domain containing all the doorbell interrupts associated with that VM (each interrupt representing a VPE). Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v4.c | 74 ++++++++++++++++++++++++++++++ include/linux/irqchip/arm-gic-v4.h | 3 ++ 2 files changed, 77 insertions(+) create mode 100644 drivers/irqchip/irq-gic-v4.c diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c new file mode 100644 index 000000000000..3d4bedea4e78 --- /dev/null +++ b/drivers/irqchip/irq-gic-v4.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2016,2017 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include + +#include + +static struct irq_domain *gic_domain; +static const struct irq_domain_ops *vpe_domain_ops; + +int its_alloc_vcpu_irqs(struct its_vm *vm) +{ + int vpe_base_irq, i; + + vm->fwnode = irq_domain_alloc_named_id_fwnode("GICv4-vpe", + task_pid_nr(current)); + if (!vm->fwnode) + goto err; + + vm->domain = irq_domain_create_hierarchy(gic_domain, 0, vm->nr_vpes, + vm->fwnode, vpe_domain_ops, + vm); + if (!vm->domain) + goto err; + + for (i = 0; i < vm->nr_vpes; i++) { + vm->vpes[i]->its_vm = vm; + vm->vpes[i]->idai = true; + } + + vpe_base_irq = __irq_domain_alloc_irqs(vm->domain, -1, vm->nr_vpes, + NUMA_NO_NODE, vm, + false, NULL); + if (vpe_base_irq <= 0) + goto err; + + for (i = 0; i < vm->nr_vpes; i++) + vm->vpes[i]->irq = vpe_base_irq + i; + + return 0; + +err: + if (vm->domain) + irq_domain_remove(vm->domain); + if (vm->fwnode) + irq_domain_free_fwnode(vm->fwnode); + + return -ENOMEM; +} + +void its_free_vcpu_irqs(struct its_vm *vm) +{ + irq_domain_free_irqs(vm->vpes[0]->irq, vm->nr_vpes); + irq_domain_remove(vm->domain); + irq_domain_free_fwnode(vm->fwnode); +} diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index e7a93ad4fe97..3dc811dc53da 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -91,4 +91,7 @@ struct its_cmd_info { }; }; +int its_alloc_vcpu_irqs(struct its_vm *vm); +void its_free_vcpu_irqs(struct its_vm *vm); + #endif From eab84318c2811e3f38c080efcc7f709f51bb8370 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:31:02 +0000 Subject: [PATCH 37/53] irqchip/gic-v4: Add VPE command interface Add the required interfaces to schedule a VPE and perform a VINVALL command. 
Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v4.c | 25 +++++++++++++++++++++++++ include/linux/irqchip/arm-gic-v4.h | 2 ++ 2 files changed, 27 insertions(+) diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 3d4bedea4e78..d3f8b7a13d2c 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -72,3 +72,28 @@ void its_free_vcpu_irqs(struct its_vm *vm) irq_domain_remove(vm->domain); irq_domain_free_fwnode(vm->fwnode); } + +static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info) +{ + return irq_set_vcpu_affinity(vpe->irq, info); +} + +int its_schedule_vpe(struct its_vpe *vpe, bool on) +{ + struct its_cmd_info info; + + WARN_ON(preemptible()); + + info.cmd_type = on ? SCHEDULE_VPE : DESCHEDULE_VPE; + + return its_send_vpe_cmd(vpe, &info); +} + +int its_invall_vpe(struct its_vpe *vpe) +{ + struct its_cmd_info info = { + .cmd_type = INVALL_VPE, + }; + + return its_send_vpe_cmd(vpe, &info); +} diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 3dc811dc53da..6450f3ed101f 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -93,5 +93,7 @@ struct its_cmd_info { int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); +int its_schedule_vpe(struct its_vpe *vpe, bool on); +int its_invall_vpe(struct its_vpe *vpe); #endif From f2eac75de435871d5a497f8b557874a2a8a7b264 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Dec 2016 21:50:32 +0000 Subject: [PATCH 38/53] irqchip/gic-v4: Add VLPI configuration interface Add the required interfaces to map, unmap and update a VLPI. 
Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v4.c | 42 ++++++++++++++++++++++++++++++ include/linux/irqchip/arm-gic-v4.h | 4 +++ 2 files changed, 46 insertions(+) diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index d3f8b7a13d2c..6d33828a84a7 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -97,3 +97,45 @@ int its_invall_vpe(struct its_vpe *vpe) return its_send_vpe_cmd(vpe, &info); } + +int its_map_vlpi(int irq, struct its_vlpi_map *map) +{ + struct its_cmd_info info = { + .cmd_type = MAP_VLPI, + .map = map, + }; + + /* + * The host will never see that interrupt firing again, so it + * is vital that we don't do any lazy masking. + */ + irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY); + + return irq_set_vcpu_affinity(irq, &info); +} + +int its_get_vlpi(int irq, struct its_vlpi_map *map) +{ + struct its_cmd_info info = { + .cmd_type = GET_VLPI, + .map = map, + }; + + return irq_set_vcpu_affinity(irq, &info); +} + +int its_unmap_vlpi(int irq) +{ + irq_clear_status_flags(irq, IRQ_DISABLE_UNLAZY); + return irq_set_vcpu_affinity(irq, NULL); +} + +int its_prop_update_vlpi(int irq, u8 config, bool inv) +{ + struct its_cmd_info info = { + .cmd_type = inv ? 
PROP_UPDATE_AND_INV_VLPI : PROP_UPDATE_VLPI, + .config = config, + }; + + return irq_set_vcpu_affinity(irq, &info); +} diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6450f3ed101f..e22f878ad017 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -95,5 +95,9 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_schedule_vpe(struct its_vpe *vpe, bool on); int its_invall_vpe(struct its_vpe *vpe); +int its_map_vlpi(int irq, struct its_vlpi_map *map); +int its_get_vlpi(int irq, struct its_vlpi_map *map); +int its_unmap_vlpi(int irq); +int its_prop_update_vlpi(int irq, u8 config, bool inv); #endif From 7954907bedafd0f8e81633803945cb304793b29d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Dec 2016 17:40:16 +0000 Subject: [PATCH 39/53] irqchip/gic-v4: Add some basic documentation Do a braindump of the way things are supposed to work. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v4.c | 71 ++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 6d33828a84a7..8eb2ad39322e 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -23,6 +23,77 @@ #include +/* + * WARNING: The blurb below assumes that you understand the + * intricacies of GICv3, GICv4, and how a guest's view of a GICv3 gets + * translated into GICv4 commands. So it effectively targets at most + * two individuals. You know who you are. + * + * The core GICv4 code is designed to *avoid* exposing too much of the + * core GIC code (that would in turn leak into the hypervisor code), + * and instead provide a hypervisor agnostic interface to the HW (of + * course, the astute reader will quickly realize that hypervisor + * agnostic actually means KVM-specific - what were you thinking?). 
+ * + * In order to achieve a modicum of isolation, we try to hide most of + * the GICv4 "stuff" behind normal irqchip operations: + * + * - Any guest-visible VLPI is backed by a Linux interrupt (and a + * physical LPI which gets unmapped when the guest maps the + * VLPI). This allows the same DevID/EventID pair to be either + * mapped to the LPI (host) or the VLPI (guest). Note that this is + * exclusive, and you cannot have both. + * + * - Enabling/disabling a VLPI is done by issuing mask/unmask calls. + * + * - Guest INT/CLEAR commands are implemented through + * irq_set_irqchip_state(). + * + * - The *bizarre* stuff (mapping/unmapping an interrupt to a VLPI, or + * issuing an INV after changing a priority) gets shoved into the + * irq_set_vcpu_affinity() method. While this is quite horrible + * (let's face it, this is the irqchip version of an ioctl), it + * confines the crap to a single location. And map/unmap really is + * about setting the affinity of a VLPI to a vcpu, so only INV is + * majorly out of place. So there. + * + * A number of commands are simply not provided by this interface, as + * they do not make direct sense. For example, MAPD is purely local to + * the virtual ITS (because it references a virtual device, and the + * physical ITS is still very much in charge of the physical + * device). Same goes for things like MAPC (the physical ITS deals + * with the actual vPE affinity, and not the braindead concept of + * collection). SYNC is not provided either, as each and every command + * is followed by a VSYNC. This could be relaxed in the future, should + * this be seen as a bottleneck (yes, this means *never*). + * + * But handling VLPIs is only one side of the job of the GICv4 + * code. The other (darker) side is to take care of the doorbell + * interrupts which are delivered when a VLPI targeting a non-running + * vcpu is being made pending. 
+ * + * The choice made here is that each vcpu (VPE in old northern GICv4 + * dialect) gets a single doorbell LPI, no matter how many interrupts + * are targeting it. This has a nice property, which is that the + * interrupt becomes a handle for the VPE, and that the hypervisor + * code can manipulate it through the normal interrupt API: + * + * - VMs (or rather the VM abstraction that matters to the GIC) + * contain an irq domain where each interrupt maps to a VPE. In + * turn, this domain sits on top of the normal LPI allocator, and a + * specially crafted irq_chip implementation. + * + * - mask/unmask do what is expected on the doorbell interrupt. + * + * - irq_set_affinity is used to move a VPE from one redistributor to + * another. + * + * - irq_set_vcpu_affinity once again gets hijacked for the purpose of + * creating a new sub-API, namely scheduling/descheduling a VPE + * (which involves programming GICR_V{PROP,PEND}BASER) and + * performing INVALL operations. + */ + static struct irq_domain *gic_domain; static const struct irq_domain_ops *vpe_domain_ops; From 3d63cb53e221d8ab347e94aeac0b5511857beb7f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:31:54 +0000 Subject: [PATCH 40/53] irqchip/gic-v4: Enable low-level GICv4 operations Get the show on the road... 
Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/Makefile | 2 +- drivers/irqchip/irq-gic-v3-its.c | 3 ++- drivers/irqchip/irq-gic-v4.c | 13 +++++++++++++ include/linux/irqchip/arm-gic-v4.h | 2 ++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 2c630574986f..845abc107ad5 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -28,7 +28,7 @@ obj-$(CONFIG_ARM_GIC_PM) += irq-gic-pm.o obj-$(CONFIG_ARCH_REALVIEW) += irq-gic-realview.o obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o -obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o irq-gic-v3-its-platform-msi.o +obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o irq-gic-v3-its-platform-msi.o irq-gic-v4.o obj-$(CONFIG_PARTITION_PERCPU) += irq-partition-percpu.o obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o obj-$(CONFIG_ARM_NVIC) += irq-nvic.o diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d79bfdbd506b..a93816cb2a98 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3226,7 +3226,8 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, has_v4 |= its->is_v4; if (has_v4 & rdists->has_vlpis) { - if (its_init_vpe_domain()) { + if (its_init_vpe_domain() || + its_init_v4(parent_domain, &its_vpe_domain_ops)) { rdists->has_vlpis = false; pr_err("ITS: Disabling GICv4 support\n"); } diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 8eb2ad39322e..2370e6d9e603 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -210,3 +210,16 @@ int its_prop_update_vlpi(int irq, u8 config, bool inv) return irq_set_vcpu_affinity(irq, &info); } + +int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *ops) +{ + if (domain) { + pr_info("ITS: Enabling GICv4 
support\n"); + gic_domain = domain; + vpe_domain_ops = ops; + return 0; + } + + pr_err("ITS: No GICv4 VPE domain allocated\n"); + return -ENODEV; +} diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index e22f878ad017..58a4d89aa82c 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -100,4 +100,6 @@ int its_get_vlpi(int irq, struct its_vlpi_map *map); int its_unmap_vlpi(int irq); int its_prop_update_vlpi(int irq, u8 config, bool inv); +int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *ops); + #endif From 4bdf502517288662d883fbaa915874790f51a2cd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Jun 2017 14:10:46 +0100 Subject: [PATCH 41/53] irqchip/gic-v3: Advertise GICv4 support to KVM As KVM needs to know about the availability of GICv4 to enable direct injection of interrupts, let's advertise the feature in the gic_kvm_info structure. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 2 ++ include/linux/irqchip/arm-gic-common.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 65fabd5f2ec6..cc968eae6c36 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1159,6 +1159,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) if (!ret) gic_v3_kvm_info.vcpu = r; + gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_set_kvm_info(&gic_v3_kvm_info); } @@ -1452,6 +1453,7 @@ static void __init gic_acpi_setup_kvm_info(void) vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1; } + gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_set_kvm_info(&gic_v3_kvm_info); } diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index c647b0547bcd..0a83b4379f34 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -27,6 +27,8 @@ struct gic_kvm_info { unsigned int 
maint_irq; /* Virtual control interface */ struct resource vctrl; + /* vlpi support */ + bool has_v4; }; const struct gic_kvm_info *gic_get_kvm_info(void); From 73c4c37c7e6ecc3cf921d182b733c9322ebed117 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Sat, 19 Aug 2017 16:22:37 +0530 Subject: [PATCH 42/53] irqchip/gic: Make irq_chip const Make this const as it is only used in a copy operation. Done using Coccinelle. Signed-off-by: Bhumika Goyal Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 1b1df4f770bd..6a089f3c4ace 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -410,7 +410,7 @@ static void gic_handle_cascade_irq(struct irq_desc *desc) chained_irq_exit(chip, desc); } -static struct irq_chip gic_chip = { +static const struct irq_chip gic_chip = { .irq_mask = gic_mask_irq, .irq_unmask = gic_unmask_irq, .irq_eoi = gic_eoi_irq, From c8c7d93dcbf21ebcf7486473e7a9a6addf49caa9 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Thu, 24 Aug 2017 15:56:21 +0530 Subject: [PATCH 43/53] irqchip/mmp: Make mmp_intc_conf const Make these const as they are only used during a copy operation. Done using Coccinelle. @match disable optional_qualifier@ identifier s; @@ static struct mmp_intc_conf s = {...}; @ref@ position p; identifier match.s; @@ s@p @good1@ position ref.p; identifier match.s,f,c; expression e; @@ ( e = s@p | e = s@p.f | c(...,s@p.f,...) | c(...,s@p,...) 
) @bad depends on !good1@ position ref.p; identifier match.s; @@ s@p @depends on forall !bad disable optional_qualifier@ identifier match.s; @@ static + const struct mmp_intc_conf s; Signed-off-by: Bhumika Goyal Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-mmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 013fc9659a84..25f32e1d7764 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -181,13 +181,13 @@ const struct irq_domain_ops mmp_irq_domain_ops = { .xlate = mmp_irq_domain_xlate, }; -static struct mmp_intc_conf mmp_conf = { +static const struct mmp_intc_conf mmp_conf = { .conf_enable = 0x51, .conf_disable = 0x0, .conf_mask = 0x7f, }; -static struct mmp_intc_conf mmp2_conf = { +static const struct mmp_intc_conf mmp2_conf = { .conf_enable = 0x20, .conf_disable = 0x0, .conf_mask = 0x7f, From b304605f0a2c25c028e26a9a3f6aa798693a4051 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 30 Aug 2017 17:29:16 -0700 Subject: [PATCH 44/53] irqchip/irq-bcm7120-l2: Use correct I/O accessors for irq_fwd_mask Initialization of irq_fwd_mask was done using __raw_writel() which happens to work for all cases except when using ARM BE8 which requires writel() (with the proper swapping). Move the initialization of the irq_fwd_mask till later when we have correctly defined our I/O accessors. 
Signed-off-by: Florian Fainelli Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-bcm7120-l2.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c index 64c2692070ef..983640eba418 100644 --- a/drivers/irqchip/irq-bcm7120-l2.c +++ b/drivers/irqchip/irq-bcm7120-l2.c @@ -250,12 +250,6 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn, if (ret < 0) goto out_free_l1_data; - for (idx = 0; idx < data->n_words; idx++) { - __raw_writel(data->irq_fwd_mask[idx], - data->pair_base[idx] + - data->en_offset[idx]); - } - for (irq = 0; irq < data->num_parent_irqs; irq++) { ret = bcm7120_l2_intc_init_one(dn, data, irq, valid_mask); if (ret) @@ -297,6 +291,10 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn, gc->reg_base = data->pair_base[idx]; ct->regs.mask = data->en_offset[idx]; + /* gc->reg_base is defined and so is gc->writel */ + irq_reg_writel(gc, data->irq_fwd_mask[idx], + data->en_offset[idx]); + ct->chip.irq_mask = irq_gc_mask_clr_bit; ct->chip.irq_unmask = irq_gc_mask_set_bit; ct->chip.irq_ack = irq_gc_noop; From d35b2c99520bbc018d645ddcbc0332f36a985a56 Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:58:55 +0800 Subject: [PATCH 45/53] irqchip/ls-scfg-msi: Fix typo of MSI compatible strings The patch is to fix typo of the Layerscape SCFG MSI dts compatible strings. "1" is replaced by "l". 
Acked-by: Rob Herring Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- .../bindings/interrupt-controller/fsl,ls-scfg-msi.txt | 6 +++--- drivers/irqchip/irq-ls-scfg-msi.c | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt index 9e389493203f..2755cd1ce611 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt @@ -4,8 +4,8 @@ Required properties: - compatible: should be "fsl,-msi" to identify Layerscape PCIe MSI controller block such as: - "fsl,1s1021a-msi" - "fsl,1s1043a-msi" + "fsl,ls1021a-msi" + "fsl,ls1043a-msi" - msi-controller: indicates that this is a PCIe MSI controller node - reg: physical base address of the controller and length of memory mapped. - interrupts: an interrupt to the parent interrupt controller. 
@@ -23,7 +23,7 @@ MSI controller node Examples: msi1: msi-controller@1571000 { - compatible = "fsl,1s1043a-msi"; + compatible = "fsl,ls1043a-msi"; reg = <0x0 0x1571000 0x0 0x8>, msi-controller; interrupts = <0 116 0x4>; diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index 02cca74cab94..cef67cc5c0f2 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -219,8 +219,10 @@ static int ls_scfg_msi_remove(struct platform_device *pdev) } static const struct of_device_id ls_scfg_msi_id[] = { - { .compatible = "fsl,1s1021a-msi", }, - { .compatible = "fsl,1s1043a-msi", }, + { .compatible = "fsl,1s1021a-msi", }, /* a typo */ + { .compatible = "fsl,1s1043a-msi", }, /* a typo */ + { .compatible = "fsl,ls1021a-msi", }, + { .compatible = "fsl,ls1043a-msi", }, {}, }; From c9041ea324b0f2bec624c385c4a8bbd21d453e9b Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:58:56 +0800 Subject: [PATCH 46/53] arm: dts: ls1021a: Fix typo of MSI compatible string "1" should be replaced by "l". This is a typo. The patch is to fix it. 
Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- arch/arm/boot/dts/ls1021a.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 7bb9df2c1460..51b0f69cf7d3 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -129,14 +129,14 @@ gic: interrupt-controller@1400000 { }; msi1: msi-controller@1570e00 { - compatible = "fsl,1s1021a-msi"; + compatible = "fsl,ls1021a-msi"; reg = <0x0 0x1570e00 0x0 0x8>; msi-controller; interrupts = ; }; msi2: msi-controller@1570e08 { - compatible = "fsl,1s1021a-msi"; + compatible = "fsl,ls1021a-msi"; reg = <0x0 0x1570e08 0x0 0x8>; msi-controller; interrupts = ; From f93aff6263f3de4428daf0ea8872f6c0121438ec Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:58:57 +0800 Subject: [PATCH 47/53] arm64: dts: ls1043a: Fix typo of MSI compatible string "1" should be replaced by "l". This is a typo. The patch is to fix it. 
Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 31fd77f82ced..7ff98f40624b 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -653,21 +653,21 @@ sata: sata@3200000 { }; msi1: msi-controller1@1571000 { - compatible = "fsl,1s1043a-msi"; + compatible = "fsl,ls1043a-msi"; reg = <0x0 0x1571000 0x0 0x8>; msi-controller; interrupts = <0 116 0x4>; }; msi2: msi-controller2@1572000 { - compatible = "fsl,1s1043a-msi"; + compatible = "fsl,ls1043a-msi"; reg = <0x0 0x1572000 0x0 0x8>; msi-controller; interrupts = <0 126 0x4>; }; msi3: msi-controller3@1573000 { - compatible = "fsl,1s1043a-msi"; + compatible = "fsl,ls1043a-msi"; reg = <0x0 0x1573000 0x0 0x8>; msi-controller; interrupts = <0 160 0x4>; From df3015888d88479ea5da914330c87af58987886b Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:58:58 +0800 Subject: [PATCH 48/53] arm: dts: ls1021a: Share all MSIs In order to maximize the use of MSI, a PCIe controller will share all MSI controllers. The patch changes msi-parent to refer to all MSI controller dts nodes. 
Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- arch/arm/boot/dts/ls1021a.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 51b0f69cf7d3..9319e1f0f1d8 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -699,7 +699,7 @@ pcie@3400000 { bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000 /* downstream I/O */ 0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */ - msi-parent = <&msi1>; + msi-parent = <&msi1>, <&msi2>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0000 0 0 1 &gic GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>, @@ -722,7 +722,7 @@ pcie@3500000 { bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000 /* downstream I/O */ 0x82000000 0x0 0x40000000 0x48 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */ - msi-parent = <&msi2>; + msi-parent = <&msi1>, <&msi2>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0000 0 0 1 &gic GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>, From 0b09331a25fe279e7b93aa58dda35e58fd23eaa6 Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:58:59 +0800 Subject: [PATCH 49/53] arm64: dts: ls1043a: Share all MSIs In order to maximize the use of MSI, a PCIe controller will share all MSI controllers. The patch changes "msi-parent" to refer to all MSI controller dts nodes. 
Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 7ff98f40624b..d16b9cc1e825 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -689,7 +689,7 @@ pcie@3400000 { bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000 /* downstream I/O */ 0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */ - msi-parent = <&msi1>; + msi-parent = <&msi1>, <&msi2>, <&msi3>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0000 0 0 1 &gic 0 110 0x4>, @@ -714,7 +714,7 @@ pcie@3500000 { bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000 /* downstream I/O */ 0x82000000 0x0 0x40000000 0x48 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */ - msi-parent = <&msi2>; + msi-parent = <&msi1>, <&msi2>, <&msi3>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0000 0 0 1 &gic 0 120 0x4>, @@ -739,7 +739,7 @@ pcie@3600000 { bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x00000000 0x50 0x00010000 0x0 0x00010000 /* downstream I/O */ 0x82000000 0x0 0x40000000 0x50 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */ - msi-parent = <&msi3>; + msi-parent = <&msi1>, <&msi2>, <&msi3>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0000 0 0 1 &gic 0 154 0x4>, From cb3421684ee778d60da26232bfea626dca2eb8db Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:59:00 +0800 Subject: [PATCH 50/53] arm64: dts: ls1046a: Add MSI dts node LS1046a includes 3 MSI controllers. Each controller supports 128 interrupts. 
Acked-by: Rob Herring Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- .../interrupt-controller/fsl,ls-scfg-msi.txt | 1 + .../arm64/boot/dts/freescale/fsl-ls1046a.dtsi | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt index 2755cd1ce611..dde455289c16 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt @@ -6,6 +6,7 @@ Required properties: Layerscape PCIe MSI controller block such as: "fsl,ls1021a-msi" "fsl,ls1043a-msi" + "fsl,ls1046a-msi" - msi-controller: indicates that this is a PCIe MSI controller node - reg: physical base address of the controller and length of memory mapped. - interrupts: an interrupt to the parent interrupt controller. diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index dc1640be0345..c8ff0baddf1d 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -630,6 +630,37 @@ sata: sata@3200000 { interrupts = ; clocks = <&clockgen 4 1>; }; + + msi1: msi-controller@1580000 { + compatible = "fsl,ls1046a-msi"; + msi-controller; + reg = <0x0 0x1580000 0x0 0x10000>; + interrupts = , + , + , + ; + }; + + msi2: msi-controller@1590000 { + compatible = "fsl,ls1046a-msi"; + msi-controller; + reg = <0x0 0x1590000 0x0 0x10000>; + interrupts = , + , + , + ; + }; + + msi3: msi-controller@15a0000 { + compatible = "fsl,ls1046a-msi"; + msi-controller; + reg = <0x0 0x15a0000 0x0 0x10000>; + interrupts = , + , + , + ; + }; + }; reserved-memory { From 4dd5da65a39d9a0405304fdef0804afffece044b Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:59:01 +0800 Subject: [PATCH 51/53] irqchip/ls-scfg-msi: Add 
LS1046a MSI support LS1046a includes 4 MSIRs, each MSIR is assigned a dedicated GIC SPI interrupt and provides 32 MSI interrupts. Compared to previous MSI, LS1046a's IBS (interrupt bit select) shift is changed to 2 and total MSI interrupt number is changed to 128. The patch adds structure 'ls_scfg_msir' to describe MSIR setting and 'ibs_shift' to store the different value between the SoCs. Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-ls-scfg-msi.c | 165 +++++++++++++++++++++++------- 1 file changed, 130 insertions(+), 35 deletions(-) diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index cef67cc5c0f2..0b1f34ddab00 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -17,13 +17,24 @@ #include #include #include +#include #include #include #include -#define MSI_MAX_IRQS 32 -#define MSI_IBS_SHIFT 3 -#define MSIR 4 +#define MSI_IRQS_PER_MSIR 32 +#define MSI_MSIR_OFFSET 4 + +struct ls_scfg_msi_cfg { + u32 ibs_shift; /* Shift of interrupt bit select */ +}; + +struct ls_scfg_msir { + struct ls_scfg_msi *msi_data; + unsigned int index; + unsigned int gic_irq; + void __iomem *reg; +}; struct ls_scfg_msi { spinlock_t lock; @@ -32,8 +43,11 @@ struct ls_scfg_msi { struct irq_domain *msi_domain; void __iomem *regs; phys_addr_t msiir_addr; - int irq; - DECLARE_BITMAP(used, MSI_MAX_IRQS); + struct ls_scfg_msi_cfg *cfg; + u32 msir_num; + struct ls_scfg_msir *msir; + u32 irqs_num; + unsigned long *used; }; static struct irq_chip ls_scfg_msi_irq_chip = { @@ -55,7 +69,7 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) msg->address_hi = upper_32_bits(msi_data->msiir_addr); msg->address_lo = lower_32_bits(msi_data->msiir_addr); - msg->data = data->hwirq << MSI_IBS_SHIFT; + msg->data = data->hwirq; } static int ls_scfg_msi_set_affinity(struct irq_data *irq_data, @@ -81,8 +95,8 @@ static int 
ls_scfg_msi_domain_irq_alloc(struct irq_domain *domain, WARN_ON(nr_irqs != 1); spin_lock(&msi_data->lock); - pos = find_first_zero_bit(msi_data->used, MSI_MAX_IRQS); - if (pos < MSI_MAX_IRQS) + pos = find_first_zero_bit(msi_data->used, msi_data->irqs_num); + if (pos < msi_data->irqs_num) __set_bit(pos, msi_data->used); else err = -ENOSPC; @@ -106,7 +120,7 @@ static void ls_scfg_msi_domain_irq_free(struct irq_domain *domain, int pos; pos = d->hwirq; - if (pos < 0 || pos >= MSI_MAX_IRQS) { + if (pos < 0 || pos >= msi_data->irqs_num) { pr_err("failed to teardown msi. Invalid hwirq %d\n", pos); return; } @@ -123,15 +137,17 @@ static const struct irq_domain_ops ls_scfg_msi_domain_ops = { static void ls_scfg_msi_irq_handler(struct irq_desc *desc) { - struct ls_scfg_msi *msi_data = irq_desc_get_handler_data(desc); + struct ls_scfg_msir *msir = irq_desc_get_handler_data(desc); + struct ls_scfg_msi *msi_data = msir->msi_data; unsigned long val; - int pos, virq; + int pos, virq, hwirq; chained_irq_enter(irq_desc_get_chip(desc), desc); - val = ioread32be(msi_data->regs + MSIR); - for_each_set_bit(pos, &val, MSI_MAX_IRQS) { - virq = irq_find_mapping(msi_data->parent, (31 - pos)); + val = ioread32be(msir->reg); + for_each_set_bit(pos, &val, MSI_IRQS_PER_MSIR) { + hwirq = ((31 - pos) << msi_data->cfg->ibs_shift) | msir->index; + virq = irq_find_mapping(msi_data->parent, hwirq); if (virq) generic_handle_irq(virq); } @@ -143,7 +159,7 @@ static int ls_scfg_msi_domains_init(struct ls_scfg_msi *msi_data) { /* Initialize MSI domain parent */ msi_data->parent = irq_domain_add_linear(NULL, - MSI_MAX_IRQS, + msi_data->irqs_num, &ls_scfg_msi_domain_ops, msi_data); if (!msi_data->parent) { @@ -164,16 +180,87 @@ static int ls_scfg_msi_domains_init(struct ls_scfg_msi *msi_data) return 0; } +static int ls_scfg_msi_setup_hwirq(struct ls_scfg_msi *msi_data, int index) +{ + struct ls_scfg_msir *msir; + int virq, i, hwirq; + + virq = platform_get_irq(msi_data->pdev, index); + if (virq <= 0) + 
return -ENODEV; + + msir = &msi_data->msir[index]; + msir->index = index; + msir->msi_data = msi_data; + msir->gic_irq = virq; + msir->reg = msi_data->regs + MSI_MSIR_OFFSET + 4 * index; + + irq_set_chained_handler_and_data(msir->gic_irq, + ls_scfg_msi_irq_handler, + msir); + + /* Release the hwirqs corresponding to this MSIR */ + for (i = 0; i < MSI_IRQS_PER_MSIR; i++) { + hwirq = i << msi_data->cfg->ibs_shift | msir->index; + bitmap_clear(msi_data->used, hwirq, 1); + } + + return 0; +} + +static int ls_scfg_msi_teardown_hwirq(struct ls_scfg_msir *msir) +{ + struct ls_scfg_msi *msi_data = msir->msi_data; + int i, hwirq; + + if (msir->gic_irq > 0) + irq_set_chained_handler_and_data(msir->gic_irq, NULL, NULL); + + for (i = 0; i < MSI_IRQS_PER_MSIR; i++) { + hwirq = i << msi_data->cfg->ibs_shift | msir->index; + bitmap_set(msi_data->used, hwirq, 1); + } + + return 0; +} + +static struct ls_scfg_msi_cfg ls1021_msi_cfg = { + .ibs_shift = 3, +}; + +static struct ls_scfg_msi_cfg ls1046_msi_cfg = { + .ibs_shift = 2, +}; + +static const struct of_device_id ls_scfg_msi_id[] = { + /* The following two misspelled compatibles are obsolete */ + { .compatible = "fsl,1s1021a-msi", .data = &ls1021_msi_cfg}, + { .compatible = "fsl,1s1043a-msi", .data = &ls1021_msi_cfg}, + + { .compatible = "fsl,ls1021a-msi", .data = &ls1021_msi_cfg }, + { .compatible = "fsl,ls1043a-msi", .data = &ls1021_msi_cfg }, + { .compatible = "fsl,ls1046a-msi", .data = &ls1046_msi_cfg }, + {}, +}; +MODULE_DEVICE_TABLE(of, ls_scfg_msi_id); + static int ls_scfg_msi_probe(struct platform_device *pdev) { + const struct of_device_id *match; struct ls_scfg_msi *msi_data; struct resource *res; - int ret; + int i, ret; + + match = of_match_device(ls_scfg_msi_id, &pdev->dev); + if (!match) + return -ENODEV; msi_data = devm_kzalloc(&pdev->dev, sizeof(*msi_data), GFP_KERNEL); if (!msi_data) return -ENOMEM; + msi_data->cfg = (struct ls_scfg_msi_cfg *) match->data; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 
msi_data->regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(msi_data->regs)) { @@ -182,23 +269,37 @@ static int ls_scfg_msi_probe(struct platform_device *pdev) } msi_data->msiir_addr = res->start; - msi_data->irq = platform_get_irq(pdev, 0); - if (msi_data->irq <= 0) { - dev_err(&pdev->dev, "failed to get MSI irq\n"); - return -ENODEV; - } - msi_data->pdev = pdev; spin_lock_init(&msi_data->lock); + msi_data->irqs_num = MSI_IRQS_PER_MSIR * + (1 << msi_data->cfg->ibs_shift); + msi_data->used = devm_kcalloc(&pdev->dev, + BITS_TO_LONGS(msi_data->irqs_num), + sizeof(*msi_data->used), + GFP_KERNEL); + if (!msi_data->used) + return -ENOMEM; + /* + * Reserve all the hwirqs + * The available hwirqs will be released in ls1_msi_setup_hwirq() + */ + bitmap_set(msi_data->used, 0, msi_data->irqs_num); + + msi_data->msir_num = of_irq_count(pdev->dev.of_node); + msi_data->msir = devm_kcalloc(&pdev->dev, msi_data->msir_num, + sizeof(*msi_data->msir), + GFP_KERNEL); + if (!msi_data->msir) + return -ENOMEM; + + for (i = 0; i < msi_data->msir_num; i++) + ls_scfg_msi_setup_hwirq(msi_data, i); + ret = ls_scfg_msi_domains_init(msi_data); if (ret) return ret; - irq_set_chained_handler_and_data(msi_data->irq, - ls_scfg_msi_irq_handler, - msi_data); - platform_set_drvdata(pdev, msi_data); return 0; @@ -207,8 +308,10 @@ static int ls_scfg_msi_probe(struct platform_device *pdev) static int ls_scfg_msi_remove(struct platform_device *pdev) { struct ls_scfg_msi *msi_data = platform_get_drvdata(pdev); + int i; - irq_set_chained_handler_and_data(msi_data->irq, NULL, NULL); + for (i = 0; i < msi_data->msir_num; i++) + ls_scfg_msi_teardown_hwirq(&msi_data->msir[i]); irq_domain_remove(msi_data->msi_domain); irq_domain_remove(msi_data->parent); @@ -218,14 +321,6 @@ static int ls_scfg_msi_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id ls_scfg_msi_id[] = { - { .compatible = "fsl,1s1021a-msi", }, /* a typo */ - { .compatible = "fsl,1s1043a-msi", }, /* a typo */ 
- { .compatible = "fsl,ls1021a-msi", }, - { .compatible = "fsl,ls1043a-msi", }, - {}, -}; - static struct platform_driver ls_scfg_msi_driver = { .driver = { .name = "ls-scfg-msi", From fd100dab63ef634e1e0e8b5d9d6d4ba7df9be93f Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:59:02 +0800 Subject: [PATCH 52/53] irqchip/ls-scfg-msi: Add LS1043a v1.1 MSI support A MSI controller of LS1043a v1.0 only includes one MSIR and is assigned one GIC interrupt. In order to support affinity, LS1043a v1.1 MSI is assigned 4 MSIRs and 4 GIC interrupts. But the MSIR has the different offset and only supports 8 MSIs. The bits between variable bit_start and bit_end in structure ls_scfg_msir are used to show 8 MSI interrupts. msir_irqs and msir_base are added to describe the difference of MSI between LS1043a v1.1 and other SoCs. Acked-by: Rob Herring Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- .../interrupt-controller/fsl,ls-scfg-msi.txt | 1 + drivers/irqchip/irq-ls-scfg-msi.c | 45 ++++++++++++++++--- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt index dde455289c16..49ccabbfa6f3 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt @@ -7,6 +7,7 @@ Required properties: "fsl,ls1021a-msi" "fsl,ls1043a-msi" "fsl,ls1046a-msi" + "fsl,ls1043a-v1.1-msi" - msi-controller: indicates that this is a PCIe MSI controller node - reg: physical base address of the controller and length of memory mapped. - interrupts: an interrupt to the parent interrupt controller. 
diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index 0b1f34ddab00..8d57a599c0fb 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -25,14 +25,21 @@ #define MSI_IRQS_PER_MSIR 32 #define MSI_MSIR_OFFSET 4 +#define MSI_LS1043V1_1_IRQS_PER_MSIR 8 +#define MSI_LS1043V1_1_MSIR_OFFSET 0x10 + struct ls_scfg_msi_cfg { u32 ibs_shift; /* Shift of interrupt bit select */ + u32 msir_irqs; /* The irq number per MSIR */ + u32 msir_base; /* The base address of MSIR */ }; struct ls_scfg_msir { struct ls_scfg_msi *msi_data; unsigned int index; unsigned int gic_irq; + unsigned int bit_start; + unsigned int bit_end; void __iomem *reg; }; @@ -140,13 +147,18 @@ static void ls_scfg_msi_irq_handler(struct irq_desc *desc) struct ls_scfg_msir *msir = irq_desc_get_handler_data(desc); struct ls_scfg_msi *msi_data = msir->msi_data; unsigned long val; - int pos, virq, hwirq; + int pos, size, virq, hwirq; chained_irq_enter(irq_desc_get_chip(desc), desc); val = ioread32be(msir->reg); - for_each_set_bit(pos, &val, MSI_IRQS_PER_MSIR) { - hwirq = ((31 - pos) << msi_data->cfg->ibs_shift) | msir->index; + + pos = msir->bit_start; + size = msir->bit_end + 1; + + for_each_set_bit_from(pos, &val, size) { + hwirq = ((msir->bit_end - pos) << msi_data->cfg->ibs_shift) | + msir->index; virq = irq_find_mapping(msi_data->parent, hwirq); if (virq) generic_handle_irq(virq); @@ -193,14 +205,24 @@ static int ls_scfg_msi_setup_hwirq(struct ls_scfg_msi *msi_data, int index) msir->index = index; msir->msi_data = msi_data; msir->gic_irq = virq; - msir->reg = msi_data->regs + MSI_MSIR_OFFSET + 4 * index; + msir->reg = msi_data->regs + msi_data->cfg->msir_base + 4 * index; + + if (msi_data->cfg->msir_irqs == MSI_LS1043V1_1_IRQS_PER_MSIR) { + msir->bit_start = 32 - ((msir->index + 1) * + MSI_LS1043V1_1_IRQS_PER_MSIR); + msir->bit_end = msir->bit_start + + MSI_LS1043V1_1_IRQS_PER_MSIR - 1; + } else { + msir->bit_start = 0; + msir->bit_end = 
msi_data->cfg->msir_irqs - 1; + } irq_set_chained_handler_and_data(msir->gic_irq, ls_scfg_msi_irq_handler, msir); /* Release the hwirqs corresponding to this MSIR */ - for (i = 0; i < MSI_IRQS_PER_MSIR; i++) { + for (i = 0; i < msi_data->cfg->msir_irqs; i++) { hwirq = i << msi_data->cfg->ibs_shift | msir->index; bitmap_clear(msi_data->used, hwirq, 1); } @@ -216,7 +238,7 @@ static int ls_scfg_msi_teardown_hwirq(struct ls_scfg_msir *msir) if (msir->gic_irq > 0) irq_set_chained_handler_and_data(msir->gic_irq, NULL, NULL); - for (i = 0; i < MSI_IRQS_PER_MSIR; i++) { + for (i = 0; i < msi_data->cfg->msir_irqs; i++) { hwirq = i << msi_data->cfg->ibs_shift | msir->index; bitmap_set(msi_data->used, hwirq, 1); } @@ -226,10 +248,20 @@ static int ls_scfg_msi_teardown_hwirq(struct ls_scfg_msir *msir) static struct ls_scfg_msi_cfg ls1021_msi_cfg = { .ibs_shift = 3, + .msir_irqs = MSI_IRQS_PER_MSIR, + .msir_base = MSI_MSIR_OFFSET, }; static struct ls_scfg_msi_cfg ls1046_msi_cfg = { .ibs_shift = 2, + .msir_irqs = MSI_IRQS_PER_MSIR, + .msir_base = MSI_MSIR_OFFSET, +}; + +static struct ls_scfg_msi_cfg ls1043_v1_1_msi_cfg = { + .ibs_shift = 2, + .msir_irqs = MSI_LS1043V1_1_IRQS_PER_MSIR, + .msir_base = MSI_LS1043V1_1_MSIR_OFFSET, }; static const struct of_device_id ls_scfg_msi_id[] = { @@ -239,6 +271,7 @@ static const struct of_device_id ls_scfg_msi_id[] = { { .compatible = "fsl,ls1021a-msi", .data = &ls1021_msi_cfg }, { .compatible = "fsl,ls1043a-msi", .data = &ls1021_msi_cfg }, + { .compatible = "fsl,ls1043a-v1.1-msi", .data = &ls1043_v1_1_msi_cfg }, { .compatible = "fsl,ls1046a-msi", .data = &ls1046_msi_cfg }, {}, }; From ae3efabfadea92a7300f57792ebeb24b5d18469f Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 5 Jul 2017 14:59:03 +0800 Subject: [PATCH 53/53] irqchip/ls-scfg-msi: Add MSI affinity support For LS1046a and LS1043a v1.1, the MSI controller has 4 MSIRs and 4 GIC SPI interrupts which can be associated with different Core. 
So we can support affinity to improve the performance. The MSI message data is a byte for Layerscape MSI. 7 6 5 4 3 2 1 0 | - | IBS | SRS | SRS (bits 0-1) selects the MSIR that is associated with a CPU. IBS (bits 2-6 on ls1046, bits 2-4 on ls1043a v1.1) selects the bit within the MSIR. With affinity, only the bits of MSIR0 (srs=0, cpu0) are available. All bits of MSIR1-3 (cpu1-3) are reserved. The MSI hwirq always equals the bit index of MSIR0. When changing affinity, the corresponding SRS is appended to the MSI message data, and the MSI is then moved to the corresponding core. But in affinity mode, there are only 8 MSI interrupts for a controller of LS1043a v1.1. That cannot meet the requirements of some PCIe devices, such as a 4-port Ethernet card. In contrast, without affinity, all MSIRs can be used for core 0, and the number of MSI interrupts can be up to 32. So the "lsmsi" kernel parameter is added to control affinity mode. "lsmsi=no-affinity" will disable affinity and increase the number of MSI interrupts. Signed-off-by: Minghuan Lian Signed-off-by: Hou Zhiqiang Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-ls-scfg-msi.c | 68 ++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index 8d57a599c0fb..119f4ef0d421 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -40,6 +40,7 @@ struct ls_scfg_msir { unsigned int gic_irq; unsigned int bit_start; unsigned int bit_end; + unsigned int srs; /* Shared interrupt register select */ void __iomem *reg; }; @@ -70,6 +71,19 @@ static struct msi_domain_info ls_scfg_msi_domain_info = { .chip = &ls_scfg_msi_irq_chip, }; +static int msi_affinity_flag = 1; + +static int __init early_parse_ls_scfg_msi(char *p) +{ + if (p && strncmp(p, "no-affinity", 11) == 0) + msi_affinity_flag = 0; + else + msi_affinity_flag = 1; + + return 0; +} +early_param("lsmsi", early_parse_ls_scfg_msi); + static void ls_scfg_msi_compose_msg(struct irq_data
*data, struct msi_msg *msg) { struct ls_scfg_msi *msi_data = irq_data_get_irq_chip_data(data); @@ -77,12 +91,36 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) msg->address_hi = upper_32_bits(msi_data->msiir_addr); msg->address_lo = lower_32_bits(msi_data->msiir_addr); msg->data = data->hwirq; + + if (msi_affinity_flag) + msg->data |= cpumask_first(data->common->affinity); } static int ls_scfg_msi_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) { - return -EINVAL; + struct ls_scfg_msi *msi_data = irq_data_get_irq_chip_data(irq_data); + u32 cpu; + + if (!msi_affinity_flag) + return -EINVAL; + + if (!force) + cpu = cpumask_any_and(mask, cpu_online_mask); + else + cpu = cpumask_first(mask); + + if (cpu >= msi_data->msir_num) + return -EINVAL; + + if (msi_data->msir[cpu].gic_irq <= 0) { + pr_warn("cannot bind the irq to cpu%d\n", cpu); + return -EINVAL; + } + + cpumask_copy(irq_data->common->affinity, mask); + + return IRQ_SET_MASK_OK; } static struct irq_chip ls_scfg_msi_parent_chip = { @@ -158,7 +196,7 @@ static void ls_scfg_msi_irq_handler(struct irq_desc *desc) for_each_set_bit_from(pos, &val, size) { hwirq = ((msir->bit_end - pos) << msi_data->cfg->ibs_shift) | - msir->index; + msir->srs; virq = irq_find_mapping(msi_data->parent, hwirq); if (virq) generic_handle_irq(virq); @@ -221,10 +259,19 @@ static int ls_scfg_msi_setup_hwirq(struct ls_scfg_msi *msi_data, int index) ls_scfg_msi_irq_handler, msir); + if (msi_affinity_flag) { + /* Associate MSIR interrupt to the cpu */ + irq_set_affinity(msir->gic_irq, get_cpu_mask(index)); + msir->srs = 0; /* This value is determined by the CPU */ + } else + msir->srs = index; + /* Release the hwirqs corresponding to this MSIR */ - for (i = 0; i < msi_data->cfg->msir_irqs; i++) { - hwirq = i << msi_data->cfg->ibs_shift | msir->index; - bitmap_clear(msi_data->used, hwirq, 1); + if (!msi_affinity_flag || msir->index == 0) { + for (i = 0; i < 
msi_data->cfg->msir_irqs; i++) { + hwirq = i << msi_data->cfg->ibs_shift | msir->index; + bitmap_clear(msi_data->used, hwirq, 1); + } } return 0; @@ -320,6 +367,17 @@ static int ls_scfg_msi_probe(struct platform_device *pdev) bitmap_set(msi_data->used, 0, msi_data->irqs_num); msi_data->msir_num = of_irq_count(pdev->dev.of_node); + + if (msi_affinity_flag) { + u32 cpu_num; + + cpu_num = num_possible_cpus(); + if (msi_data->msir_num >= cpu_num) + msi_data->msir_num = cpu_num; + else + msi_affinity_flag = 0; + } + msi_data->msir = devm_kcalloc(&pdev->dev, msi_data->msir_num, sizeof(*msi_data->msir), GFP_KERNEL);