From abef0695f9665c3dbe9473f964c4da3c1f7c5d3f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 13 Sep 2023 15:48:12 +0100 Subject: [PATCH 01/22] arm64/sve: Remove ZCR pseudo register from cpufeature code For reasons that are not currently apparent during cpufeature enumeration we maintain a pseudo register for ZCR which records the maximum supported vector length using the value that would be written to ZCR_EL1.LEN to configure it. This is not exposed to userspace and is not sufficient for detecting unsupportable configurations, we need the more detailed checks in vec_update_vq_map() for that since we can't cope with missing vector lengths on late CPUs and KVM requires an exactly matching set of supported vector lengths as EL1 can enumerate VLs directly with the hardware. Remove the code, replacing the usage in sve_setup() with a query of the vq_map. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230913-arm64-vec-len-cpufeature-v1-1-cc69b0600a8a@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpu.h | 3 --- arch/arm64/include/asm/fpsimd.h | 1 - arch/arm64/kernel/cpufeature.c | 30 ++++++---------------------- arch/arm64/kernel/fpsimd.c | 35 +++------------------------------ 4 files changed, 9 insertions(+), 60 deletions(-) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index e749838b9c5d..f85650dc561c 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -64,9 +64,6 @@ struct cpuinfo_arm64 { struct cpuinfo_32bit aarch32; - /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */ - u64 reg_zcr; - /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */ u64 reg_smcr; }; diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 8df46f186c64..9e5d3a0812b6 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -128,7 +128,6 @@ extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); -extern u64 read_zcr_features(void); extern u64 read_smcr_features(void); /* diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 444a73c2e638..5783e9349563 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -611,12 +611,6 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = { ARM64_FTR_END, }; -static const struct arm64_ftr_bits ftr_zcr[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, - ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_WIDTH, 0), /* LEN */ - ARM64_FTR_END, -}; - static const struct arm64_ftr_bits ftr_smcr[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0), /* LEN */ @@ -736,7 +730,6 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3), /* Op1 = 0, CRn = 1, CRm = 2 */ - ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr), /* Op1 = 1, CRn = 0, CRm = 0 */ @@ -1040,8 +1033,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (IS_ENABLED(CONFIG_ARM64_SVE) && id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { - info->reg_zcr = read_zcr_features(); - init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); + sve_kernel_enable(NULL); vec_init_vq_map(ARM64_VEC_SVE); } @@ -1289,15 +1281,13 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0); + /* Probe vector lengths */ if (IS_ENABLED(CONFIG_ARM64_SVE) && id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { - info->reg_zcr = read_zcr_features(); - taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, - info->reg_zcr, boot->reg_zcr); - - /* Probe vector lengths */ - if (!system_capabilities_finalized()) + if (!system_capabilities_finalized()) { + sve_kernel_enable(NULL); vec_update_vq_map(ARM64_VEC_SVE); + } } if (IS_ENABLED(CONFIG_ARM64_SME) && @@ -3153,19 +3143,11 @@ static void verify_local_elf_hwcaps(void) static void verify_sve_features(void) { - u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); - u64 zcr = read_zcr_features(); - - unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK; - unsigned int len = zcr & ZCR_ELx_LEN_MASK; - - if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) { + if (vec_verify_vq_map(ARM64_VEC_SVE)) { pr_crit("CPU%d: SVE: vector length support mismatch\n", smp_processor_id()); cpu_die_early(); } - - /* Add checks on other ZCR bits here if necessary */ } static void verify_sme_features(void) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 91e44ac7150f..35bf40c2f972 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1170,32 +1170,12 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) isb(); } -/* - * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE - * vector length. - * - * Use only if SVE is present. - * This function clobbers the SVE vector length. - */ -u64 read_zcr_features(void) -{ - /* - * Set the maximum possible VL, and write zeroes to all other - * bits to see if they stick. - */ - sve_kernel_enable(NULL); - write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); - - /* Return LEN value that would be written to get the maximum VL */ - return sve_vq_from_vl(sve_get_vl()) - 1; -} - void __init sve_setup(void) { struct vl_info *info = &vl_info[ARM64_VEC_SVE]; - u64 zcr; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); unsigned long b; + int max_bit; if (!system_supports_sve()) return; @@ -1208,17 +1188,8 @@ void __init sve_setup(void) if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map))) set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map); - zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); - info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); - - /* - * Sanity-check that the max VL we determined through CPU features - * corresponds properly to sve_vq_map. If not, do our best: - */ - if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE, - info->max_vl))) - info->max_vl = find_supported_vector_length(ARM64_VEC_SVE, - info->max_vl); + max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX); + info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit)); /* * For the default VL, pick the maximum supported value <= 64. From 391208485c3ad50ff4f2ddd9f7d4a1d77341acbb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 13 Sep 2023 15:48:13 +0100 Subject: [PATCH 02/22] arm64/sve: Remove SMCR pseudo register from cpufeature code For reasons that are not currently apparent during cpufeature enumeration we maintain a pseudo register for SMCR which records the maximum supported vector length using the value that would be written to SMCR_EL1.LEN to configure it. This is not exposed to userspace and is not sufficient for detecting unsupportable configurations, we need the more detailed checks in vec_update_vq_map() for that since we can't cope with missing vector lengths on late CPUs and KVM requires an exactly matching set of supported vector lengths as EL1 can enumerate VLs directly with the hardware. Remove the code, replacing the usage in sme_setup() with a query of the vq_map. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230913-arm64-vec-len-cpufeature-v1-2-cc69b0600a8a@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpu.h | 3 --- arch/arm64/kernel/cpufeature.c | 28 +++++------------------- arch/arm64/kernel/fpsimd.c | 40 +++++----------------------------- 3 files changed, 10 insertions(+), 61 deletions(-) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index f85650dc561c..f3034099fd95 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -63,9 +63,6 @@ struct cpuinfo_arm64 { u64 reg_id_aa64smfr0; struct cpuinfo_32bit aarch32; - - /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */ - u64 reg_smcr; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5783e9349563..ea36dc532b3b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -611,12 +611,6 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = { ARM64_FTR_END, }; -static const struct arm64_ftr_bits ftr_smcr[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, - SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0), /* LEN */ - ARM64_FTR_END, -}; - /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -729,9 +723,6 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3), - /* Op1 = 0, CRn = 1, CRm = 2 */ - ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr), - /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), @@ -1039,14 +1030,14 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (IS_ENABLED(CONFIG_ARM64_SME) && id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { - info->reg_smcr = read_smcr_features(); + sme_kernel_enable(NULL); + /* * We mask out SMPS since even if the hardware * supports priorities the kernel does not at present * and we block access to them. */ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; - init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr); vec_init_vq_map(ARM64_VEC_SME); } @@ -1292,15 +1283,14 @@ void update_cpu_features(int cpu, if (IS_ENABLED(CONFIG_ARM64_SME) && id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { - info->reg_smcr = read_smcr_features(); + sme_kernel_enable(NULL); + /* * We mask out SMPS since even if the hardware * supports priorities the kernel does not at present * and we block access to them. */ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; - taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu, - info->reg_smcr, boot->reg_smcr); /* Probe vector lengths */ if (!system_capabilities_finalized()) @@ -3152,19 +3142,11 @@ static void verify_sve_features(void) static void verify_sme_features(void) { - u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); - u64 smcr = read_smcr_features(); - - unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK; - unsigned int len = smcr & SMCR_ELx_LEN_MASK; - - if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) { + if (vec_verify_vq_map(ARM64_VEC_SME)) { pr_crit("CPU%d: SME: vector length support mismatch\n", smp_processor_id()); cpu_die_early(); } - - /* Add checks on other SMCR bits here if necessary */ } static void verify_hyp_capabilities(void) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 35bf40c2f972..04c801001767 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1304,32 +1304,10 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) SYS_SMCR_EL1); } -/* - * Read the pseudo-SMCR used by cpufeatures to identify the supported - * vector length. - * - * Use only if SME is present. - * This function clobbers the SME vector length. - */ -u64 read_smcr_features(void) -{ - sme_kernel_enable(NULL); - - /* - * Set the maximum possible VL. - */ - write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK, - SYS_SMCR_EL1); - - /* Return LEN value that would be written to get the maximum VL */ - return sve_vq_from_vl(sme_get_vl()) - 1; -} - void __init sme_setup(void) { struct vl_info *info = &vl_info[ARM64_VEC_SME]; - u64 smcr; - int min_bit; + int min_bit, max_bit; if (!system_supports_sme()) return; @@ -1338,24 +1316,16 @@ void __init sme_setup(void) * SME doesn't require any particular vector length be * supported but it does require at least one. We should have * disabled the feature entirely while bringing up CPUs but - * let's double check here. + * let's double check here. The bitmap is SVE_VQ_MAP sized for + * sharing with SVE. */ WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX)); min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX); info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit)); - smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); - info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1); - - /* - * Sanity-check that the max VL we determined through CPU features - * corresponds properly to sme_vq_map. If not, do our best: - */ - if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME, - info->max_vl))) - info->max_vl = find_supported_vector_length(ARM64_VEC_SME, - info->max_vl); + max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX); + info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit)); WARN_ON(info->min_vl > info->max_vl); From a02026bf9da13cd44fb444857d5aebc934e1af5a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 6 Sep 2023 09:02:56 -0700 Subject: [PATCH 03/22] irqchip/gic-v3: Enable support for SGIs to act as NMIs As of commit 6abbd6988971 ("irqchip/gic, gic-v3: Make SGIs use handle_percpu_devid_irq()") SGIs are treated the same as PPIs/EPPIs and use handle_percpu_devid_irq() by default. Unfortunately, handle_percpu_devid_irq() isn't NMI safe, and so to run in an NMI context those should use handle_percpu_devid_fasteoi_nmi(). In order to accomplish this, we just have to make room for SGIs in the array of refcounts that keeps track of which interrupts are set as NMI. We also rename the array and create a new indexing scheme that accounts for SGIs. Also, enable NMI support prior to gic_smp_init() as allocation of SGIs as IRQs/NMIs happen as part of this routine. Co-developed-by: Sumit Garg Signed-off-by: Sumit Garg Acked-by: Mark Rutland Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20230906090246.v13.1.I1223c11c88937bd0cbd9b086d4ef216985797302@changeid Signed-off-by: Catalin Marinas --- drivers/irqchip/irq-gic-v3.c | 59 +++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eedfa8e9f077..787ccc880b22 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -78,6 +78,13 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); #define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) #define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer) +/* + * There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs + * are potentially stolen by the secure side. Some code, especially code dealing + * with hwirq IDs, is simplified by accounting for all 16. + */ +#define SGI_NR 16 + /* * The behaviours of RPR and PMR registers differ depending on the value of * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the @@ -125,8 +132,8 @@ EXPORT_SYMBOL(gic_nonsecure_priorities); __priority; \ }) -/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ -static refcount_t *ppi_nmi_refs; +/* rdist_nmi_refs[n] == number of cpus having the rdist interrupt n set as NMI */ +static refcount_t *rdist_nmi_refs; static struct gic_kvm_info gic_v3_kvm_info __initdata; static DEFINE_PER_CPU(bool, has_rss); @@ -519,9 +526,22 @@ static u32 __gic_get_ppi_index(irq_hw_number_t hwirq) } } -static u32 gic_get_ppi_index(struct irq_data *d) +static u32 __gic_get_rdist_index(irq_hw_number_t hwirq) { - return __gic_get_ppi_index(d->hwirq); + switch (__get_intid_range(hwirq)) { + case SGI_RANGE: + case PPI_RANGE: + return hwirq; + case EPPI_RANGE: + return hwirq - EPPI_BASE_INTID + 32; + default: + unreachable(); + } +} + +static u32 gic_get_rdist_index(struct irq_data *d) +{ + return __gic_get_rdist_index(d->hwirq); } static int gic_irq_nmi_setup(struct irq_data *d) @@ -545,11 +565,14 @@ static int gic_irq_nmi_setup(struct irq_data *d) /* desc lock should already be held */ if (gic_irq_in_rdist(d)) { - u32 idx = gic_get_ppi_index(d); + u32 idx = gic_get_rdist_index(d); - /* Setting up PPI as NMI, only switch handler for first NMI */ - if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) { - refcount_set(&ppi_nmi_refs[idx], 1); + /* + * Setting up a percpu interrupt as NMI, only switch handler + * for first NMI + */ + if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) { + refcount_set(&rdist_nmi_refs[idx], 1); desc->handle_irq = handle_percpu_devid_fasteoi_nmi; } } else { @@ -582,10 +605,10 @@ static void gic_irq_nmi_teardown(struct irq_data *d) /* desc lock should already be held */ if (gic_irq_in_rdist(d)) { - u32 idx = gic_get_ppi_index(d); + u32 idx = gic_get_rdist_index(d); /* Tearing down NMI, only switch handler for last NMI */ - if (refcount_dec_and_test(&ppi_nmi_refs[idx])) + if (refcount_dec_and_test(&rdist_nmi_refs[idx])) desc->handle_irq = handle_percpu_devid_irq; } else { desc->handle_irq = handle_fasteoi_irq; @@ -1279,10 +1302,10 @@ static void gic_cpu_init(void) rbase = gic_data_rdist_sgi_base(); /* Configure SGIs/PPIs as non-secure Group-1 */ - for (i = 0; i < gic_data.ppi_nr + 16; i += 32) + for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32) writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8); - gic_cpu_config(rbase, gic_data.ppi_nr + 16, gic_redist_wait_for_rwp); + gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, gic_redist_wait_for_rwp); /* initialise system registers */ gic_cpu_sys_reg_init(); @@ -1939,12 +1962,13 @@ static void gic_enable_nmi_support(void) return; } - ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL); - if (!ppi_nmi_refs) + rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR, + sizeof(*rdist_nmi_refs), GFP_KERNEL); + if (!rdist_nmi_refs) return; - for (i = 0; i < gic_data.ppi_nr; i++) - refcount_set(&ppi_nmi_refs[i], 0); + for (i = 0; i < gic_data.ppi_nr + SGI_NR; i++) + refcount_set(&rdist_nmi_refs[i], 0); pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", gic_has_relaxed_pmr_sync() ? "relaxed" : "forced"); @@ -2061,6 +2085,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gic_dist_init(); gic_cpu_init(); + gic_enable_nmi_support(); gic_smp_init(); gic_cpu_pm_init(); @@ -2073,8 +2098,6 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gicv2m_init(handle, gic_data.domain); } - gic_enable_nmi_support(); - return 0; out_free: From d0c14a7d36f035aeae1bdd6f4afc6488400ed5cf Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 6 Sep 2023 09:02:57 -0700 Subject: [PATCH 04/22] arm64: idle: Tag the arm64 idle functions as __cpuidle As per the (somewhat recent) comment before the definition of `__cpuidle`, the tag is like `noinstr` but also marks a function so it can be identified by cpu_in_idle(). Let's add these markings to arm64 cpuidle functions With this change we get useful backtraces like: NMI backtrace for cpu N skipped: idling at cpu_do_idle+0x94/0x98 instead of useless backtraces when dumping all processors using nmi_cpu_backtrace(). NOTE: this patch won't make cpu_in_idle() work perfectly for arm64, but it doesn't hurt and does catch some cases. Specifically an example that wasn't caught in my testing looked like this: gic_cpu_sys_reg_init+0x1f8/0x314 gic_cpu_pm_notifier+0x40/0x78 raw_notifier_call_chain+0x5c/0x134 cpu_pm_notify+0x38/0x64 cpu_pm_exit+0x20/0x2c psci_enter_idle_state+0x48/0x70 cpuidle_enter_state+0xb8/0x260 cpuidle_enter+0x44/0x5c do_idle+0x188/0x30c Acked-by: Mark Rutland Reviewed-by: Stephen Boyd Acked-by: Sumit Garg Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230906090246.v13.2.I4baba13e220bdd24d11400c67f137c35f07f82c7@changeid Signed-off-by: Catalin Marinas --- arch/arm64/kernel/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c index c1125753fe9b..05cfb347ec26 100644 --- a/arch/arm64/kernel/idle.c +++ b/arch/arm64/kernel/idle.c @@ -20,7 +20,7 @@ * ensure that interrupts are not masked at the PMR (because the core will * not wake up if we block the wake up signal in the interrupt controller). */ -void noinstr cpu_do_idle(void) +void __cpuidle cpu_do_idle(void) { struct arm_cpuidle_irq_context context; @@ -35,7 +35,7 @@ void noinstr cpu_do_idle(void) /* * This is our default idle handler. */ -void noinstr arch_cpu_idle(void) +void __cpuidle arch_cpu_idle(void) { /* * This should do all the clock switching and wait for interrupt From 2b2d0a7a96ab36ed6d963e29b6211b184ef81596 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 6 Sep 2023 09:02:58 -0700 Subject: [PATCH 05/22] arm64: smp: Remove dedicated wakeup IPI To enable NMI backtrace and KGDB's NMI cpu roundup, we need to free up at least one dedicated IPI. On arm64 the IPI_WAKEUP IPI is only used for the ACPI parking protocol, which itself is only used on some very early ARMv8 systems which couldn't implement PSCI. Remove the IPI_WAKEUP IPI, and rely on the IPI_RESCHEDULE IPI to wake CPUs from the parked state. This will cause a tiny amonut of redundant work to check the thread flags, but this is miniscule in relation to the cost of taking and handling the IPI in the first place. We can safely handle redundant IPI_RESCHEDULE IPIs, so there should be no functional impact as a result of this change. Signed-off-by: Mark Rutland Reviewed-by: Stephen Boyd Reviewed-by: Sumit Garg Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20230906090246.v13.3.I7209db47ef8ec151d3de61f59005bbc59fe8f113@changeid Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/smp.h | 4 ++-- arch/arm64/kernel/acpi_parking_protocol.c | 2 +- arch/arm64/kernel/smp.c | 28 +++++++++-------------- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 9b31e6d0da17..efb13112b408 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -89,9 +89,9 @@ extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL -extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +extern void arch_send_wakeup_ipi(unsigned int cpu); #else -static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +static inline void arch_send_wakeup_ipi(unsigned int cpu) { BUILD_BUG(); } diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c index b1990e38aed0..e1be29e608b7 100644 --- a/arch/arm64/kernel/acpi_parking_protocol.c +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -103,7 +103,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu) &mailbox->entry_point); writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id); - arch_send_wakeup_ipi_mask(cpumask_of(cpu)); + arch_send_wakeup_ipi(cpu); return 0; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 960b98b43506..a5848f1ef817 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -72,7 +72,6 @@ enum ipi_msg_type { IPI_CPU_CRASH_STOP, IPI_TIMER, IPI_IRQ_WORK, - IPI_WAKEUP, NR_IPI }; @@ -764,7 +763,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_TIMER] = "Timer broadcast interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", - [IPI_WAKEUP] = "CPU wake-up interrupts", }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); @@ -797,13 +795,6 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } -#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL -void arch_send_wakeup_ipi_mask(const struct cpumask *mask) -{ - smp_cross_call(mask, IPI_WAKEUP); -} -#endif - #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { @@ -897,14 +888,6 @@ static void do_handle_IPI(int ipinr) break; #endif -#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL - case IPI_WAKEUP: - WARN_ONCE(!acpi_parking_protocol_valid(cpu), - "CPU%u: Wake-up IPI outside the ACPI parking protocol\n", - cpu); - break; -#endif - default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; @@ -979,6 +962,17 @@ void arch_smp_send_reschedule(int cpu) smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +void arch_send_wakeup_ipi(unsigned int cpu) +{ + /* + * We use a scheduler IPI to wake the CPU as this avoids the need for a + * dedicated IPI and we can safely handle spurious scheduler IPIs. + */ + arch_smp_send_reschedule(cpu); +} +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void tick_broadcast(const struct cpumask *mask) { From 331a1b3a836c0f38165dcec168c0a03b93cf0c17 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 6 Sep 2023 09:02:59 -0700 Subject: [PATCH 06/22] arm64: smp: Add arch support for backtrace using pseudo-NMI Enable arch_trigger_cpumask_backtrace() support on arm64. This enables things much like they are enabled on arm32 (including some of the funky logic around NR_IPI, nr_ipi, and MAX_IPI) but with the difference that, unlike arm32, we'll try to enable the backtrace to use pseudo-NMI. NOTE: this patch is a squash of the little bit of code adding the ability to mark an IPI to try to use pseudo-NMI plus the little bit of code to hook things up for kgdb. This approach was decided upon in the discussion of v9 [1]. This patch depends on commit 8d539b84f1e3 ("nmi_backtrace: allow excluding an arbitrary CPU") since that commit changed the prototype of arch_trigger_cpumask_backtrace(), which this patch implements. [1] https://lore.kernel.org/r/ZORY51mF4alI41G1@FVFF77S0Q05N Co-developed-by: Sumit Garg Signed-off-by: Sumit Garg Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Reviewed-by: Stephen Boyd Reviewed-by: Misono Tomohiro Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230906090246.v13.4.Ie6c132b96ebbbcddbf6954b9469ed40a6960343c@changeid Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/irq.h | 3 ++ arch/arm64/kernel/smp.c | 86 +++++++++++++++++++++++++++++++----- 2 files changed, 78 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index fac08e18bcd5..50ce8b697ff3 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -6,6 +6,9 @@ #include +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu); +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace + struct pt_regs; int set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a5848f1ef817..28c904ca499a 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -72,12 +73,18 @@ enum ipi_msg_type { IPI_CPU_CRASH_STOP, IPI_TIMER, IPI_IRQ_WORK, - NR_IPI + NR_IPI, + /* + * Any enum >= NR_IPI and < MAX_IPI is special and not tracable + * with trace_ipi_* + */ + IPI_CPU_BACKTRACE = NR_IPI, + MAX_IPI }; static int ipi_irq_base __read_mostly; static int nr_ipi __read_mostly = NR_IPI; -static struct irq_desc *ipi_desc[NR_IPI] __read_mostly; +static struct irq_desc *ipi_desc[MAX_IPI] __read_mostly; static void ipi_setup(int cpu); @@ -845,6 +852,22 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs #endif } +static void arm64_backtrace_ipi(cpumask_t *mask) +{ + __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask); +} + +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) +{ + /* + * NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name, + * nothing about it truly needs to be implemented using an NMI, it's + * just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi() + * returned false our backtrace attempt will just use a regular IPI. + */ + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi); +} + /* * Main handler for inter-processor interrupts */ @@ -888,6 +911,14 @@ static void do_handle_IPI(int ipinr) break; #endif + case IPI_CPU_BACKTRACE: + /* + * NOTE: in some cases this _won't_ be NMI context. See the + * comment in arch_trigger_cpumask_backtrace(). + */ + nmi_cpu_backtrace(get_irq_regs()); + break; + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; @@ -909,6 +940,19 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) __ipi_send_mask(ipi_desc[ipinr], target); } +static bool ipi_should_be_nmi(enum ipi_msg_type ipi) +{ + if (!system_uses_irq_prio_masking()) + return false; + + switch (ipi) { + case IPI_CPU_BACKTRACE: + return true; + default: + return false; + } +} + static void ipi_setup(int cpu) { int i; @@ -916,8 +960,14 @@ static void ipi_setup(int cpu) if (WARN_ON_ONCE(!ipi_irq_base)) return; - for (i = 0; i < nr_ipi; i++) - enable_percpu_irq(ipi_irq_base + i, 0); + for (i = 0; i < nr_ipi; i++) { + if (ipi_should_be_nmi(i)) { + prepare_percpu_nmi(ipi_irq_base + i); + enable_percpu_nmi(ipi_irq_base + i, 0); + } else { + enable_percpu_irq(ipi_irq_base + i, 0); + } + } } #ifdef CONFIG_HOTPLUG_CPU @@ -928,8 +978,14 @@ static void ipi_teardown(int cpu) if (WARN_ON_ONCE(!ipi_irq_base)) return; - for (i = 0; i < nr_ipi; i++) - disable_percpu_irq(ipi_irq_base + i); + for (i = 0; i < nr_ipi; i++) { + if (ipi_should_be_nmi(i)) { + disable_percpu_nmi(ipi_irq_base + i); + teardown_percpu_nmi(ipi_irq_base + i); + } else { + disable_percpu_irq(ipi_irq_base + i); + } + } } #endif @@ -937,15 +993,23 @@ void __init set_smp_ipi_range(int ipi_base, int n) { int i; - WARN_ON(n < NR_IPI); - nr_ipi = min(n, NR_IPI); + WARN_ON(n < MAX_IPI); + nr_ipi = min(n, MAX_IPI); for (i = 0; i < nr_ipi; i++) { int err; - err = request_percpu_irq(ipi_base + i, ipi_handler, - "IPI", &cpu_number); - WARN_ON(err); + if (ipi_should_be_nmi(i)) { + err = request_percpu_nmi(ipi_base + i, ipi_handler, + "IPI", &cpu_number); + WARN(err, "Could not request IPI %d as NMI, err=%d\n", + i, err); + } else { + err = request_percpu_irq(ipi_base + i, ipi_handler, + "IPI", &cpu_number); + WARN(err, "Could not request IPI %d as IRQ, err=%d\n", + i, err); + } ipi_desc[i] = irq_to_desc(ipi_base + i); irq_set_status_flags(ipi_base + i, IRQ_HIDDEN); From d7402513c935ad87413b01aa51a7ada0ad2f0163 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 6 Sep 2023 09:03:00 -0700 Subject: [PATCH 07/22] arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI There's no reason why IPI_CPU_STOP and IPI_CPU_CRASH_STOP can't be handled as NMI. They are very simple and everything in them is NMI-safe. Mark them as things to use NMI for if NMI is available. Suggested-by: Mark Rutland Reviewed-by: Stephen Boyd Reviewed-by: Misono Tomohiro Reviewed-by: Sumit Garg Acked-by: Mark Rutland Tested-by: Mark Rutland Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230906090246.v13.5.Ifadbfd45b22c52edcb499034dd4783d096343260@changeid Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 28c904ca499a..800c59cf9b64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -946,6 +946,8 @@ static bool ipi_should_be_nmi(enum ipi_msg_type ipi) return false; switch (ipi) { + case IPI_CPU_STOP: + case IPI_CPU_CRASH_STOP: case IPI_CPU_BACKTRACE: return true; default: From 2f5cd0c7ffde0ec7779f27e5c4ed30e131b66393 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 6 Sep 2023 09:03:01 -0700 Subject: [PATCH 08/22] arm64: kgdb: Implement kgdb_roundup_cpus() to enable pseudo-NMI roundup Up until now we've been using the generic (weak) implementation for kgdb_roundup_cpus() when using kgdb on arm64. Let's move to a custom one. The advantage here is that, when pseudo-NMI is enabled on a device, we'll be able to round up CPUs using pseudo-NMI. This allows us to debug CPUs that are stuck with interrupts disabled. If pseudo-NMIs are not enabled then we'll fallback to just using an IPI, which is still slightly better than the generic implementation since it avoids the potential situation described in the generic kgdb_call_nmi_hook(). Co-developed-by: Sumit Garg Signed-off-by: Sumit Garg Reviewed-by: Daniel Thompson Reviewed-by: Stephen Boyd Acked-by: Mark Rutland Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230906090246.v13.6.I2ef26d1b3bfbed2d10a281942b0da7d9854de05e@changeid Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 800c59cf9b64..1a53e57c81d0 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -79,6 +80,7 @@ enum ipi_msg_type { * with trace_ipi_* */ IPI_CPU_BACKTRACE = NR_IPI, + IPI_KGDB_ROUNDUP, MAX_IPI }; @@ -868,6 +870,22 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi); } +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(void) +{ + int this_cpu = raw_smp_processor_id(); + int cpu; + + for_each_online_cpu(cpu) { + /* No need to roundup ourselves */ + if (cpu == this_cpu) + continue; + + __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu); + } +} +#endif + /* * Main handler for inter-processor interrupts */ @@ -919,6 +937,10 @@ static void do_handle_IPI(int ipinr) nmi_cpu_backtrace(get_irq_regs()); break; + case IPI_KGDB_ROUNDUP: + kgdb_nmicallback(cpu, get_irq_regs()); + break; + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; @@ -949,6 +971,7 @@ static bool ipi_should_be_nmi(enum ipi_msg_type ipi) case IPI_CPU_STOP: case IPI_CPU_CRASH_STOP: case IPI_CPU_BACKTRACE: + case IPI_KGDB_ROUNDUP: return true; default: return false; From 62817d5ba25d2361488184f85d628b5b540e254b Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 6 Sep 2023 09:03:02 -0700 Subject: [PATCH 09/22] arm64: smp: Mark IPI globals as __ro_after_init Mark the three IPI-related globals in smp.c as "__ro_after_init" since they are only ever set in set_smp_ipi_range(), which is marked "__init". This is a better and more secure marking than the old "__read_mostly". Suggested-by: Stephen Boyd Acked-by: Mark Rutland Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20230906090246.v13.7.I625d393afd71e1766ef73d3bfaac0b347a4afd19@changeid Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 1a53e57c81d0..814d9aa93b21 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -84,9 +84,9 @@ enum ipi_msg_type { MAX_IPI }; -static int ipi_irq_base __read_mostly; -static int nr_ipi __read_mostly = NR_IPI; -static struct irq_desc *ipi_desc[MAX_IPI] __read_mostly; +static int ipi_irq_base __ro_after_init; +static int nr_ipi __ro_after_init = NR_IPI; +static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init; static void ipi_setup(int cpu); From 11a7a42ea76e61a8d2f7374ecdd95460dec4413f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Sep 2023 14:42:55 +0100 Subject: [PATCH 10/22] kselftest/arm64: Validate SVCR in streaming SVE stress test In the ZA and ZT test programs we explicitly validate that PSTATE.ZA is as expected on each loop but we do not do the equivalent for our streaming SVE test, add a check that we are still in streaming mode on every loop in case that goes wrong. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230922-arm64-ssve-validate-svcr-v1-1-f518960eaeda@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-test.S | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 4328895dfc87..547d077e3517 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -473,6 +473,13 @@ function _start // mov x8, #__NR_sched_yield // Encourage preemption // svc #0 +#ifdef SSVE + mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=0,SM=1 + and x1, x0, #3 + cmp x1, #1 + b.ne svcr_barf +#endif + mov x21, #0 0: mov x0, x21 bl check_zreg @@ -553,3 +560,15 @@ function vl_barf mov x1, #1 svc #0 endfunction + +function svcr_barf + mov x10, x0 + + puts "Bad SVCR: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction From a07a594152173a3dd3bdd12fc7d73dbba54cdbca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Oct 2023 18:00:36 +0100 Subject: [PATCH 11/22] arm64: smp: avoid NMI IPIs with broken MediaTek FW Some MediaTek devices have broken firmware which corrupts some GICR registers behind the back of the OS, and pseudo-NMIs cannot be used on these devices. For more details see commit: 44bd78dd2b8897f5 ("irqchip/gic-v3: Disable pseudo NMIs on Mediatek devices w/ firmware issues") We did not take this problem into account in commit: 331a1b3a836c0f38 ("arm64: smp: Add arch support for backtrace using pseudo-NMI") Since that commit arm64's SMP code will try to setup some IPIs as pseudo-NMIs, even on systems with broken FW. The GICv3 code will (rightly) reject attempts to request interrupts as pseudo-NMIs, resulting in boot-time failures. Avoid the problem by taking the broken FW into account when deciding to request IPIs as pseudo-NMIs. The GICv3 driver maintains a static_key named "supports_pseudo_nmis" which is false on systems with broken FW, and we can consult this within ipi_should_be_nmi(). Fixes: 331a1b3a836c ("arm64: smp: Add arch support for backtrace using pseudo-NMI") Reported-by: Chen-Yu Tsai Closes: https://issuetracker.google.com/issues/197061987#comment68 Signed-off-by: Mark Rutland Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Reviewed-by: Marc Zyngier Tested-by: Chen-Yu Tsai Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 5 ++++- drivers/irqchip/irq-gic-v3.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 814d9aa93b21..061c69160f90 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -964,7 +964,10 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) static bool ipi_should_be_nmi(enum ipi_msg_type ipi) { - if (!system_uses_irq_prio_masking()) + DECLARE_STATIC_KEY_FALSE(supports_pseudo_nmis); + + if (!system_uses_irq_prio_masking() || + !static_branch_likely(&supports_pseudo_nmis)) return false; switch (ipi) { diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 787ccc880b22..737da1b9aabf 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -106,7 +106,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 * interrupt. */ -static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities); EXPORT_SYMBOL(gic_nonsecure_priorities); From ef31b8ce313eaf891bf705d5db754e549351816f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 2 Oct 2023 09:45:30 -0700 Subject: [PATCH 12/22] arm64: smp: Don't directly call arch_smp_send_reschedule() for wakeup In commit 2b2d0a7a96ab ("arm64: smp: Remove dedicated wakeup IPI") we started using a scheduler IPI to avoid a dedicated reschedule. When we did this, we used arch_smp_send_reschedule() directly rather than calling smp_send_reschedule(). The only difference is that calling arch_smp_send_reschedule() directly avoids tracing. Presumably we _don't_ want to avoid tracing here, so switch to smp_send_reschedule(). Fixes: 2b2d0a7a96ab ("arm64: smp: Remove dedicated wakeup IPI") Signed-off-by: Douglas Anderson Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 061c69160f90..16ead57a583d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1061,7 +1061,7 @@ void arch_send_wakeup_ipi(unsigned int cpu) * We use a scheduler IPI to wake the CPU as this avoids the need for a * dedicated IPI and we can safely handle spurious scheduler IPIs. */ - arch_smp_send_reschedule(cpu); + smp_send_reschedule(cpu); } #endif From 65033574ade97afccba074d837fd269903a83a9a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 5 Oct 2023 16:40:30 +0100 Subject: [PATCH 13/22] arm64: swiotlb: Reduce the default size if no ZONE_DMA bouncing needed With CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC enabled, the arm64 kernel still allocates the default SWIOTLB buffer (64MB) even if ZONE_DMA is disabled or all the RAM fits into this zone. However, this potentially wastes a non-negligible amount of memory on platforms with little RAM. Reduce the SWIOTLB size to 1MB per 1GB of RAM if only needed for kmalloc() buffer bouncing. Signed-off-by: Catalin Marinas Suggested-by: Ross Burton Cc: Ross Burton Cc: Will Deacon Reviewed-by: Robin Murphy --- arch/arm64/mm/init.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8a0f8604348b..8deec68028ac 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -493,8 +494,16 @@ void __init mem_init(void) { bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit); - if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC)) + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb) { + /* + * If no bouncing needed for ZONE_DMA, reduce the swiotlb + * buffer for kmalloc() bouncing to 1MB per 1GB of RAM. + */ + unsigned long size = + DIV_ROUND_UP(memblock_phys_mem_size(), 1024); + swiotlb_adjust_size(min(swiotlb_size_or_default(), size)); swiotlb = true; + } swiotlb_init(swiotlb, SWIOTLB_VERBOSE); From 3425cec42c3ce0f65fe74e412756b567b152e61d Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 5 Oct 2023 15:07:30 +0100 Subject: [PATCH 14/22] arm64/mm: Hoist synchronization out of set_ptes() loop set_ptes() sets a physically contiguous block of memory (which all belongs to the same folio) to a contiguous block of ptes. The arm64 implementation of this previously just looped, operating on each individual pte. But the __sync_icache_dcache() and mte_sync_tags() operations can both be hoisted out of the loop so that they are performed once for the contiguous set of pages (which may be less than the whole folio). This should result in minor performance gains. __sync_icache_dcache() already acts on the whole folio, and sets a flag in the folio so that it skips duplicate calls. But by hoisting the call, all the pte testing is done only once. mte_sync_tags() operates on each individual page with its own loop. But by passing the number of pages explicitly, we can rely solely on its loop and do the checks only once. This approach also makes it robust for the future, rather than assuming if a head page of a compound page is being mapped, then the whole compound page is being mapped, instead we explicitly know how many pages are being mapped. The old assumption may not continue to hold once the "anonymous large folios" feature is merged. Signed-off-by: Ryan Roberts Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20231005140730.2191134-1-ryan.roberts@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 4 ++-- arch/arm64/include/asm/pgtable.h | 27 +++++++++++++++++---------- arch/arm64/kernel/mte.c | 4 ++-- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 4cedbaa16f41..91fbd5c8a391 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page) } void mte_zero_clear_page_tags(void *addr); -void mte_sync_tags(pte_t pte); +void mte_sync_tags(pte_t pte, unsigned int nr_pages); void mte_copy_page_tags(void *kto, const void *kfrom); void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); @@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page) static inline void mte_zero_clear_page_tags(void *addr) { } -static inline void mte_sync_tags(pte_t pte) +static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages) { } static inline void mte_copy_page_tags(void *kto, const void *kfrom) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7f7d9b1df4e5..68984ba9ce2a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -325,8 +325,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep, __func__, pte_val(old_pte), pte_val(pte)); } -static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages) { if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); @@ -339,20 +338,18 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, */ if (system_supports_mte() && pte_access_permitted(pte, false) && !pte_special(pte) && pte_tagged(pte)) - mte_sync_tags(pte); - - __check_safe_pte_update(mm, ptep, pte); - - set_pte(ptep, pte); + mte_sync_tags(pte, nr_pages); } static inline void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned int nr) { page_table_check_ptes_set(mm, ptep, pte, nr); + __sync_cache_and_tags(pte, nr); for (;;) { - __set_pte_at(mm, addr, ptep, pte); + __check_safe_pte_update(mm, ptep, pte); + set_pte(ptep, pte); if (--nr == 0) break; ptep++; @@ -531,18 +528,28 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + __sync_cache_and_tags(pte, nr); + __check_safe_pte_update(mm, ptep, pte); + set_pte(ptep, pte); +} + static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(mm, pmdp, pmd); - return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)); + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd), + PMD_SIZE >> PAGE_SHIFT); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { page_table_check_pud_set(mm, pudp, pud); - return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)); + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud), + PUD_SIZE >> PAGE_SHIFT); } #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 4edecaac8f91..2fb5e7a7a4d5 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -35,10 +35,10 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode); #endif -void mte_sync_tags(pte_t pte) +void mte_sync_tags(pte_t pte, unsigned int nr_pages) { struct page *page = pte_page(pte); - long i, nr_pages = compound_nr(page); + unsigned int i; /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { From dba2ff4922b3cf573c25c3886e869258a6076030 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 17 Oct 2023 11:57:55 +0100 Subject: [PATCH 15/22] arm64: Mark the 'addr' argument to set_ptes() and __set_pte_at() as unused This argument is not used by the arm64 implementation. Mark it as __always_unused and also remove the unnecessary 'addr' increment in set_ptes(). Signed-off-by: Catalin Marinas Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310140531.BQQwt3NQ-lkp@intel.com/ Cc: Will Deacon Tested-by: Ryan Roberts Link: https://lore.kernel.org/r/ZS6EvMiJ0QF5INkv@arm.com --- arch/arm64/include/asm/pgtable.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 68984ba9ce2a..b19a8aee684c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -341,8 +341,9 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages) mte_sync_tags(pte, nr_pages); } -static inline void set_ptes(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte, unsigned int nr) +static inline void set_ptes(struct mm_struct *mm, + unsigned long __always_unused addr, + pte_t *ptep, pte_t pte, unsigned int nr) { page_table_check_ptes_set(mm, ptep, pte, nr); __sync_cache_and_tags(pte, nr); @@ -353,7 +354,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, if (--nr == 0) break; ptep++; - addr += PAGE_SIZE; pte_val(pte) += PAGE_SIZE; } } @@ -528,7 +528,8 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, +static inline void __set_pte_at(struct mm_struct *mm, + unsigned long __always_unused addr, pte_t *ptep, pte_t pte, unsigned int nr) { __sync_cache_and_tags(pte, nr); From 0899a6278a86b32e0b9d55f68f265519306a5be0 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 17 Oct 2023 10:50:36 +1000 Subject: [PATCH 16/22] arm64: Remove system_uses_lse_atomics() There are two variants of system_uses_lse_atomics(), depending on CONFIG_ARM64_LSE_ATOMICS. The function isn't called anywhere when CONFIG_ARM64_LSE_ATOMICS is disabled. It can be directly replaced by alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) when the kernel option is enabled. No need to keep system_uses_lse_atomics() and just remove it. Signed-off-by: Gavin Shan Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20231017005036.334067-1-gshan@redhat.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/lse.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index cbbcdc35c4cd..3129a5819d0e 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -16,14 +16,9 @@ #include #include -static __always_inline bool system_uses_lse_atomics(void) -{ - return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS); -} - #define __lse_ll_sc_body(op, ...) \ ({ \ - system_uses_lse_atomics() ? \ + alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ? \ __lse_##op(__VA_ARGS__) : \ __ll_sc_##op(__VA_ARGS__); \ }) @@ -34,8 +29,6 @@ static __always_inline bool system_uses_lse_atomics(void) #else /* CONFIG_ARM64_LSE_ATOMICS */ -static inline bool system_uses_lse_atomics(void) { return false; } - #define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__) #define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc From 851354cbd12bb9500909733c3d4054306f61df87 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 16 Oct 2023 16:31:27 +0100 Subject: [PATCH 17/22] clocksource/drivers/arm_arch_timer: limit XGene-1 workaround The AppliedMicro XGene-1 CPU has an erratum where the timer condition would only consider TVAL, not CVAL. We currently apply a workaround when seeing the PartNum field of MIDR_EL1 being 0x000, under the assumption that this would match only the XGene-1 CPU model. However even the Ampere eMAG (aka XGene-3) uses that same part number, and only differs in the "Variant" and "Revision" fields: XGene-1's MIDR is 0x500f0000, our eMAG reports 0x503f0002. Experiments show the latter doesn't show the faulty behaviour. Increase the specificity of the check to only consider partnum 0x000 and variant 0x00, to exclude the Ampere eMAG. Fixes: 012f18850452 ("clocksource/drivers/arm_arch_timer: Work around broken CVAL implementations") Reported-by: Ross Burton Signed-off-by: Andre Przywara Acked-by: Marc Zyngier Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20231016153127.116101-1-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 3 ++- arch/arm64/kvm/guest.c | 2 +- drivers/clocksource/arm_arch_timer.c | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5f6f84837a49..818ea1c83b50 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -85,7 +85,8 @@ #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B -#define APM_CPU_PART_POTENZA 0x000 +#define APM_CPU_PART_XGENE 0x000 +#define APM_CPU_VAR_POTENZA 0x00 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 95f6945c4432..a1710e5fa72b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -874,7 +874,7 @@ u32 __attribute_const__ kvm_target_cpu(void) break; case ARM_CPU_IMP_APM: switch (part_number) { - case APM_CPU_PART_POTENZA: + case APM_CPU_PART_XGENE: return KVM_ARM_TARGET_XGENE_POTENZA; } break; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 7dd2c615bce2..071b04f1ee73 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -836,8 +836,9 @@ static u64 __arch_timer_check_delta(void) * Note that TVAL is signed, thus has only 31 of its * 32 bits to express magnitude. */ - MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM, - APM_CPU_PART_POTENZA)), + MIDR_REV_RANGE(MIDR_CPU_MODEL(ARM_CPU_IMP_APM, + APM_CPU_PART_XGENE), + APM_CPU_VAR_POTENZA, 0x0, 0xf), {}, }; From 23b727dc2092d4b0604347ff3f0a75705078058a Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Tue, 17 Oct 2023 00:23:20 -0500 Subject: [PATCH 18/22] arm64: cpufeature: Display the set of cores with a feature The AMU feature can be enabled on a subset of the cores in a system. Because of that, it prints a message for each core as it is detected. This becomes tedious when there are hundreds of cores. Instead, for CPU features which can be enabled on a subset of the present cores, lets wait until update_cpu_capabilities() and print the subset of cores the feature was enabled on. Signed-off-by: Jeremy Linton Reviewed-by: Ionela Voinescu Tested-by: Ionela Voinescu Reviewed-by: Punit Agrawal Tested-by: Punit Agrawal Link: https://lore.kernel.org/r/20231017052322.1211099-2-jeremy.linton@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/kernel/cpufeature.c | 22 +++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5bba39376055..19b4d001d845 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -23,6 +23,7 @@ #include #include #include +#include /* * CPU feature register tracking @@ -380,6 +381,7 @@ struct arm64_cpu_capabilities { * method is robust against being called multiple times. */ const struct arm64_cpu_capabilities *match_list; + const struct cpumask *cpus; }; static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 444a73c2e638..2dd695fc3472 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1944,8 +1944,6 @@ int get_cpu_with_amu_feat(void) static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) { if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) { - pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n", - smp_processor_id()); cpumask_set_cpu(smp_processor_id(), &amu_cpus); /* 0 reference values signal broken/disabled counters */ @@ -2405,16 +2403,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_AMU_EXTN { - /* - * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y. - * Therefore, don't provide .desc as we don't want the detection - * message to be shown until at least one CPU is detected to - * support the feature. - */ + .desc = "Activity Monitors Unit (AMU)", .capability = ARM64_HAS_AMU_EXTN, .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_amu, .cpu_enable = cpu_amu_enable, + .cpus = &amu_cpus, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP) }, #endif /* CONFIG_ARM64_AMU_EXTN */ @@ -2981,7 +2975,7 @@ static void update_cpu_capabilities(u16 scope_mask) !caps->matches(caps, cpucap_default_scope(caps))) continue; - if (caps->desc) + if (caps->desc && !caps->cpus) pr_info("detected: %s\n", caps->desc); __set_bit(caps->capability, system_cpucaps); @@ -3330,6 +3324,7 @@ unsigned long cpu_get_elf_hwcap2(void) static void __init setup_system_capabilities(void) { + int i; /* * We have finalised the system-wide safe feature * registers, finalise the capabilities that depend @@ -3338,6 +3333,15 @@ static void __init setup_system_capabilities(void) */ update_cpu_capabilities(SCOPE_SYSTEM); enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); + + for (i = 0; i < ARM64_NCAPS; i++) { + const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i]; + + if (caps && caps->cpus && caps->desc && + cpumask_any(caps->cpus) < nr_cpu_ids) + pr_info("detected: %s on CPU%*pbl\n", + caps->desc, cpumask_pr_args(caps->cpus)); + } } void __init setup_cpu_features(void) From 04d402a453c37f74487d690d11caeccd59fdeb9f Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Tue, 17 Oct 2023 00:23:21 -0500 Subject: [PATCH 19/22] arm64: cpufeature: Change DBM to display enabled cores Now that we have the ability to display the list of cores with a feature when its selectivly enabled, lets convert DBM to use that as well. Signed-off-by: Jeremy Linton Link: https://lore.kernel.org/r/20231017052322.1211099-3-jeremy.linton@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2dd695fc3472..b7b67bac0e60 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1848,6 +1848,8 @@ static int __init parse_kpti(char *str) early_param("kpti", parse_kpti); #ifdef CONFIG_ARM64_HW_AFDBM +static struct cpumask dbm_cpus __read_mostly; + static inline void __cpu_enable_hw_dbm(void) { u64 tcr = read_sysreg(tcr_el1) | TCR_HD; @@ -1883,35 +1885,22 @@ static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap) static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap) { - if (cpu_can_use_dbm(cap)) + if (cpu_can_use_dbm(cap)) { __cpu_enable_hw_dbm(); + cpumask_set_cpu(smp_processor_id(), &dbm_cpus); + } } static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, int __unused) { - static bool detected = false; /* * DBM is a non-conflicting feature. i.e, the kernel can safely * run a mix of CPUs with and without the feature. So, we * unconditionally enable the capability to allow any late CPU * to use the feature. We only enable the control bits on the - * CPU, if it actually supports. - * - * We have to make sure we print the "feature" detection only - * when at least one CPU actually uses it. So check if this CPU - * can actually use it and print the message exactly once. - * - * This is safe as all CPUs (including secondary CPUs - due to the - * LOCAL_CPU scope - and the hotplugged CPUs - via verification) - * goes through the "matches" check exactly once. Also if a CPU - * matches the criteria, it is guaranteed that the CPU will turn - * the DBM on, as the capability is unconditionally enabled. + * CPU, if it is supported. */ - if (!detected && cpu_can_use_dbm(cap)) { - detected = true; - pr_info("detected: Hardware dirty bit management\n"); - } return true; } @@ -2448,18 +2437,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }, #ifdef CONFIG_ARM64_HW_AFDBM { - /* - * Since we turn this on always, we don't want the user to - * think that the feature is available when it may not be. - * So hide the description. - * - * .desc = "Hardware pagetable Dirty Bit Management", - * - */ + .desc = "Hardware dirty bit management", .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .capability = ARM64_HW_DBM, .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, + .cpus = &dbm_cpus, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) }, #endif From c54e52f84d7aa590e90e1f73f462517ac40051e1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 23 Oct 2023 14:35:03 +0100 Subject: [PATCH 20/22] arm64, irqchip/gic-v3, ACPI: Move MADT GICC enabled check into a helper ACPI, irqchip and the architecture code all inspect the MADT enabled bit for a GICC entry in the MADT. The addition of an 'online capable' bit means all these sites need updating. Move the current checks behind a helper to make future updates easier. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Acked-by: "Rafael J. Wysocki" Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/E1quv5D-00AeNJ-U8@rmk-PC.armlinux.org.uk Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 2 +- drivers/acpi/processor_core.c | 2 +- drivers/irqchip/irq-gic-v3.c | 10 ++++------ include/linux/acpi.h | 5 +++++ 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 960b98b43506..8c8f55721786 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -520,7 +520,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) { u64 hwid = processor->arm_mpidr; - if (!(processor->flags & ACPI_MADT_ENABLED)) { + if (!acpi_gicc_is_usable(processor)) { pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); return; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 7dd6dbaa98c3..b203cfe28550 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -90,7 +90,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, struct acpi_madt_generic_interrupt *gicc = container_of(entry, struct acpi_madt_generic_interrupt, header); - if (!(gicc->flags & ACPI_MADT_ENABLED)) + if (!acpi_gicc_is_usable(gicc)) return -ENODEV; /* device_declaration means Device object in DSDT, in the diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eedfa8e9f077..72d3cdebdad1 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -2367,8 +2367,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; void __iomem *redist_base; - /* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */ - if (!(gicc->flags & ACPI_MADT_ENABLED)) + if (!acpi_gicc_is_usable(gicc)) return 0; redist_base = ioremap(gicc->gicr_base_address, size); @@ -2418,7 +2417,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, * If GICC is enabled and has valid gicr base address, then it means * GICR base is presented via GICC */ - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) { acpi_data.enabled_rdists++; return 0; } @@ -2427,7 +2426,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, * It's perfectly valid firmware can pass disabled GICC entry, driver * should not treat as errors, skip the entry instead of probe fail. */ - if (!(gicc->flags & ACPI_MADT_ENABLED)) + if (!acpi_gicc_is_usable(gicc)) return 0; return -ENODEV; @@ -2486,8 +2485,7 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *hea int maint_irq_mode; static int first_madt = true; - /* Skip unusable CPUs */ - if (!(gicc->flags & ACPI_MADT_ENABLED)) + if (!acpi_gicc_is_usable(gicc)) return 0; maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a73246c3c35e..fae2aa0b5e8b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -256,6 +256,11 @@ acpi_table_parse_cedt(enum acpi_cedt_type id, int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); +static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) +{ + return gicc->flags & ACPI_MADT_ENABLED; +} + /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); From d35686444fc80950c731e33a2f6ad4a55822be9b Mon Sep 17 00:00:00 2001 From: Maria Yu Date: Tue, 24 Oct 2023 09:09:54 +0800 Subject: [PATCH 21/22] arm64: module: Fix PLT counting when CONFIG_RANDOMIZE_BASE=n The counting of module PLTs has been broken when CONFIG_RANDOMIZE_BASE=n since commit: 3e35d303ab7d22c4 ("arm64: module: rework module VA range selection") Prior to that commit, when CONFIG_RANDOMIZE_BASE=n, the kernel image and all modules were placed within a 128M region, and no PLTs were necessary for B or BL. Hence count_plts() and partition_branch_plt_relas() skipped handling B and BL when CONFIG_RANDOMIZE_BASE=n. After that commit, modules can be placed anywhere within a 2G window regardless of CONFIG_RANDOMIZE_BASE, and hence PLTs may be necessary for B and BL even when CONFIG_RANDOMIZE_BASE=n. Unfortunately that commit failed to update count_plts() and partition_branch_plt_relas() accordingly. Due to this, module_emit_plt_entry() may fail if an insufficient number of PLT entries have been reserved, resulting in modules failing to load with -ENOEXEC. Fix this by counting PLTs regardless of CONFIG_RANDOMIZE_BASE in count_plts() and partition_branch_plt_relas(). Fixes: 3e35d303ab7d ("arm64: module: rework module VA range selection") Signed-off-by: Maria Yu Cc: # 6.5.x Acked-by: Ard Biesheuvel Fixes: 3e35d303ab7d ("arm64: module: rework module VA range selection") Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20231024010954.6768-1-quic_aiquny@quicinc.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/module-plts.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index bd69a4e7cd60..79200f21e123 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -167,9 +167,6 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, switch (ELF64_R_TYPE(rela[i].r_info)) { case R_AARCH64_JUMP26: case R_AARCH64_CALL26: - if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - break; - /* * We only have to consider branch targets that resolve * to symbols that are defined in a different section. @@ -269,9 +266,6 @@ static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela, { int i = 0, j = numrels - 1; - if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return 0; - while (i < j) { if (branch_rela_needs_plt(syms, &rela[i], dstidx)) i++; From 146a15b873353f8ac28dc281c139ff611a3c4848 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 25 Oct 2023 10:21:28 -0700 Subject: [PATCH 22/22] arm64: Restrict CPU_BIG_ENDIAN to GNU as or LLVM IAS 15.x or newer Prior to LLVM 15.0.0, LLVM's integrated assembler would incorrectly byte-swap NOP when compiling for big-endian, and the resulting series of bytes happened to match the encoding of FNMADD S21, S30, S0, S0. This went unnoticed until commit: 34f66c4c4d5518c1 ("arm64: Use a positive cpucap for FP/SIMD") Prior to that commit, the kernel would always enable the use of FPSIMD early in boot when __cpu_setup() initialized CPACR_EL1, and so usage of FNMADD within the kernel was not detected, but could result in the corruption of user or kernel FPSIMD state. After that commit, the instructions happen to trap during boot prior to FPSIMD being detected and enabled, e.g. | Unhandled 64-bit el1h sync exception on CPU0, ESR 0x000000001fe00000 -- ASIMD | CPU: 0 PID: 0 Comm: swapper Not tainted 6.6.0-rc3-00013-g34f66c4c4d55 #1 | Hardware name: linux,dummy-virt (DT) | pstate: 400000c9 (nZcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : __pi_strcmp+0x1c/0x150 | lr : populate_properties+0xe4/0x254 | sp : ffffd014173d3ad0 | x29: ffffd014173d3af0 x28: fffffbfffddffcb8 x27: 0000000000000000 | x26: 0000000000000058 x25: fffffbfffddfe054 x24: 0000000000000008 | x23: fffffbfffddfe000 x22: fffffbfffddfe000 x21: fffffbfffddfe044 | x20: ffffd014173d3b70 x19: 0000000000000001 x18: 0000000000000005 | x17: 0000000000000010 x16: 0000000000000000 x15: 00000000413e7000 | x14: 0000000000000000 x13: 0000000000001bcc x12: 0000000000000000 | x11: 00000000d00dfeed x10: ffffd414193f2cd0 x9 : 0000000000000000 | x8 : 0101010101010101 x7 : ffffffffffffffc0 x6 : 0000000000000000 | x5 : 0000000000000000 x4 : 0101010101010101 x3 : 000000000000002a | x2 : 0000000000000001 x1 : ffffd014171f2988 x0 : fffffbfffddffcb8 | Kernel panic - not syncing: Unhandled exception | CPU: 0 PID: 0 Comm: swapper Not tainted 6.6.0-rc3-00013-g34f66c4c4d55 #1 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0xec/0x108 | show_stack+0x18/0x2c | dump_stack_lvl+0x50/0x68 | dump_stack+0x18/0x24 | panic+0x13c/0x340 | el1t_64_irq_handler+0x0/0x1c | el1_abort+0x0/0x5c | el1h_64_sync+0x64/0x68 | __pi_strcmp+0x1c/0x150 | unflatten_dt_nodes+0x1e8/0x2d8 | __unflatten_device_tree+0x5c/0x15c | unflatten_device_tree+0x38/0x50 | setup_arch+0x164/0x1e0 | start_kernel+0x64/0x38c | __primary_switched+0xbc/0xc4 Restrict CONFIG_CPU_BIG_ENDIAN to a known good assembler, which is either GNU as or LLVM's IAS 15.0.0 and newer, which contains the linked commit. Closes: https://github.com/ClangBuiltLinux/linux/issues/1948 Link: https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c Signed-off-by: Nathan Chancellor Cc: stable@vger.kernel.org Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20231025-disable-arm64-be-ias-b4-llvm-15-v1-1-b25263ed8b23@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b10515c0200b..cacc67121adb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1355,6 +1355,8 @@ choice config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on !LD_IS_LLD || LLD_VERSION >= 130000 + # https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c + depends on AS_IS_GNU || AS_VERSION >= 150000 help Say Y if you plan on running a kernel with a big-endian userspace.