From adb4907007445ab9d392f4ce398cd7fa5d26612b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Apr 2016 12:11:21 +0200 Subject: [PATCH 1/4] arm64: fix invalidation of wrong __early_cpu_boot_status cacheline In head.S, the str_l macro, which takes a source register, a symbol name and a temp register, is used to store a status value to the variable __early_cpu_boot_status. Subsequently, the value of the temp register is reused to invalidate any cachelines covering this variable. However, since str_l resolves to adrp \tmp, \sym str \src, [\tmp, :lo12:\sym] the temp register never actually holds the address of the variable but only of the 4 KB window that covers it, and reusing it leads to the wrong cacheline being invalidated. So instead, take the address explicitly before doing the store, and reuse that value to perform the cache invalidation. Fixes: bb9052744f4b ("arm64: Handle early CPU boot failures") Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Acked-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4203d5f257bc..80a709aa211a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -734,7 +734,8 @@ ENDPROC(__secondary_switched) .macro update_early_cpu_boot_status status, tmp1, tmp2 mov \tmp2, #\status - str_l \tmp2, __early_cpu_boot_status, \tmp1 + adr_l \tmp1, __early_cpu_boot_status + str \tmp2, [\tmp1] dmb sy dc ivac, \tmp1 // Invalidate potentially stale cache line .endm From 2fee7d5b08b6419bb59de9e875d895e3a6e7bf5a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 20 Apr 2016 10:23:31 +0900 Subject: [PATCH 2/4] arm64: spin-table: add missing of_node_put() Since of_get_cpu_node() increments refcount, the node should be put. 
Signed-off-by: Masahiro Yamada Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp_spin_table.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index aef3605a8c47..18a71bcd26ee 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -52,6 +52,7 @@ static void write_pen_release(u64 val) static int smp_spin_table_cpu_init(unsigned int cpu) { struct device_node *dn; + int ret; dn = of_get_cpu_node(cpu, NULL); if (!dn) @@ -60,15 +61,15 @@ static int smp_spin_table_cpu_init(unsigned int cpu) /* * Determine the address from which the CPU is polling. */ - if (of_property_read_u64(dn, "cpu-release-addr", - &cpu_release_addr[cpu])) { + ret = of_property_read_u64(dn, "cpu-release-addr", + &cpu_release_addr[cpu]); + if (ret) pr_err("CPU %d: missing or invalid cpu-release-addr property\n", cpu); - return -1; - } + of_node_put(dn); - return 0; + return ret; } static int smp_spin_table_cpu_prepare(unsigned int cpu) From cbcc72e037b8a3eb1fad3c1ae22021df21c97a51 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 21 Apr 2016 10:24:34 +0100 Subject: [PATCH 3/4] drivers/perf: arm-pmu: fix RCU usage on pmu resume from low-power Commit da4e4f18afe0 ("drivers/perf: arm_pmu: implement CPU_PM notifier") added code in the arm perf infrastructure that allows the kernel to save/restore perf counters whenever the CPU enters a low-power state. The kernel saves/restores the counters for each active event through the armpmu_{stop/start} ARM pmu API, so that the low-power state enter/exit cycle is emulated through pmu start/stop operations for each event in use. 
However, calling armpmu_start() for each active event on power up executes code that requires RCU locking (perf_event_update_userpage()) to be functional, so, given that the core may call the CPU_PM notifiers while running the idle thread in a quiescent RCU state, this is not allowed, as detected through the following splat when the kernel is run with CONFIG_PROVE_LOCKING enabled: [ 49.293286] [ 49.294761] =============================== [ 49.298895] [ INFO: suspicious RCU usage. ] [ 49.303031] 4.6.0-rc3+ #421 Not tainted [ 49.306821] ------------------------------- [ 49.310956] include/linux/rcupdate.h:872 rcu_read_lock() used illegally while idle! [ 49.318530] [ 49.318530] other info that might help us debug this: [ 49.318530] [ 49.326451] [ 49.326451] RCU used illegally from idle CPU! [ 49.326451] rcu_scheduler_active = 1, debug_locks = 0 [ 49.337209] RCU used illegally from extended quiescent state! [ 49.342892] 2 locks held by swapper/2/0: [ 49.346768] #0: (cpu_pm_notifier_lock){......}, at: [] cpu_pm_exit+0x18/0x80 [ 49.355492] #1: (rcu_read_lock){......}, at: [] perf_event_update_userpage+0x0/0x260 This patch wraps the armpmu_start() call (that indirectly calls perf_event_update_userpage()) on CPU_PM notifier power state exit (or failed entry) within the RCU_NONIDLE() macro so that the RCU subsystem is made aware the calling cpu is not idle from an RCU perspective for the armpmu_start() call duration, therefore fixing the issue. Fixes: da4e4f18afe0 ("drivers/perf: arm_pmu: implement CPU_PM notifier") Signed-off-by: Lorenzo Pieralisi Reported-by: James Morse Suggested-by: Kevin Hilman Cc: Ashwin Chaugule Cc: Kevin Hilman Cc: Sudeep Holla Cc: Daniel Lezcano Cc: Mathieu Poirier Acked-by: Mark Rutland Acked-by: Paul E.
McKenney Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/perf/arm_pmu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 32346b5a8a11..f70090897fdf 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -737,8 +737,19 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) break; case CPU_PM_EXIT: case CPU_PM_ENTER_FAILED: - /* Restore and enable the counter */ - armpmu_start(event, PERF_EF_RELOAD); + /* + * Restore and enable the counter. + * armpmu_start() indirectly calls + * + * perf_event_update_userpage() + * + * that requires RCU read locking to be functional, + * wrap the call within RCU_NONIDLE to make the + * RCU subsystem aware this cpu is not idle from + * an RCU perspective for the armpmu_start() call + * duration. + */ + RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD)); break; default: break; From 882416c1e4d1c9d4c7b50e0f5c3fa9ef043cd710 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 18 Apr 2016 18:57:26 +0100 Subject: [PATCH 4/4] arm64: Fix EL1/EL2 early init inconsistencies with VHE When using the Virtualisation Host Extensions, EL1 is not used in the host and requires no separate configuration. In addition, with VHE enabled, non-hyp-specific EL2 configuration that does not need to be done early will be done anyway in __cpu_setup via the _EL1 system register aliases. In particular, the layout and definition of CPTR_EL2 are changed by enabling VHE so that they resemble CPACR_EL1, so existing code to initialise CPTR_EL2 becomes architecturally wrong in this case. This patch simply skips the affected initialisation code in the VHE case.
Signed-off-by: Dave Martin Reviewed-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 80a709aa211a..85da0f599cd6 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -588,6 +588,15 @@ set_hcr: msr vpidr_el2, x0 msr vmpidr_el2, x1 + /* + * When VHE is not in use, early init of EL2 and EL1 needs to be + * done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ + cbnz x2, 1f + /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems @@ -597,6 +606,7 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems /* Coprocessor traps. */ mov x0, #0x33ff msr cptr_el2, x0 // Disable copro. traps to EL2 +1: #ifdef CONFIG_COMPAT msr hstr_el2, xzr // Disable CP15 traps to EL2