From 4510bffb4d0246cdcc1f14c7367c026b807a862d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 May 2022 14:17:32 +0100 Subject: [PATCH 1/2] arch: make TRACE_IRQFLAGS_NMI_SUPPORT generic On most architectures, IRQ flag tracing is disabled in NMI context, and architectures need to define and select TRACE_IRQFLAGS_NMI_SUPPORT in order to enable this. Commit: 859d069ee1ddd878 ("lockdep: Prepare for NMI IRQ state tracking") Permitted IRQ flag tracing in NMI context, allowing lockdep to work in NMI context where an architecture had suitable entry logic. At the time, most architectures did not have such suitable entry logic, and this broke lockdep on such architectures. Thus, this was partially disabled in commit: ed00495333ccc80f ("locking/lockdep: Fix TRACE_IRQFLAGS vs. NMIs") ... with architectures needing to select TRACE_IRQFLAGS_NMI_SUPPORT to enable IRQ flag tracing in NMI context. Currently TRACE_IRQFLAGS_NMI_SUPPORT is defined under arch/x86/Kconfig.debug. Move it to arch/Kconfig so architectures can select it without having to provide their own definition. Since the regular TRACE_IRQFLAGS_SUPPORT is selected by arch/x86/Kconfig, the select of TRACE_IRQFLAGS_NMI_SUPPORT is moved there too. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Ingo Molnar Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Will Deacon Link: https://lore.kernel.org/r/20220511131733.4074499-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 1 + arch/x86/Kconfig.debug | 3 --- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index fcf9a41a4ef5..52eaa16d9b79 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -223,6 +223,9 @@ config HAVE_FUNCTION_DESCRIPTORS config TRACE_IRQFLAGS_SUPPORT bool +config TRACE_IRQFLAGS_NMI_SUPPORT + bool + # # An arch should select this if it provides all these things: # diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be0b95e51df6..e31840edd90e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -277,6 +277,7 @@ config X86 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select HAVE_ARCH_KCSAN if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 340399f69954..bdfe08f1a930 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_NMI_SUPPORT - def_bool y - config EARLY_PRINTK_USB bool From 3381da254fab37ba08c4b7c4f19b4ee28b1a27ec Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 May 2022 14:17:33 +0100 Subject: [PATCH 2/2] arm64: select TRACE_IRQFLAGS_NMI_SUPPORT Due to an oversight, on arm64 lockdep IRQ state tracking doesn't work as intended in NMI context. This demonstrably results in bogus warnings from lockdep, and in theory could mask a variety of issues. On arm64, we've consistently tracked IRQ flag state for NMIs (and saved/restored the state of the interrupted context) since commit: f0cd5ac1e4c53cb6 ("arm64: entry: fix NMI {user, kernel}->kernel transitions") That commit fixed most lockdep issues with NMI by virtue of the save/restore of the lockdep state of the interrupted context. However, for lockdep IRQ state tracking to consistently take effect in NMI context it has been necessary to select TRACE_IRQFLAGS_NMI_SUPPORT since commit: ed00495333ccc80f ("locking/lockdep: Fix TRACE_IRQFLAGS vs. NMIs") As arm64 does not select TRACE_IRQFLAGS_NMI_SUPPORT, this means that the lockdep state can be stale in NMI context, and some uses of that state can consume stale data. When an NMI is taken arm64 entry code will call arm64_enter_nmi(). This will enter NMI context via __nmi_enter() before calling lockdep_hardirqs_off() to inform lockdep that IRQs have been masked. Where TRACE_IRQFLAGS_NMI_SUPPORT is not selected, lockdep_hardirqs_off() will not update lockdep state if called in NMI context. Thus if IRQs were enabled in the original context, lockdep will continue to believe that IRQs are enabled despite the call to lockdep_hardirqs_off(). However, the lockdep_assert_*() checks do take effect in NMI context, and will consume the stale lockdep state. If an NMI is taken from a context which had IRQs enabled, and during the handling of the NMI something calls lockdep_assert_irqs_disabled(), this will result in a spurious warning based upon the stale lockdep state. This can be seen when using perf with GICv3 pseudo-NMIs. Within the perf NMI handler we may attempt a uaccess to record the userspace callchain, and is this faults the el1_abort() call in the nested context will call exit_to_kernel_mode() when returning, which has a lockdep_assert_irqs_disabled() assertion: | # ./perf record -a -g sh | ------------[ cut here ]------------ | WARNING: CPU: 0 PID: 164 at arch/arm64/kernel/entry-common.c:73 exit_to_kernel_mode+0x118/0x1ac | Modules linked in: | CPU: 0 PID: 164 Comm: perf Not tainted 5.18.0-rc5 #1 | Hardware name: linux,dummy-virt (DT) | pstate: 004003c5 (nzcv DAIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : exit_to_kernel_mode+0x118/0x1ac | lr : el1_abort+0x80/0xbc | sp : ffff8000080039f0 | pmr_save: 000000f0 | x29: ffff8000080039f0 x28: ffff6831054e4980 x27: ffff683103adb400 | x26: 0000000000000000 x25: 0000000000000001 x24: 0000000000000001 | x23: 00000000804000c5 x22: 00000000000000c0 x21: 0000000000000001 | x20: ffffbd51e635ec44 x19: ffff800008003a60 x18: 0000000000000000 | x17: ffffaadf98d23000 x16: ffff800008004000 x15: 0000ffffd14f25c0 | x14: 0000000000000000 x13: 00000000000018eb x12: 0000000000000040 | x11: 000000000000001e x10: 000000002b820020 x9 : 0000000100110000 | x8 : 000000000045cac0 x7 : 0000ffffd14f25c0 x6 : ffffbd51e639b000 | x5 : 00000000000003e5 x4 : ffffbd51e58543b0 x3 : 0000000000000001 | x2 : ffffaadf98d23000 x1 : ffff6831054e4980 x0 : 0000000100110000 | Call trace: | exit_to_kernel_mode+0x118/0x1ac | el1_abort+0x80/0xbc | el1h_64_sync_handler+0xa4/0xd0 | el1h_64_sync+0x74/0x78 | __arch_copy_from_user+0xa4/0x230 | get_perf_callchain+0x134/0x1e4 | perf_callchain+0x7c/0xa0 | perf_prepare_sample+0x414/0x660 | perf_event_output_forward+0x80/0x180 | __perf_event_overflow+0x70/0x13c | perf_event_overflow+0x1c/0x30 | armv8pmu_handle_irq+0xe8/0x160 | armpmu_dispatch_irq+0x2c/0x70 | handle_percpu_devid_fasteoi_nmi+0x7c/0xbc | generic_handle_domain_nmi+0x3c/0x60 | gic_handle_irq+0x1dc/0x310 | call_on_irq_stack+0x2c/0x54 | do_interrupt_handler+0x80/0x94 | el1_interrupt+0xb0/0xe4 | el1h_64_irq_handler+0x18/0x24 | el1h_64_irq+0x74/0x78 | lockdep_hardirqs_off+0x50/0x120 | trace_hardirqs_off+0x38/0x214 | _raw_spin_lock_irq+0x98/0xa0 | pipe_read+0x1f8/0x404 | new_sync_read+0x140/0x150 | vfs_read+0x190/0x1dc | ksys_read+0xdc/0xfc | __arm64_sys_read+0x20/0x30 | invoke_syscall+0x48/0x114 | el0_svc_common.constprop.0+0x158/0x17c | do_el0_svc+0x28/0x90 | el0_svc+0x60/0x150 | el0t_64_sync_handler+0xa4/0x130 | el0t_64_sync+0x19c/0x1a0 | irq event stamp: 483 | hardirqs last enabled at (483): [] _raw_spin_unlock_irqrestore+0xa4/0xb0 | hardirqs last disabled at (482): [] _raw_spin_lock_irqsave+0xb0/0xb4 | softirqs last enabled at (468): [] put_cpu_fpsimd_context+0x28/0x70 | softirqs last disabled at (466): [] get_cpu_fpsimd_context+0x0/0x5c | ---[ end trace 0000000000000000 ]--- Note that as lockdep_assert_irqs_disabled() uses WARN_ON_ONCE(), and this uses a BRK, the warning is logged with the real PSTATE at the time of the warning, which clearly has DAIF.I set, meaning IRQs (and pseudo-NMIs) were definitely masked and the warning is spurious. Fix this by selecting TRACE_IRQFLAGS_NMI_SUPPORT such that the existing entry tracking takes effect, as we had originally intended when the arm64 entry code was fixed for transitions to/from NMI. Arguably the lockdep_assert_*() functions should have the same NMI checks as the rest of the code to prevent spurious warnings when TRACE_IRQFLAGS_NMI_SUPPORT is not selected, but the real fix for any architecture is to explicitly handle the transitions to/from NMI in the entry code. Fixes: f0cd5ac1e4c5 ("arm64: entry: fix NMI {user, kernel}->kernel transitions") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Ingo Molnar Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Will Deacon Link: https://lore.kernel.org/r/20220511131733.4074499-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1652a9800ebe..3e0477b2dbf1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -226,6 +226,7 @@ config ARM64 select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT help ARM 64-bit (AArch64) Linux support.