Merge branch kvm-arm64/nvhe-stacktrace into kvmarm-master/next

* kvm-arm64/nvhe-stacktrace: (27 commits)
  : .
  : Add an overflow stack to the nVHE EL2 code, allowing
  : the implementation of an unwinder, courtesy of
  : Kalesh Singh. From the cover letter (slightly edited):
  :
  : "nVHE has two modes of operation: protected (pKVM) and unprotected
  : (conventional nVHE). Depending on the mode, a slightly different approach
  : is used to dump the hypervisor stacktrace but the core unwinding logic
  : remains the same.
  :
  : * Protected nVHE (pKVM) stacktraces:
  :
  : In protected nVHE mode, the host cannot directly access hypervisor memory.
  :
  : The hypervisor stack unwinding happens in EL2 and is made accessible to
  : the host via a shared buffer. Symbolizing and printing the stacktrace
  : addresses is delegated to the host and happens in EL1.
  :
  : * Non-protected (Conventional) nVHE stacktraces:
  :
  : In non-protected mode, the host is able to directly access the hypervisor
  : stack pages.
  :
  : The hypervisor stack unwinding and dumping of the stacktrace is performed
  : by the host in EL1, as this avoids the memory overhead of setting up
  : shared buffers between the host and hypervisor."
  :
  : Additional patches from Oliver Upton and Marc Zyngier, tidying up
  : the initial series.
  : .
  arm64: Update 'unwinder howto'
  KVM: arm64: Don't open code ARRAY_SIZE()
  KVM: arm64: Move nVHE-only helpers into kvm/stacktrace.c
  KVM: arm64: Make unwind()/on_accessible_stack() per-unwinder functions
  KVM: arm64: Move nVHE stacktrace unwinding into its own compilation unit
  KVM: arm64: Move PROTECTED_NVHE_STACKTRACE around
  KVM: arm64: Introduce pkvm_dump_backtrace()
  KVM: arm64: Implement protected nVHE hyp stack unwinder
  KVM: arm64: Save protected-nVHE (pKVM) hyp stacktrace
  KVM: arm64: Stub implementation of pKVM HYP stack unwinder
  KVM: arm64: Allocate shared pKVM hyp stacktrace buffers
  KVM: arm64: Add PROTECTED_NVHE_STACKTRACE Kconfig
  KVM: arm64: Introduce hyp_dump_backtrace()
  KVM: arm64: Implement non-protected nVHE hyp stack unwinder
  KVM: arm64: Prepare non-protected nVHE hypervisor stacktrace
  KVM: arm64: Stub implementation of non-protected nVHE HYP stack unwinder
  KVM: arm64: On stack overflow switch to hyp overflow_stack
  arm64: stacktrace: Add description of stacktrace/common.h
  arm64: stacktrace: Factor out common unwind()
  arm64: stacktrace: Handle frame pointer from different address spaces
  ...

Signed-off-by: Marc Zyngier <maz@kernel.org>
Marc Zyngier, 2022-07-27 18:33:27 +01:00
commit 0982c8d859

16 changed files with 777 additions and 172 deletions

@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
unsigned long vtcr;
};
/*
* Used by the host in EL1 to dump the nVHE hypervisor backtrace on
* hyp_panic() in non-protected mode.
*
* @stack_base: hyp VA of the hyp_stack base.
* @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
* @fp: hyp FP where the backtrace begins.
* @pc: hyp PC where the backtrace begins.
*/
struct kvm_nvhe_stacktrace_info {
unsigned long stack_base;
unsigned long overflow_stack_base;
unsigned long fp;
unsigned long pc;
};
/* Translate a kernel address @ptr into its equivalent linear mapping */
#define kvm_ksym_ref(ptr) \
({ \


@@ -113,6 +113,14 @@
#define OVERFLOW_STACK_SIZE SZ_4K
/*
* With the minimum frame size of [x29, x30], exactly half the combined
* sizes of the hyp and overflow stacks is the maximum size needed to
* save the unwound stacktrace, plus an additional entry to delimit the
* end.
*/
#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
/*
* Alignment of kernel segments (e.g. .text, .data).
*

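The arithmetic behind NVHE_STACKTRACE_SIZE, as a standalone sketch (not kernel code): assuming 4KiB pages and a 64-bit long, the two stacks together hold at most (4096 + 4096) / 16 = 512 minimal [x29, x30] frame records, and saving one return address per frame plus a delimiting entry takes 512 * 8 + 8 = 4104 bytes, i.e. half the combined stack size plus sizeof(long).

#include <assert.h>
#include <stddef.h>

#define OVERFLOW_STACK_SIZE	4096UL			/* SZ_4K */
#define PAGE_SIZE		4096UL			/* assumption: 4KiB pages */
#define FRAME_RECORD_SIZE	(2 * sizeof(long))	/* minimal frame: [x29, x30] */
#define NVHE_STACKTRACE_SIZE	((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))

int main(void)
{
	/* Worst case: hyp and overflow stacks packed with minimal frame records. */
	size_t max_frames = (OVERFLOW_STACK_SIZE + PAGE_SIZE) / FRAME_RECORD_SIZE;

	/* One saved PC (a long) per frame, plus one entry to delimit the end. */
	assert(NVHE_STACKTRACE_SIZE == max_frames * sizeof(long) + sizeof(long));
	return 0;
}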

@@ -8,52 +8,20 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/types.h>
#include <linux/llist.h>
#include <asm/memory.h>
#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
__NR_STACK_TYPES
};
struct stack_info {
unsigned long low;
unsigned long high;
enum stack_type type;
};
#include <asm/stacktrace/common.h>
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
static inline bool on_stack(unsigned long sp, unsigned long size,
unsigned long low, unsigned long high,
enum stack_type type, struct stack_info *info)
{
if (!low)
return false;
if (sp < low || sp + size < sp || sp + size > high)
return false;
if (info) {
info->low = low;
info->high = high;
info->type = type;
}
return true;
}
static inline bool on_irq_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info) { return false; }
#endif
/*
* We can only safely access per-cpu stacks from current in a non-preemptible
* context.
*/
static inline bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
if (on_task_stack(tsk, sp, size, info))
return true;
if (tsk != current || preemptible())
return false;
if (on_irq_stack(sp, size, info))
return true;
if (on_overflow_stack(sp, size, info))
return true;
if (on_sdei_stack(sp, size, info))
return true;
return false;
}
#endif /* __ASM_STACKTRACE_H */


@@ -0,0 +1,199 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Common arm64 stack unwinder code.
*
* To implement a new arm64 stack unwinder:
* 1) Include this header
*
* 2) Call into unwind_next_common() from your top level unwind
* function, passing it the validation and translation callbacks
* (though the latter can be NULL if no translation is required).
*
* See: arch/arm64/kernel/stacktrace.c for the reference implementation.
*
* Copyright (C) 2012 ARM Ltd.
*/
#ifndef __ASM_STACKTRACE_COMMON_H
#define __ASM_STACKTRACE_COMMON_H
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/kprobes.h>
#include <linux/types.h>
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
STACK_TYPE_HYP,
__NR_STACK_TYPES
};
struct stack_info {
unsigned long low;
unsigned long high;
enum stack_type type;
};
/*
* A snapshot of a frame record or fp/lr register values, along with some
* accounting information necessary for robust unwinding.
*
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
*
* @stacks_done: Stacks which have been entirely unwound, for which it is no
* longer valid to unwind to.
*
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
* of 0. This is used to ensure that within a stack, each
* subsequent frame record is at an increasing address.
* @prev_type: The type of stack this frame record was on, or a synthetic
* value of STACK_TYPE_UNKNOWN. This is used to detect a
* transition from one stack to another.
*
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
*
* @task: The task being unwound.
*/
struct unwind_state {
unsigned long fp;
unsigned long pc;
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
unsigned long prev_fp;
enum stack_type prev_type;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
struct task_struct *task;
};
static inline bool on_stack(unsigned long sp, unsigned long size,
unsigned long low, unsigned long high,
enum stack_type type, struct stack_info *info)
{
if (!low)
return false;
if (sp < low || sp + size < sp || sp + size > high)
return false;
if (info) {
info->low = low;
info->high = high;
info->type = type;
}
return true;
}
static inline void unwind_init_common(struct unwind_state *state,
struct task_struct *task)
{
state->task = task;
#ifdef CONFIG_KRETPROBES
state->kr_cur = NULL;
#endif
/*
* Prime the first unwind.
*
* In unwind_next() we'll check that the FP points to a valid stack,
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
* treated as a transition to whichever stack that happens to be. The
* prev_fp value won't be used, but we set it to 0 such that it is
* definitely not an accessible stack address.
*/
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN;
}
/*
* stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
* a kernel address.
*
* @fp: the frame pointer to be updated to its kernel address.
* @type: the stack type associated with frame pointer @fp
*
* Returns true on success and updates @fp to the corresponding
* kernel virtual address; otherwise returns false.
*/
typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
enum stack_type type);
/*
* on_accessible_stack_fn() - Check whether a stack range is on any
* of the possible stacks.
*
* @tsk: task whose stack is being unwound
* @sp: stack address being checked
* @size: size of the stack range being checked
* @info: stack unwinding context
*/
typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info);
static inline int unwind_next_common(struct unwind_state *state,
struct stack_info *info,
on_accessible_stack_fn accessible,
stack_trace_translate_fp_fn translate_fp)
{
unsigned long fp = state->fp, kern_fp = fp;
struct task_struct *tsk = state->task;
if (fp & 0x7)
return -EINVAL;
if (!accessible(tsk, fp, 16, info))
return -EINVAL;
if (test_bit(info->type, state->stacks_done))
return -EINVAL;
/*
* If fp is not from the current address space perform the necessary
* translation before dereferencing it to get the next fp.
*/
if (translate_fp && !translate_fp(&kern_fp, info->type))
return -EINVAL;
/*
* As stacks grow downward, any valid record on the same stack must be
* at a strictly higher address than the prior record.
*
* Stacks can nest in several valid orders, e.g.
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
* HYP -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
* stack.
*/
if (info->type == state->prev_type) {
if (fp <= state->prev_fp)
return -EINVAL;
} else {
__set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
* prev_type are only meaningful to the next unwind_next() invocation.
*/
state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
state->prev_fp = fp;
state->prev_type = info->type;
return 0;
}
#endif /* __ASM_STACKTRACE_COMMON_H */

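To make the 'unwinder howto' at the top of stacktrace/common.h concrete, here is a minimal sketch of an unwinder built on these primitives, running in kernel context with no FP translation (translate_fp == NULL). The names my_stack_low, my_stack_high, my_on_accessible_stack() and my_unwind() are illustrative only; arch/arm64/kernel/stacktrace.c and the KVM stacktrace files below are the real implementations.

#include <linux/sched.h>
#include <linux/stacktrace.h>		/* stack_trace_consume_fn */
#include <asm/stacktrace/common.h>

/* Hypothetical bounds of the single stack this toy unwinder knows about. */
static unsigned long my_stack_low, my_stack_high;

static bool my_on_accessible_stack(const struct task_struct *tsk,
				   unsigned long sp, unsigned long size,
				   struct stack_info *info)
{
	if (info)
		info->type = STACK_TYPE_UNKNOWN;

	return on_stack(sp, size, my_stack_low, my_stack_high,
			STACK_TYPE_TASK, info);
}

static void my_unwind(unsigned long fp, unsigned long pc,
		      stack_trace_consume_fn consume_entry, void *cookie)
{
	struct unwind_state state;
	struct stack_info info;

	/* Prime the common state, then seed the starting fp/pc. */
	unwind_init_common(&state, current);
	state.fp = fp;
	state.pc = pc;

	/* Walk frame records until the callback or the validation stops us. */
	while (consume_entry(cookie, state.pc)) {
		if (unwind_next_common(&state, &info,
				       my_on_accessible_stack, NULL))
			break;
	}
}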

@@ -0,0 +1,55 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM nVHE hypervisor stack tracing support.
*
* The unwinder implementation depends on the nVHE mode:
*
* 1) Non-protected nVHE mode - the host can directly access the
* HYP stack pages and unwind the HYP stack in EL1. This saves having
* to allocate shared buffers for the host to read the unwound
* stacktrace.
*
* 2) pKVM (protected nVHE) mode - the host cannot directly access
* the HYP memory. The stack is unwound in EL2 and dumped to a shared
* buffer where the host can read and print the stacktrace.
*
* Copyright (C) 2022 Google LLC
*/
#ifndef __ASM_STACKTRACE_NVHE_H
#define __ASM_STACKTRACE_NVHE_H
#include <asm/stacktrace/common.h>
/*
* kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
*
* @state : unwind_state to initialize
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*/
static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
unsigned long fp,
unsigned long pc)
{
unwind_init_common(state, NULL);
state->fp = fp;
state->pc = pc;
}
#ifndef __KVM_NVHE_HYPERVISOR__
/*
* Conventional (non-protected) nVHE HYP stack unwinder
*
* In non-protected mode, the unwinding is done from kernel proper context
* (by the host in EL1).
*/
DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
#endif /* __KVM_NVHE_HYPERVISOR__ */
#endif /* __ASM_STACKTRACE_NVHE_H */


@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
CFLAGS_syscall.o += -fno-stack-protector
# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
# can significantly impact performance. Avoid instrumenting the stack trace
# collection code to minimize this impact.
KASAN_SANITIZE_stacktrace.o := n
# It's not safe to invoke KCOV when portions of the kernel environment aren't
# available or are out-of-sync with HW state. Since `noinstr` doesn't always
# inhibit KCOV instrumentation, disable it for the entire compilation unit.


@@ -7,72 +7,90 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/irq.h>
#include <asm/pointer_auth.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
/*
* A snapshot of a frame record or fp/lr register values, along with some
* accounting information necessary for robust unwinding.
* Start an unwind from a pt_regs.
*
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
* The unwind will begin at the PC within the regs.
*
* @stacks_done: Stacks which have been entirely unwound, for which it is no
* longer valid to unwind to.
*
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
* of 0. This is used to ensure that within a stack, each
* subsequent frame record is at an increasing address.
* @prev_type: The type of stack this frame record was on, or a synthetic
* value of STACK_TYPE_UNKNOWN. This is used to detect a
* transition from one stack to another.
*
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
* The regs must be on a stack currently owned by the calling task.
*/
struct unwind_state {
unsigned long fp;
unsigned long pc;
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
unsigned long prev_fp;
enum stack_type prev_type;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
};
static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
unsigned long pc)
static inline void unwind_init_from_regs(struct unwind_state *state,
struct pt_regs *regs)
{
state->fp = fp;
state->pc = pc;
#ifdef CONFIG_KRETPROBES
state->kr_cur = NULL;
#endif
unwind_init_common(state, current);
/*
* Prime the first unwind.
*
* In unwind_next() we'll check that the FP points to a valid stack,
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
* treated as a transition to whichever stack that happens to be. The
* prev_fp value won't be used, but we set it to 0 such that it is
* definitely not an accessible stack address.
*/
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN;
state->fp = regs->regs[29];
state->pc = regs->pc;
}
/*
* Start an unwind from a caller.
*
* The unwind will begin at the caller of whichever function this is inlined
* into.
*
* The function which invokes this must be noinline.
*/
static __always_inline void unwind_init_from_caller(struct unwind_state *state)
{
unwind_init_common(state, current);
state->fp = (unsigned long)__builtin_frame_address(1);
state->pc = (unsigned long)__builtin_return_address(0);
}
/*
* Start an unwind from a blocked task.
*
* The unwind will begin at the blocked task's saved PC (i.e. the caller of
* cpu_switch_to()).
*
* The caller should ensure the task is blocked in cpu_switch_to() for the
* duration of the unwind, or the unwind will be bogus. It is never valid to
* call this for the current task.
*/
static inline void unwind_init_from_task(struct unwind_state *state,
struct task_struct *task)
{
unwind_init_common(state, task);
state->fp = thread_saved_fp(task);
state->pc = thread_saved_pc(task);
}
/*
* We can only safely access per-cpu stacks from current in a non-preemptible
* context.
*/
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
if (on_task_stack(tsk, sp, size, info))
return true;
if (tsk != current || preemptible())
return false;
if (on_irq_stack(sp, size, info))
return true;
if (on_overflow_stack(sp, size, info))
return true;
if (on_sdei_stack(sp, size, info))
return true;
return false;
}
NOKPROBE_SYMBOL(unwind_init);
/*
* Unwind from one frame record (A) to the next frame record (B).
@@ -81,53 +99,20 @@ NOKPROBE_SYMBOL(unwind_init);
* records (e.g. a cycle), determined based on the location and fp value of A
* and the location (but not the fp value) of B.
*/
static int notrace unwind_next(struct task_struct *tsk,
struct unwind_state *state)
static int notrace unwind_next(struct unwind_state *state)
{
struct task_struct *tsk = state->task;
unsigned long fp = state->fp;
struct stack_info info;
int err;
/* Final frame; nothing to unwind */
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
return -ENOENT;
if (fp & 0x7)
return -EINVAL;
if (!on_accessible_stack(tsk, fp, 16, &info))
return -EINVAL;
if (test_bit(info.type, state->stacks_done))
return -EINVAL;
/*
* As stacks grow downward, any valid record on the same stack must be
* at a strictly higher address than the prior record.
*
* Stacks can nest in several valid orders, e.g.
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
* stack.
*/
if (info.type == state->prev_type) {
if (fp <= state->prev_fp)
return -EINVAL;
} else {
set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
* prev_type are only meaningful to the next unwind_next() invocation.
*/
state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
state->prev_fp = fp;
state->prev_type = info.type;
err = unwind_next_common(state, &info, on_accessible_stack, NULL);
if (err)
return err;
state->pc = ptrauth_strip_insn_pac(state->pc);
@@ -157,8 +142,7 @@ static int notrace unwind_next(struct task_struct *tsk,
}
NOKPROBE_SYMBOL(unwind_next);
static void notrace unwind(struct task_struct *tsk,
struct unwind_state *state,
static void notrace unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie)
{
while (1) {
@@ -166,7 +150,7 @@ static void notrace unwind(struct task_struct *tsk,
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(tsk, state);
ret = unwind_next(state);
if (ret < 0)
break;
}
@@ -212,15 +196,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
{
struct unwind_state state;
if (regs)
unwind_init(&state, regs->regs[29], regs->pc);
else if (task == current)
unwind_init(&state,
(unsigned long)__builtin_frame_address(1),
(unsigned long)__builtin_return_address(0));
else
unwind_init(&state, thread_saved_fp(task),
thread_saved_pc(task));
if (regs) {
if (task != current)
return;
unwind_init_from_regs(&state, regs);
} else if (task == current) {
unwind_init_from_caller(&state);
} else {
unwind_init_from_task(&state, task);
}
unwind(task, &state, consume_entry, cookie);
unwind(&state, consume_entry, cookie);
}

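The refactored arch_stack_walk() above keeps its existing contract: the caller supplies a stack_trace_consume_fn and an opaque cookie, and the walk stops when the callback returns false or the frame chain ends. A hedged sketch of such a caller (struct trace_buf, save_entry() and record_current_backtrace() are illustrative, not part of the patch):

#include <linux/kernel.h>		/* ARRAY_SIZE */
#include <linux/sched.h>
#include <linux/stacktrace.h>

/* Illustrative fixed-size destination for the walked return addresses. */
struct trace_buf {
	unsigned long entries[16];
	unsigned int nr;
};

static bool save_entry(void *cookie, unsigned long pc)
{
	struct trace_buf *buf = cookie;

	if (buf->nr >= ARRAY_SIZE(buf->entries))
		return false;			/* tell the unwinder to stop */

	buf->entries[buf->nr++] = pc;
	return true;				/* keep unwinding */
}

static void record_current_backtrace(void)
{
	struct trace_buf buf = { .nr = 0 };

	/* task == current, regs == NULL: takes the unwind_init_from_caller() path. */
	arch_stack_walk(save_entry, &buf, current, NULL);

	/* buf.entries[0..buf.nr) now holds the return addresses. */
}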

@@ -56,4 +56,17 @@ config NVHE_EL2_DEBUG
If unsure, say N.
config PROTECTED_NVHE_STACKTRACE
bool "Protected KVM hypervisor stacktraces"
depends on NVHE_EL2_DEBUG
default n
help
Say Y here to enable pKVM hypervisor stacktraces on hyp_panic().
If using protected nVHE mode but you cannot afford the associated
memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
say N.
If unsure, or not using protected nVHE (pKVM), say N.
endif # VIRTUALIZATION


@@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o \
vgic/vgic.o vgic/vgic-init.o \


@@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);


@@ -17,6 +17,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/debug-monitors.h>
#include <asm/stacktrace/nvhe.h>
#include <asm/traps.h>
#include <kvm/arm_hypercalls.h>
@@ -353,6 +354,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
(void *)(panic_addr + kaslr_offset()));
}
/* Dump the nVHE hypervisor backtrace */
kvm_nvhe_dump_backtrace(hyp_offset);
/*
* Hyp has panicked and we're going to handle that by panicking the
* kernel. The kernel offset will be revealed in the panic so we're


@@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o


@@ -177,13 +177,8 @@ SYM_FUNC_END(__host_hvc)
b hyp_panic
.L__hyp_sp_overflow\@:
/*
* Reset SP to the top of the stack, to allow handling the hyp_panic.
* This corrupts the stack but is ok, since we won't be attempting
* any unwinding here.
*/
ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
mov sp, x0
/* Switch to the overflow stack */
adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
b hyp_panic_bad_stack
ASM_BUG()


@@ -0,0 +1,160 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM nVHE hypervisor stack tracing support.
*
* Copyright (C) 2022 Google LLC
*/
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/memory.h>
#include <asm/percpu.h>
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
DEFINE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
/*
* hyp_prepare_backtrace - Prepare non-protected nVHE backtrace.
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Save the information needed by the host to unwind the non-protected
* nVHE hypervisor stack in EL1.
*/
static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
stacktrace_info->fp = fp;
stacktrace_info->pc = pc;
}
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
#include <asm/stacktrace/nvhe.h>
DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
static bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
static bool on_hyp_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
unsigned long high = params->stack_hyp_va;
unsigned long low = high - PAGE_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
return (on_overflow_stack(sp, size, info) ||
on_hyp_stack(sp, size, info));
}
static int unwind_next(struct unwind_state *state)
{
struct stack_info info;
return unwind_next_common(state, &info, on_accessible_stack, NULL);
}
static void notrace unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry,
void *cookie)
{
while (1) {
int ret;
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(state);
if (ret < 0)
break;
}
}
/*
* pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry
*
* @arg : index of the entry in the stacktrace buffer
* @where : the program counter corresponding to the stack frame
*
* Save the return address of a stack frame to the shared stacktrace buffer.
* The host can access this shared buffer from EL1 to dump the backtrace.
*/
static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
{
unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace);
int *idx = (int *)arg;
/*
* Need 2 free slots: 1 for current entry and 1 for the
* delimiter.
*/
if (*idx > ARRAY_SIZE(pkvm_stacktrace) - 2)
return false;
stacktrace[*idx] = where;
stacktrace[++*idx] = 0UL;
return true;
}
/*
* pkvm_save_backtrace - Saves the protected nVHE HYP stacktrace
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Save the unwound stack addresses to the shared stacktrace buffer.
* The host can access this shared buffer from EL1 to dump the backtrace.
*/
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
struct unwind_state state;
int idx = 0;
kvm_nvhe_unwind_init(&state, fp, pc);
unwind(&state, pkvm_save_backtrace_entry, &idx);
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
/*
* kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Saves the information needed by the host to dump the nVHE hypervisor
* backtrace.
*/
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc)
{
if (is_protected_kvm_enabled())
pkvm_save_backtrace(fp, pc);
else
hyp_prepare_backtrace(fp, pc);
}

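pkvm_save_backtrace_entry() above keeps the shared buffer null-terminated after every write, which is what lets the host-side reader (pkvm_dump_backtrace() in arch/arm64/kvm/stacktrace.c below) simply walk entries until it hits zero. A standalone user-space sketch of that writer/reader pair, using a made-up 8-slot buffer and fake PC values instead of NVHE_STACKTRACE_SIZE and real frame records:

#include <stdio.h>
#include <stddef.h>

#define NR_SLOTS 8			/* illustrative, not NVHE_STACKTRACE_SIZE */
static unsigned long stacktrace[NR_SLOTS];

/* Writer side: mirrors pkvm_save_backtrace_entry(). */
static int save_entry(int *idx, unsigned long where)
{
	/* Need 2 free slots: 1 for the entry and 1 for the 0 delimiter. */
	if (*idx > NR_SLOTS - 2)
		return 0;

	stacktrace[*idx] = where;
	stacktrace[++*idx] = 0UL;
	return 1;
}

/* Reader side: mirrors the loop in pkvm_dump_backtrace(). */
static void dump(void)
{
	for (int i = 0; i < NR_SLOTS && stacktrace[i]; i++)
		printf(" [<%016lx>]\n", stacktrace[i]);
}

int main(void)
{
	unsigned long fake_pcs[] = { 0x1234UL, 0x5678UL, 0x9abcUL };
	int idx = 0;

	for (size_t i = 0; i < sizeof(fake_pcs) / sizeof(fake_pcs[0]); i++)
		if (!save_entry(&idx, fake_pcs[i]))
			break;

	dump();		/* prints the three entries; slot 3 holds the 0 delimiter */
	return 0;
}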

@@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@@ -375,6 +377,10 @@ asmlinkage void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt);
}
/* Prepare to dump kvm nvhe hyp stacktrace */
kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0),
_THIS_IP_);
__hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable();
}

arch/arm64/kvm/stacktrace.c (new file, 218 lines)

@@ -0,0 +1,218 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM nVHE hypervisor stack tracing support.
*
* The unwinder implementation depends on the nVHE mode:
*
* 1) Non-protected nVHE mode - the host can directly access the
* HYP stack pages and unwind the HYP stack in EL1. This saves having
* to allocate shared buffers for the host to read the unwound
* stacktrace.
*
* 2) pKVM (protected nVHE) mode - the host cannot directly access
* the HYP memory. The stack is unwound in EL2 and dumped to a shared
* buffer where the host can read and print the stacktrace.
*
* Copyright (C) 2022 Google LLC
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/stacktrace/nvhe.h>
/*
* kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to kernel VAs
*
* The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to
* allow for guard pages below the stack. Consequently, the fixed offset address
* translation macros won't work here.
*
* The kernel VA is calculated as an offset from the kernel VA of the hypervisor
* stack base.
*
* Returns true on success and updates @addr to its corresponding kernel VA;
* otherwise returns false.
*/
static bool kvm_nvhe_stack_kern_va(unsigned long *addr,
enum stack_type type)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info;
unsigned long hyp_base, kern_base, hyp_offset;
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
switch (type) {
case STACK_TYPE_HYP:
kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
hyp_base = (unsigned long)stacktrace_info->stack_base;
break;
case STACK_TYPE_OVERFLOW:
kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
hyp_base = (unsigned long)stacktrace_info->overflow_stack_base;
break;
default:
return false;
}
hyp_offset = *addr - hyp_base;
*addr = kern_base + hyp_offset;
return true;
}
static bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
unsigned long high = low + OVERFLOW_STACK_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
static bool on_hyp_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->stack_base;
unsigned long high = low + PAGE_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
return (on_overflow_stack(sp, size, info) ||
on_hyp_stack(sp, size, info));
}
static int unwind_next(struct unwind_state *state)
{
struct stack_info info;
return unwind_next_common(state, &info, on_accessible_stack,
kvm_nvhe_stack_kern_va);
}
static void unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie)
{
while (1) {
int ret;
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(state);
if (ret < 0)
break;
}
}
/*
* kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry
*
* @arg : the hypervisor offset, used for address translation
* @where : the program counter corresponding to the stack frame
*/
static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where)
{
unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
unsigned long hyp_offset = (unsigned long)arg;
/* Mask tags and convert to kern addr */
where = (where & va_mask) + hyp_offset;
kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset()));
return true;
}
static void kvm_nvhe_dump_backtrace_start(void)
{
kvm_err("nVHE call trace:\n");
}
static void kvm_nvhe_dump_backtrace_end(void)
{
kvm_err("---[ end nVHE call trace ]---\n");
}
/*
* hyp_dump_backtrace - Dump the non-protected nVHE backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*
* The host can directly access HYP stack pages in non-protected
* mode, so the unwinding is done directly from EL1. This removes
* the need for shared buffers between host and hypervisor for
* the stacktrace.
*/
static void hyp_dump_backtrace(unsigned long hyp_offset)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info;
struct unwind_state state;
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);
kvm_nvhe_dump_backtrace_start();
unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset);
kvm_nvhe_dump_backtrace_end();
}
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
pkvm_stacktrace);
/*
* pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*
* Dumping of the pKVM HYP backtrace is done by reading the
* stack addresses from the shared stacktrace buffer, since the
* host cannot directly access hypervisor memory in protected
* mode.
*/
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
unsigned long *stacktrace
= (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace);
int i;
kvm_nvhe_dump_backtrace_start();
/* The saved stacktrace is terminated by a null entry */
for (i = 0;
i < ARRAY_SIZE(kvm_nvhe_sym(pkvm_stacktrace)) && stacktrace[i];
i++)
kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
kvm_nvhe_dump_backtrace_end();
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
/*
* kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*/
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset)
{
if (is_protected_kvm_enabled())
pkvm_dump_backtrace(hyp_offset);
else
hyp_dump_backtrace(hyp_offset);
}