linux-stable/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012-2015 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__
#define __ARM64_KVM_HYP_SYSREG_SR_H__

#include <linux/compiler.h>
#include <linux/kvm_host.h>

#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
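
/*
 * These helpers are the sysreg save/restore building blocks shared by
 * the VHE and nVHE world-switch code. As a rough sketch (the actual
 * callers live in the hyp/{nvhe,vhe}/sysreg-sr.c files), a guest-exit
 * save sequence composes them along these lines:
 *
 *	__sysreg_save_el1_state(guest_ctxt);
 *	__sysreg_save_common_state(guest_ctxt);
 *	__sysreg_save_user_state(guest_ctxt);
 *	__sysreg_save_el2_return_state(guest_ctxt);
 */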

static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
	ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
}

static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
{
	ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0);
	ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
}
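
/*
 * A host context carries its vcpu pointer in __hyp_running_vcpu; a
 * guest context is embedded in the vcpu itself, so fall back to
 * container_of() when that pointer is NULL.
 */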
static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;

	if (!vcpu)
		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);

	return vcpu;
}

static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);

	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
}

static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt)
{
	struct kvm_vcpu *vcpu;

	if (!cpus_have_final_cap(ARM64_HAS_S1PIE))
		return false;

	vcpu = ctxt_to_vcpu(ctxt);
	return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1PIE, IMP);
}
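
/*
 * Save the EL1 system register state of the context currently resident
 * on the CPU, so that the other world's state can be installed.
 */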
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
	ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
	ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR);
	ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0);
	ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1);
	ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2))
		ctxt_sys_reg(ctxt, TCR2_EL1) = read_sysreg_el1(SYS_TCR2);
	ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
	ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
	ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1);
	ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR);
	ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR);
	ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR);
	ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
	ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR);
	ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
	if (ctxt_has_s1pie(ctxt)) {
		ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR);
		ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0);
	}
	ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
	ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);

	if (ctxt_has_mte(ctxt)) {
		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
	}

	ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
	ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
	ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
}
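
/*
 * Capture the exception return state (ELR/SPSR as seen at EL2)
 * describing where the saved context was running when EL2 took over.
 */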
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
{
	ctxt->regs.pc = read_sysreg_el2(SYS_ELR);

	/*
	 * Guest PSTATE gets saved at guest fixup time in all
	 * cases. We still need to handle the nVHE host side here.
	 */
	if (!has_vhe() && ctxt->__hyp_running_vcpu)
		ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
		ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
}

static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
	write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
}

static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
{
	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0);
	write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0);
}
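
/*
 * Restore the EL1 system register state of the target context. The
 * ordering of the SCTLR_EL1/TCR_EL1 writes deals with
 * ARM64_WORKAROUND_SPECULATIVE_AT on nVHE; see the comments below.
 */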
static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
	write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);

	if (has_vhe() ||
	    !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
		write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
		write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
	} else if (!ctxt->__hyp_running_vcpu) {
		/*
		 * Must only be done for guest registers, hence the context
		 * test. We're coming from the host, so SCTLR.M is already
		 * set. Pairs with nVHE's __activate_traps().
		 */
		write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) |
				  TCR_EPD1_MASK | TCR_EPD0_MASK),
				 SYS_TCR);
		isb();
	}

	write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR);
	write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1);
	if (cpus_have_final_cap(ARM64_HAS_TCR2))
		write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1), SYS_TCR2);
	write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
	write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
	write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1);
	write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR);
	write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR);
	write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR);
	write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR);
	write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR);
	write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL);
	if (ctxt_has_s1pie(ctxt)) {
		write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR);
		write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
	}
	write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);

	if (ctxt_has_mte(ctxt)) {
		write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
		write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
	}

	if (!has_vhe() &&
	    cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
	    ctxt->__hyp_running_vcpu) {
		/*
		 * Must only be done for host registers, hence the context
		 * test. Pairs with nVHE's __deactivate_traps().
		 */
		isb();
		/*
		 * At this stage, and thanks to the above isb(), S2 is
		 * deconfigured and disabled. We can now restore the host's
		 * S1 configuration: SCTLR, and only then TCR.
		 */
		write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
		isb();
		write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
	}

	write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1);
	write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR);
	write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
}

/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */
static inline u64 to_hw_pstate(const struct kvm_cpu_context *ctxt)
{
	u64 mode = ctxt->regs.pstate & (PSR_MODE_MASK | PSR_MODE32_BIT);

	switch (mode) {
	case PSR_MODE_EL2t:
		mode = PSR_MODE_EL1t;
		break;
	case PSR_MODE_EL2h:
		mode = PSR_MODE_EL1h;
		break;
	}

	return (ctxt->regs.pstate & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
}
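
/*
 * Program the EL2 exception return state (ELR_EL2/SPSR_EL2) so that
 * the following ERET resumes the saved context.
 */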
static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
{
	u64 pstate = to_hw_pstate(ctxt);
	u64 mode = pstate & PSR_AA32_MODE_MASK;

	/*
	 * Safety check to ensure we're setting the CPU up to enter the guest
	 * in a less privileged mode.
	 *
	 * If we are attempting a return to EL2 or higher in AArch64 state,
	 * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
	 * we'll take an illegal exception state exception immediately after
	 * the ERET to the guest. Attempts to return to AArch32 Hyp will
	 * result in an illegal exception return because EL2's execution state
	 * is determined by SCR_EL3.RW.
	 */
	if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
		pstate = PSR_MODE_EL2h | PSR_IL_BIT;

	write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
	write_sysreg_el2(pstate, SYS_SPSR);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
		write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
}
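
/*
 * The AArch32 banked SPSRs and the 32-bit fault/debug registers only
 * exist for a 32-bit EL1 guest; skip them otherwise.
 */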
static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
{
	if (!vcpu_el1_is_32bit(vcpu))
		return;

	vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt);
	vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und);
	vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq);
	vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq);

	__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
	__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);

	if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
		__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
}

static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
{
	if (!vcpu_el1_is_32bit(vcpu))
		return;

	write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt);
	write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und);
	write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq);
	write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq);

	write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
	write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);

	if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
		write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
}

#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */