KVM/arm64 updates for Linux 5.8:

- Move the arch-specific code into arch/arm64/kvm
- Start the post-32bit cleanup
- Cherry-pick a few non-invasive pre-NV patches
 -----BEGIN PGP SIGNATURE-----
 
 iQJDBAABCgAtFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAl7RLp8PHG1hekBrZXJu
 ZWwub3JnAAoJECPQ0LrRPXpD/iAQAJOHsS1PT9y/Gefam5os9FqKpogj68e3rx9k
 XfPcweexBVqmDWSI4vmL9xHW2F7z4EwAE4dIDsTCKHpihK30+jH8l12tOJBz35yp
 MR1hYjv43F54xzKkkuP4F4wo3Ygg4ipjHZPReGkaGj1QOQs6N/YKa1aSSYfzkzCz
 VLCSqPQz45CkGPYEGwuPn13AjHqGQAwPhteJNAoCxViw1KAldmoqDk6kbKB+b+7a
 2oIvxiTZejICsgSX6UvqQYNG52AyZ/5Daq8iraaigQ8sGyKr+/2Yi+3RUUH6p7ns
 aCsictk+RS3BzMAKDw6MPYc7OhJBhxQEV1pdiPpt0tpS4L9LNmBagKzlaBKZhwdr
 dYDAjOlbgZZUJpKnlBAipuVlQySHdm2WjXr4msdY69D7OGxmkzU/zkSIokqdA2hr
 MuL5W1v2Z1UpxyVltb+c/4lPcFZNnRI0Mz1WcvliEojlf2zzKYMcBAl3bTiAuil5
 aTT2+1G0OSCfUfr8Zart4LoAHeczw4zG/Pern+hl92eMXUlX3pIcqzQaLtVmmEE/
 ecPShMowKsXOOGGp/T8Q04N1fr6KzmufP5+kgJDFZfo6iJ6r5uQ9G8nuLmp3wQOX
 c9mNCwdSxrFBTJ10KfLHquKqwfl18VXzKDx1pzO5nSupmKWfWZ5YFO8j2709e83x
 R42MqKEG
 =aD+9
 -----END PGP SIGNATURE-----

Merge tag 'kvmarm-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for Linux 5.8:

- Move the arch-specific code into arch/arm64/kvm
- Start the post-32bit cleanup
- Cherry-pick a few non-invasive pre-NV patches
This commit is contained in:
Paolo Bonzini 2020-06-01 04:26:27 -04:00
commit 380609445c
56 changed files with 629 additions and 639 deletions

View file

@ -5799,7 +5799,7 @@ will be initialized to 1 when created. This also improves performance because
dirty logging can be enabled gradually in small chunks on the first call
to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on
KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
x86 for now).
x86 and arm64 for now).
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make

View file

@ -9295,7 +9295,6 @@ F: arch/arm64/include/asm/kvm*
F: arch/arm64/include/uapi/asm/kvm*
F: arch/arm64/kvm/
F: include/kvm/arm_*
F: virt/kvm/arm/
KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
L: linux-mips@vger.kernel.org

View file

@ -64,12 +64,14 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
extern void __kvm_enable_ssbs(void);
extern u64 __vgic_v3_get_ich_vtr_el2(void);
extern u64 __vgic_v3_read_vmcr(void);
extern void __vgic_v3_write_vmcr(u32 vmcr);

View file

@ -46,6 +46,9 @@
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int kvm_sve_max_vl;
@ -112,12 +115,8 @@ struct kvm_vcpu_fault_info {
u64 disr_el1; /* Deferred [SError] Status Register */
};
/*
* 0 is reserved as an invalid value.
* Order should be kept in sync with the save/restore code.
*/
enum vcpu_sysreg {
__INVALID_SYSREG__,
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
CSSELR_EL1, /* Cache Size Selection Register */
SCTLR_EL1, /* System Control Register */
@ -532,39 +531,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
}
void __kvm_enable_ssbs(void);
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
/*
* Calculate the raw per-cpu offset without a translation from the
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
*/
u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) -
(u64)kvm_ksym_ref(kvm_host_data));
/*
* Call initialization code, and switch to the full blown HYP code.
* If the cpucaps haven't been finalized yet, something has gone very
* wrong, and hyp will crash and burn when it uses any
* cpus_have_const_cap() wrapper.
*/
BUG_ON(!system_capabilities_finalized());
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
/*
* Disabling SSBD on a non-VHE system requires us to enable SSBS
* at EL2.
*/
if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) &&
arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
kvm_call_hyp(__kvm_enable_ssbs);
}
}
static inline bool kvm_arch_requires_vhe(void)
{
/*
@ -600,8 +566,6 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
static inline void __cpu_init_stage2(void) {}
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);

View file

@ -56,12 +56,12 @@
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __timer_enable_traps(struct kvm_vcpu *vcpu);

View file

@ -363,8 +363,6 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
}
}
#define kvm_virt_to_phys(x) __pa_symbol(x)
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
@ -473,7 +471,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
extern void *__kvm_bp_vect_base;
extern int __kvm_harden_el2_vector_slot;
/* This is only called on a VHE system */
/* This is called on both VHE and !VHE systems */
static inline void *kvm_get_hyp_vector(void)
{
struct bp_hardening_data *data = arm64_get_bp_hardening_data();

View file

@ -35,6 +35,7 @@
#define GIC_PRIO_PSR_I_SET (1 << 4)
/* Additional SPSR bits not exposed in the UABI */
#define PSR_MODE_THREAD_BIT (1 << 0)
#define PSR_IL_BIT (1 << 20)
/* AArch32-specific ptrace requests */

View file

@ -85,7 +85,7 @@ static inline bool is_kernel_in_hyp_mode(void)
static __always_inline bool has_vhe(void)
{
if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN))
return true;
return false;

View file

@ -96,7 +96,7 @@ int main(void)
DEFINE(CPU_BOOT_PTRAUTH_KEY, offsetof(struct secondary_data, ptrauth_key));
#endif
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));

View file

@ -234,7 +234,7 @@ static int detect_harden_bp_fw(void)
smccc_end = NULL;
break;
#if IS_ENABLED(CONFIG_KVM_ARM_HOST)
#if IS_ENABLED(CONFIG_KVM)
case SMCCC_CONDUIT_SMC:
cb = call_smc_arch_workaround_1;
smccc_start = __smccc_workaround_1_smc;

View file

@ -430,7 +430,7 @@ static void __init hyp_mode_check(void)
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
if (IS_ENABLED(CONFIG_KVM))
kvm_compute_layout();
}

View file

@ -3,7 +3,6 @@
# KVM configuration
#
source "virt/kvm/Kconfig"
source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION
@ -18,7 +17,7 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
config KVM
menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
# for TASKSTATS/TASK_DELAY_ACCT:
@ -28,13 +27,11 @@ config KVM
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
select KVM_ARM_HOST
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select SRCU
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
select KVM_ARM_PMU if HW_PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
@ -45,23 +42,24 @@ config KVM
select TASK_DELAY_ACCT
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
levels of fake page tables.
If unsure, say N.
config KVM_ARM_HOST
bool
---help---
Provides host support for ARM processors.
if KVM
source "virt/kvm/Kconfig"
config KVM_ARM_PMU
bool
bool "Virtual Performance Monitoring Unit (PMU) support"
depends on HW_PERF_EVENTS
default y
---help---
Adds support for a virtual Performance Monitoring Unit (PMU) in
virtual machines.
config KVM_INDIRECT_VECTORS
def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS
endif # KVM
endif # VIRTUALIZATION

View file

@ -3,37 +3,25 @@
# Makefile for Kernel-based Virtual Machine module
#
ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic
ccflags-y += -I $(srctree)/$(src)
KVM=../../../virt/kvm
obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp/
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM) += hyp/
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
$(KVM)/vfio.o $(KVM)/irqchip.o \
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \
aarch32.o arch_timer.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
vgic/vgic-its.o vgic/vgic-debug.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o

View file

@ -451,17 +451,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
static void set_cntvoff(u64 cntvoff)
{
u32 low = lower_32_bits(cntvoff);
u32 high = upper_32_bits(cntvoff);
/*
* Since kvm_call_hyp doesn't fully support the ARM PCS especially on
* 32-bit systems, but rather passes register by register shifted one
* place (we put the function address in r0/x0), we cannot simply pass
* a 64-bit value as an argument, but have to split the value in two
* 32-bit halves.
*/
kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}
static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)

View file

@ -22,7 +22,7 @@
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace_arm.h"
#include <linux/uaccess.h>
#include <asm/ptrace.h>
@ -95,6 +95,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return r;
}
/*
 * Default limit on the number of VCPUs: the value reported by
 * kvm_vgic_get_max_vcpus() when a vgic is present, KVM_MAX_VCPUS otherwise.
 */
static int kvm_arm_default_max_vcpus(void)
{
	if (!vgic_present)
		return KVM_MAX_VCPUS;

	return kvm_vgic_get_max_vcpus();
}
/**
* kvm_arch_init_vm - initializes a VM data structure
* @kvm: pointer to the KVM struct
@ -128,8 +133,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.vmid.vmid_gen = 0;
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->arch.max_vcpus = vgic_present ?
kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
return ret;
out_free_stage2_pgd:
@ -204,10 +208,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = num_online_cpus();
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
case KVM_CAP_MAX_VCPU_ID:
r = KVM_MAX_VCPU_ID;
if (kvm)
r = kvm->arch.max_vcpus;
else
r = kvm_arm_default_max_vcpus();
break;
case KVM_CAP_MSI_DEVID:
if (!kvm)
@ -455,9 +460,9 @@ void force_vm_exit(const cpumask_t *mask)
*
* The hardware supports a limited set of values with the value zero reserved
* for the host, so we check if an assigned value belongs to a previous
* generation, which which requires us to assign a new value. If we're the
* first to use a VMID for the new generation, we must flush necessary caches
* and TLBs on all CPUs.
* generation, which requires us to assign a new value. If we're the first to
* use a VMID for the new generation, we must flush necessary caches and TLBs
* on all CPUs.
*/
static bool need_new_vmid_gen(struct kvm_vmid *vmid)
{
@ -984,8 +989,11 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
/*
* Ensure a rebooted VM will fault in RAM pages and detect if the
* guest MMU is turned off and flush the caches as needed.
*
* S2FWB enforces all memory accesses to RAM being cacheable, which
* ensures that the cache is always coherent, so the unmap is skipped.
*/
if (vcpu->arch.has_run_once)
if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
stage2_unmap_vm(vcpu->kvm);
vcpu_reset_hcr(vcpu);
@ -1266,19 +1274,41 @@ static void cpu_init_hyp_mode(void)
{
phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
unsigned long stack_page;
unsigned long vector_ptr;
unsigned long tpidr_el2;
/* Switch from the HYP stub to our own HYP init vector */
__hyp_set_vectors(kvm_get_idmap_vector());
/*
* Calculate the raw per-cpu offset without a translation from the
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
*/
tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) -
(unsigned long)kvm_ksym_ref(kvm_host_data));
pgd_ptr = kvm_mmu_get_httbr();
stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
vector_ptr = (unsigned long)kvm_get_hyp_vector();
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
/*
* Call initialization code, and switch to the full blown HYP code.
* If the cpucaps haven't been finalized yet, something has gone very
* wrong, and hyp will crash and burn when it uses any
* cpus_have_const_cap() wrapper.
*/
BUG_ON(!system_capabilities_finalized());
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
/*
* Disabling SSBD on a non-VHE system requires us to enable SSBS
* at EL2.
*/
if (this_cpu_has_cap(ARM64_SSBS) &&
arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
kvm_call_hyp(__kvm_enable_ssbs);
}
}
static void cpu_hyp_reset(void)

View file

@ -266,7 +266,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
/*
* Vector lengths supported by the host can't currently be
* hidden from the guest individually: instead we can only set a
* maxmium via ZCR_EL2.LEN. So, make sure the available vector
* maximum via ZCR_EL2.LEN. So, make sure the available vector
* lengths match the set requested exactly up to the requested
* maximum:
*/
@ -336,7 +336,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region,
unsigned int reg_num;
unsigned int reqoffset, reqlen; /* User-requested offset and length */
unsigned int maxlen; /* Maxmimum permitted length */
unsigned int maxlen; /* Maximum permitted length */
size_t sve_state_size;

View file

@ -23,7 +23,7 @@
#include <kvm/arm_hypercalls.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace_handle_exit.h"
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);

View file

@ -6,20 +6,10 @@
ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
KVM=../../../../virt/kvm
obj-$(CONFIG_KVM) += hyp.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o
obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \
debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o
# KVM code is run at a different exception level with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may

View file

@ -270,8 +270,8 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
{
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(vcpu);
__vgic_v3_deactivate_traps(vcpu);
__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
}
}
@ -279,8 +279,8 @@ static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
{
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_activate_traps(vcpu);
__vgic_v3_restore_state(vcpu);
__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}
}

View file

@ -10,9 +10,8 @@
#include <asm/kvm_hyp.h>
void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff)
{
u64 cntvoff = (u64)cntvoff_high << 32 | cntvoff_low;
write_sysreg(cntvoff, cntvoff_el2);
}

View file

@ -194,10 +194,9 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
return val;
}
void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u64 used_lrs = cpu_if->used_lrs;
/*
* Make sure stores to the GIC via the memory mapped interface
@ -230,10 +229,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
}
}
void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u64 used_lrs = cpu_if->used_lrs;
int i;
if (used_lrs || cpu_if->its_vpe.its_vm) {
@ -257,10 +255,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
}
}
void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
* Group0 interrupt (as generated in GICv2 mode) to be
@ -306,9 +302,8 @@ void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
if (!cpu_if->vgic_sre) {
@ -333,15 +328,11 @@ void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
write_gicreg(0, ICH_HCR_EL2);
}
void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
struct vgic_v3_cpu_if *cpu_if;
u64 val;
u32 nr_pre_bits;
vcpu = kern_hyp_va(vcpu);
cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
val = read_gicreg(ICH_VTR_EL2);
nr_pre_bits = vtr_to_nr_pre_bits(val);
@ -370,15 +361,11 @@ void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
}
}
void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu)
void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
struct vgic_v3_cpu_if *cpu_if;
u64 val;
u32 nr_pre_bits;
vcpu = kern_hyp_va(vcpu);
cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
val = read_gicreg(ICH_VTR_EL2);
nr_pre_bits = vtr_to_nr_pre_bits(val);
@ -431,8 +418,6 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
write_gicreg(vmcr, ICH_VMCR_EL2);
}
#ifdef CONFIG_ARM64
static int __hyp_text __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
@ -453,7 +438,7 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
u32 vmcr,
u64 *lr_val)
{
unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
u8 priority = GICv3_IDLE_PRIORITY;
int i, lr = -1;
@ -492,7 +477,7 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
int intid, u64 *lr_val)
{
unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
int i;
for (i = 0; i < used_lrs; i++) {
@ -579,7 +564,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
/*
* The priority value is independent of any of the BPR values, so we
* normalize it using the minumal BPR value. This guarantees that no
* normalize it using the minimal BPR value. This guarantees that no
* matter what the guest does with its BPR, we can always set/get the
* same value of a priority.
*/
@ -1126,5 +1111,3 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
return 1;
}
#endif

View file

@ -26,28 +26,12 @@ enum exception_type {
except_type_serror = 0x180,
};
static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
{
u64 exc_offset;
switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
case PSR_MODE_EL1t:
exc_offset = CURRENT_EL_SP_EL0_VECTOR;
break;
case PSR_MODE_EL1h:
exc_offset = CURRENT_EL_SP_ELx_VECTOR;
break;
case PSR_MODE_EL0t:
exc_offset = LOWER_EL_AArch64_VECTOR;
break;
default:
exc_offset = LOWER_EL_AArch32_VECTOR;
}
return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
}
/*
* This performs the exception entry at a given EL (@target_mode), stashing PC
* and PSTATE into ELR and SPSR respectively, and computes the new PC/PSTATE.
* The EL passed to this function *must* be a non-secure, privileged mode with
* bit 0 being set (PSTATE.SP == 1).
*
* When an exception is taken, most PSTATE fields are left unchanged in the
* handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
* of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
@ -59,10 +43,35 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
* Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
* MSB to LSB.
*/
static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
enum exception_type type)
{
unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
unsigned long old, new;
unsigned long sctlr, vbar, old, new, mode;
u64 exc_offset;
mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
if (mode == target_mode)
exc_offset = CURRENT_EL_SP_ELx_VECTOR;
else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
exc_offset = CURRENT_EL_SP_EL0_VECTOR;
else if (!(mode & PSR_MODE32_BIT))
exc_offset = LOWER_EL_AArch64_VECTOR;
else
exc_offset = LOWER_EL_AArch32_VECTOR;
switch (target_mode) {
case PSR_MODE_EL1h:
vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1);
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
break;
default:
/* Only EL1h is handled as a target mode; anything else is a caller bug */
BUG();
}
*vcpu_pc(vcpu) = vbar + exc_offset + type;
old = *vcpu_cpsr(vcpu);
new = 0;
@ -105,9 +114,10 @@ static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
new |= PSR_I_BIT;
new |= PSR_F_BIT;
new |= PSR_MODE_EL1h;
new |= target_mode;
return new;
*vcpu_cpsr(vcpu) = new;
vcpu_write_spsr(vcpu, old);
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@ -116,11 +126,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u32 esr = 0;
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
*vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
vcpu_write_spsr(vcpu, cpsr);
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
@ -148,14 +154,9 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
static void inject_undef64(struct kvm_vcpu *vcpu)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
*vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
vcpu_write_spsr(vcpu, cpsr);
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
/*
* Build an unknown exception, depending on the instruction

View file

@ -131,7 +131,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
/*
* No valid syndrome? Ask userspace for help if it has
* voluntered to do so, and bail out otherwise.
* volunteered to do so, and bail out otherwise.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {

View file

@ -422,6 +422,9 @@ static void stage2_flush_memslot(struct kvm *kvm,
next = stage2_pgd_addr_end(kvm, addr, end);
if (!stage2_pgd_none(kvm, *pgd))
stage2_flush_puds(kvm, pgd, addr, next);
if (next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
@ -784,7 +787,7 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
mutex_lock(&kvm_hyp_pgd_mutex);
/*
* This assumes that we we have enough space below the idmap
* This assumes that we have enough space below the idmap
* page to allocate our VAs. If not, the check below will
* kick. A potential alternative would be to detect that
* overflow and switch to an allocation above the idmap.
@ -964,7 +967,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
* stage2_unmap_vm - Unmap Stage-2 RAM mappings
* @kvm: The struct kvm pointer
*
* Go through the memregions and unmap any reguler RAM
* Go through the memregions and unmap any regular RAM
* backing memory already mapped to the VM.
*/
void stage2_unmap_vm(struct kvm *kvm)
@ -1372,47 +1375,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
return ret;
}
static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
kvm_pfn_t pfn = *pfnp;
gfn_t gfn = *ipap >> PAGE_SHIFT;
if (kvm_is_transparent_hugepage(pfn)) {
unsigned long mask;
/*
* The address we faulted on is backed by a transparent huge
* page. However, because we map the compound huge page and
* not the individual tail page, we need to transfer the
* refcount to the head page. We have to be careful that the
* THP doesn't start to split while we are adjusting the
* refcounts.
*
* We are sure this doesn't happen, because mmu_notifier_retry
* was successful and we are holding the mmu_lock, so if this
* THP is trying to split, it will be blocked in the mmu
* notifier before touching any of the pages, specifically
* before being able to call __split_huge_page_refcount().
*
* We can therefore safely transfer the refcount from PG_tail
* to PG_head and switch the pfn from a tail page to the head
* page accordingly.
*/
mask = PTRS_PER_PMD - 1;
VM_BUG_ON((gfn & mask) != (pfn & mask));
if (pfn & mask) {
*ipap &= PMD_MASK;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
kvm_get_pfn(pfn);
*pfnp = pfn;
}
return true;
}
return false;
}
/**
* stage2_wp_ptes - write protect PMD range
* @pmd: pointer to pmd entry
@ -1607,6 +1569,10 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
hva_t uaddr_start, uaddr_end;
size_t size;
/* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
if (map_size == PAGE_SIZE)
return true;
size = memslot->npages * PAGE_SIZE;
gpa_start = memslot->base_gfn << PAGE_SHIFT;
@ -1626,7 +1592,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
* |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
* +-----+--------------------+--------------------+---+
*
* memslot->base_gfn << PAGE_SIZE:
* memslot->base_gfn << PAGE_SHIFT:
* +---+--------------------+--------------------+-----+
* |abc|def Stage-2 block | Stage-2 block |tvxyz|
* +---+--------------------+--------------------+-----+
@ -1656,6 +1622,59 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
(hva & ~(map_size - 1)) + map_size <= uaddr_end;
}
/*
 * Work out whether the page backing @hva belongs to a transparent huge
 * page (THP) that stage2 can map as a block. When it does, *@pfnp and
 * *@ipap are rewritten to describe the block rather than the single page.
 * Only PMD_SIZE THPs are handled for now; other THP sizes will need this
 * function to be extended.
 *
 * Returns the mapping size to use: PMD_SIZE or PAGE_SIZE.
 */
static unsigned long
transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
			    unsigned long hva, kvm_pfn_t *pfnp,
			    phys_addr_t *ipap)
{
	kvm_pfn_t tail_pfn = *pfnp;
	kvm_pfn_t head_pfn;

	/* Only THP-backed pages are candidates for a block mapping. */
	if (!kvm_is_transparent_hugepage(tail_pfn))
		return PAGE_SIZE;

	/*
	 * The HVA and IPA must be sufficiently aligned and the block must
	 * be contained within the memslot; otherwise use page mapping.
	 */
	if (!fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE))
		return PAGE_SIZE;

	/*
	 * The faulting address is backed by a transparent huge page.
	 * Since the compound huge page is what gets mapped, not the
	 * individual tail page, the refcount has to move over to the
	 * head page. Care is needed so that the THP does not start to
	 * split while the refcounts are being adjusted.
	 *
	 * That cannot happen here: mmu_notifier_retry was successful
	 * and the mmu_lock is held, so a THP that is trying to split
	 * is blocked in the mmu notifier before it touches any of the
	 * pages, and in particular before it is able to call
	 * __split_huge_page_refcount().
	 *
	 * It is therefore safe to transfer the refcount from PG_tail
	 * to PG_head and to switch the pfn from the tail page to the
	 * head page.
	 */
	*ipap &= PMD_MASK;
	kvm_release_pfn_clean(tail_pfn);
	head_pfn = tail_pfn & ~(PTRS_PER_PMD - 1);
	kvm_get_pfn(head_pfn);
	*pfnp = head_pfn;

	return PMD_SIZE;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@ -1769,20 +1788,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
if (vma_pagesize == PAGE_SIZE && !force_pte) {
/*
* Only PMD_SIZE transparent hugepages(THP) are
* currently supported. This code will need to be
* updated to support other THP sizes.
*
* Make sure the host VA and the guest IPA are sufficiently
* aligned and that the block is contained within the memslot.
*/
if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
transparent_hugepage_adjust(&pfn, &fault_ipa))
vma_pagesize = PMD_SIZE;
}
/*
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
if (vma_pagesize == PAGE_SIZE && !force_pte)
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
if (writable)
kvm_set_pfn_dirty(pfn);
@ -2185,11 +2197,11 @@ int kvm_mmu_init(void)
{
int err;
hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);
/*
* We rely on the linker script to ensure at build time that the HYP
@ -2262,11 +2274,19 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
/*
* At this point memslot has been committed and there is an
* allocated dirty_bitmap[], dirty pages will be be tracked while the
* allocated dirty_bitmap[], dirty pages will be tracked while the
* memory slot is write protected.
*/
if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
kvm_mmu_wp_memory_region(kvm, mem->slot);
if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
/*
* If initial-all-set is enabled, we don't need to write
* protect any pages because they're all reported as dirty.
* Huge pages and normal pages will be write protected gradually.
*/
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
kvm_mmu_wp_memory_region(kvm, mem->slot);
}
}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,

View file

@ -94,7 +94,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
/*
* NOTE: We always update r0 (or x0) because for PSCI v0.1
* the general puspose registers are undefined upon CPU_ON.
* the general purpose registers are undefined upon CPU_ON.
*/
reset_state->r0 = smccc_get_arg3(source_vcpu);
@ -265,10 +265,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
case PSCI_0_2_FN_SYSTEM_OFF:
kvm_psci_system_off(vcpu);
/*
* We should'nt be going back to guest VCPU after
* We shouldn't be going back to guest VCPU after
* receiving SYSTEM_OFF request.
*
* If user space accidently/deliberately resumes
* If user space accidentally/deliberately resumes
* guest VCPU after SYSTEM_OFF request then guest
* VCPU should see internal failure from PSCI return
* value. To achieve this, we preload r0 (or x0) with

View file

@ -36,15 +36,11 @@ static u32 kvm_ipa_limit;
/*
* ARMv8 Reset Values
*/
static const struct kvm_regs default_regs_reset = {
.regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
PSR_F_BIT | PSR_D_BIT),
};
#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
PSR_F_BIT | PSR_D_BIT)
static const struct kvm_regs default_regs_reset32 = {
.regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT |
PSR_AA32_I_BIT | PSR_AA32_F_BIT),
};
#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
static bool cpu_has_32bit_el1(void)
{
@ -163,7 +159,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
vl = vcpu->arch.sve_max_vl;
/*
* Resposibility for these properties is shared between
* Responsibility for these properties is shared between
* kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
* set_sve_vls(). Double-check here just to be sure:
*/
@ -249,7 +245,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
* ioctl or as part of handling a request issued by another VCPU in the PSCI
* handling code. In the first case, the VCPU will not be loaded, and in the
* second case the VCPU will be loaded. Because this function operates purely
* on the memory-backed valus of system registers, we want to do a full put if
* on the memory-backed values of system registers, we want to do a full put if
* we were loaded (handling a request) and load the values back at the end of
* the function. Otherwise we leave the state alone. In both cases, we
* disable preemption around the vcpu reset as we would otherwise race with
@ -257,9 +253,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
const struct kvm_regs *cpu_reset;
int ret = -EINVAL;
bool loaded;
u32 pstate;
/* Reset PMU outside of the non-preemptible section */
kvm_pmu_vcpu_reset(vcpu);
@ -290,16 +286,17 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
if (!cpu_has_32bit_el1())
goto out;
cpu_reset = &default_regs_reset32;
pstate = VCPU_RESET_PSTATE_SVC;
} else {
cpu_reset = &default_regs_reset;
pstate = VCPU_RESET_PSTATE_EL1;
}
break;
}
/* Reset core registers */
memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
vcpu_gp_regs(vcpu)->regs.pstate = pstate;
/* Reset system registers */
kvm_reset_sys_regs(vcpu);
@ -357,7 +354,7 @@ void kvm_set_ipa_limit(void)
*
* So clamp the ipa limit further down to limit the number of levels.
* Since we can concatenate upto 16 tables at entry level, we could
* go upto 4bits above the maximum VA addressible with the current
* go upto 4bits above the maximum VA addressable with the current
* number of levels.
*/
va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;

View file

@ -34,7 +34,7 @@
#include "trace.h"
/*
* All of this file is extremly similar to the ARM coproc.c, but the
* All of this file is extremely similar to the ARM coproc.c, but the
* types are different. My gut feeling is that it should be pretty
* easy to merge, but that would be an ABI breakage -- again. VFP
* would also need to be abstracted.
@ -64,11 +64,8 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false;
}
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
{
if (!vcpu->arch.sysregs_loaded_on_cpu)
goto immediate_read;
/*
* System registers listed in the switch are not saved on every
* exit from the guest but are only saved on vcpu_put.
@ -79,75 +76,92 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
* thread when emulating cross-VCPU communication.
*/
switch (reg) {
case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12);
case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12);
case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12);
case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12);
case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12);
case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12);
case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12);
case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12);
case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12);
case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12);
case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12);
case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12);
case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12);
case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12);
case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2);
case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break;
case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break;
case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
default: return false;
}
immediate_read:
return __vcpu_sys_reg(vcpu, reg);
return true;
}
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
{
if (!vcpu->arch.sysregs_loaded_on_cpu)
goto immediate_write;
/*
* System registers listed in the switch are not restored on every
* entry to the guest but are only restored on vcpu_load.
*
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
* should never be listed below, because the the MPIDR should only be
* set once, before running the VCPU, and never changed later.
* should never be listed below, because the MPIDR should only be set
* once, before running the VCPU, and never changed later.
*/
switch (reg) {
case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return;
case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return;
case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return;
case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return;
case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return;
case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return;
case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return;
case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return;
case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return;
case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return;
case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return;
case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return;
case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return;
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return;
case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break;
case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
default: return false;
}
immediate_write:
return true;
}
/*
 * Read a guest system register.
 *
 * When the vcpu's sysregs are loaded on the CPU and @reg is one of the
 * registers __vcpu_read_sys_reg_from_cpu() handles, the value comes from
 * the hardware register; in every other case it comes from the in-memory
 * copy (__vcpu_sys_reg()).
 */
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
	/* Recognisable poison, only ever returned if the helper fills it in. */
	u64 val = 0x8badf00d8badf00d;

	if (!vcpu->arch.sysregs_loaded_on_cpu)
		return __vcpu_sys_reg(vcpu, reg);

	if (!__vcpu_read_sys_reg_from_cpu(reg, &val))
		return __vcpu_sys_reg(vcpu, reg);

	return val;
}
/*
 * Write a guest system register.
 *
 * When the vcpu's sysregs are loaded on the CPU and @reg is one of the
 * registers __vcpu_write_sys_reg_to_cpu() handles, the value is written
 * to the hardware register only; otherwise it is stored in the in-memory
 * copy (__vcpu_sys_reg()).
 */
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
	bool written_to_cpu = false;

	if (vcpu->arch.sysregs_loaded_on_cpu)
		written_to_cpu = __vcpu_write_sys_reg_to_cpu(val, reg);

	if (!written_to_cpu)
		__vcpu_sys_reg(vcpu, reg) = val;
}
@ -1532,7 +1546,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
{ SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
{ SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 },
{ SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
@ -1571,8 +1585,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 },
{ SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
{ SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 },
{ SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 },
{ SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
{ SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 },
{ SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 },
{ SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 },
{ SYS_DESC(SYS_PMCEID0_EL0), access_pmceid },
@ -2073,12 +2087,37 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
};
/*
 * Sanity-check a sys_reg descriptor table.
 *
 * @table: first descriptor of the table
 * @n:     number of descriptors in @table
 * @is_32: true for the 32-bit (cp14/cp15) tables, which are exempt from
 *         the reset-handler check
 *
 * Two properties are verified:
 *  - for non-32-bit tables, every entry with a non-zero ->reg has a
 *    ->reset handler;
 *  - entries are strictly ordered according to cmp_sys_reg(), which also
 *    rules out duplicates.
 *
 * Returns 0 if the table is fine, 1 (after logging with kvm_err()) on the
 * first violation found.
 */
static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
			      bool is_32)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (!is_32 && table[i].reg && !table[i].reset) {
			kvm_err("sys_reg table %p entry %u lacks reset\n",
				table, i);
			return 1;
		}

		/* Compare each entry with its predecessor; skip the first. */
		if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
			kvm_err("sys_reg table %p out of order (%u)\n", table, i - 1);
			return 1;
		}
	}

	return 0;
}
/* Target specific emulation tables */
static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
/*
 * Register the target-specific sys_reg emulation tables for @target.
 *
 * Both the 64-bit and the 32-bit table are validated with
 * check_sysreg_table() first (the 64-bit one is checked first); if either
 * check fails the table is not registered and target_tables[@target] is
 * left unchanged.
 */
void kvm_register_target_sys_reg_table(unsigned int target,
				       struct kvm_sys_reg_target_table *table)
{
	if (check_sysreg_table(table->table64.table, table->table64.num, false))
		return;

	if (check_sysreg_table(table->table32.table, table->table32.num, true))
		return;

	target_tables[target] = table;
}
@ -2364,19 +2403,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
}
static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *table, size_t num,
unsigned long *bmap)
const struct sys_reg_desc *table, size_t num)
{
unsigned long i;
for (i = 0; i < num; i++)
if (table[i].reset) {
int reg = table[i].reg;
if (table[i].reset)
table[i].reset(vcpu, &table[i]);
if (reg > 0 && reg < NR_SYS_REGS)
set_bit(reg, bmap);
}
}
/**
@ -2832,32 +2865,18 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return write_demux_regids(uindices);
}
static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
{
unsigned int i;
for (i = 1; i < n; i++) {
if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
return 1;
}
}
return 0;
}
void kvm_sys_reg_table_init(void)
{
unsigned int i;
struct sys_reg_desc clidr;
/* Make sure tables are unique and in order. */
BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false));
BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true));
BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true));
BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true));
BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true));
BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false));
/* We abuse the reset function to overwrite the table itself. */
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
@ -2893,17 +2912,10 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{
size_t num;
const struct sys_reg_desc *table;
DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, };
/* Generic chip reset first (so target could override). */
reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap);
reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
table = get_target_table(vcpu->arch.target, true, &num);
reset_sys_reg_descs(vcpu, table, num, bmap);
for (num = 1; num < NR_SYS_REGS; num++) {
if (WARN(!test_bit(num, bmap),
"Didn't reset __vcpu_sys_reg(%zi)\n", num))
break;
}
reset_sys_reg_descs(vcpu, table, num);
}

View file

@ -1,216 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#ifndef _TRACE_ARM64_KVM_H
#define _TRACE_ARM64_KVM_H
#include <linux/tracepoint.h>
#include "sys_regs.h"
#include "trace_arm.h"
#include "trace_handle_exit.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
TRACE_EVENT(kvm_wfx_arm64,
TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
TP_ARGS(vcpu_pc, is_wfe),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(bool, is_wfe)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->is_wfe = is_wfe;
),
TP_printk("guest executed wf%c at: 0x%08lx",
__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
);
TRACE_EVENT(kvm_hvc_arm64,
TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
TP_ARGS(vcpu_pc, r0, imm),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(unsigned long, r0)
__field(unsigned long, imm)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->r0 = r0;
__entry->imm = imm;
),
TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
TRACE_EVENT(kvm_arm_setup_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
TRACE_EVENT(kvm_arm_clear_debug,
TP_PROTO(__u32 guest_debug),
TP_ARGS(guest_debug),
TP_STRUCT__entry(
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->guest_debug = guest_debug;
),
TP_printk("flags: 0x%08x", __entry->guest_debug)
);
TRACE_EVENT(kvm_arm_set_dreg32,
TP_PROTO(const char *name, __u32 value),
TP_ARGS(name, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(__u32, value)
),
TP_fast_assign(
__entry->name = name;
__entry->value = value;
),
TP_printk("%s: 0x%08x", __entry->name, __entry->value)
);
TRACE_DEFINE_SIZEOF(__u64);
TRACE_EVENT(kvm_arm_set_regset,
TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
TP_ARGS(type, len, control, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(int, len)
__array(u64, ctrls, 16)
__array(u64, values, 16)
),
TP_fast_assign(
__entry->name = type;
__entry->len = len;
memcpy(__entry->ctrls, control, len << 3);
memcpy(__entry->values, value, len << 3);
),
TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
__print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
__print_array(__entry->values, __entry->len, sizeof(__u64)))
);
TRACE_EVENT(trap_reg,
TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
TP_ARGS(fn, reg, is_write, write_value),
TP_STRUCT__entry(
__field(const char *, fn)
__field(int, reg)
__field(bool, is_write)
__field(u64, write_value)
),
TP_fast_assign(
__entry->fn = fn;
__entry->reg = reg;
__entry->is_write = is_write;
__entry->write_value = write_value;
),
TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
);
TRACE_EVENT(kvm_handle_sys_reg,
TP_PROTO(unsigned long hsr),
TP_ARGS(hsr),
TP_STRUCT__entry(
__field(unsigned long, hsr)
),
TP_fast_assign(
__entry->hsr = hsr;
),
TP_printk("HSR 0x%08lx", __entry->hsr)
);
TRACE_EVENT(kvm_sys_access,
TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg),
TP_ARGS(vcpu_pc, params, reg),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(bool, is_write)
__field(const char *, name)
__field(u8, Op0)
__field(u8, Op1)
__field(u8, CRn)
__field(u8, CRm)
__field(u8, Op2)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->is_write = params->is_write;
__entry->name = reg->name;
__entry->Op0 = reg->Op0;
__entry->Op0 = reg->Op0;
__entry->Op1 = reg->Op1;
__entry->CRn = reg->CRn;
__entry->CRm = reg->CRm;
__entry->Op2 = reg->Op2;
),
TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s",
__entry->vcpu_pc, __entry->name ?: "UNKN",
__entry->Op0, __entry->Op1, __entry->CRn,
__entry->CRm, __entry->Op2,
__entry->is_write ? "write" : "read")
);
TRACE_EVENT(kvm_set_guest_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
#endif /* _TRACE_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
/* This part must be outside protection */
#include <trace/define_trace.h>
#endif /* _TRACE_ARM64_KVM_H */

View file

@ -1,10 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H
#if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_ARM_ARM64_KVM_H
#include <kvm/arm_arch_timer.h>
#include <linux/tracepoint.h>
#include <asm/kvm_arm.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
@ -368,12 +367,12 @@ TRACE_EVENT(kvm_timer_emulate,
__entry->timer_idx, __entry->should_fire)
);
#endif /* _TRACE_KVM_H */
#endif /* _TRACE_ARM_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../virt/kvm/arm
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
#define TRACE_INCLUDE_FILE trace_arm
/* This part must be outside protection */
#include <trace/define_trace.h>

View file

@ -0,0 +1,215 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_HANDLE_EXIT_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HANDLE_EXIT_ARM64_KVM_H
#include <linux/tracepoint.h>
#include "sys_regs.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
/*
 * kvm_wfx_arm64: the guest executed a WFE or WFI instruction.
 * Records the guest PC and is_wfe, which selects between "wfe" and "wfi"
 * in the printed message.
 */
TRACE_EVENT(kvm_wfx_arm64,
TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
TP_ARGS(vcpu_pc, is_wfe),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(bool, is_wfe)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->is_wfe = is_wfe;
),
TP_printk("guest executed wf%c at: 0x%08lx",
__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
);
/*
 * kvm_hvc_arm64: an HVC was seen at vcpu_pc.
 * Records the PC together with the r0 and imm values supplied by the
 * caller of the tracepoint.
 */
TRACE_EVENT(kvm_hvc_arm64,
TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
TP_ARGS(vcpu_pc, r0, imm),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(unsigned long, r0)
__field(unsigned long, imm)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->r0 = r0;
__entry->imm = imm;
),
TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
/*
 * kvm_arm_setup_debug: records the vcpu pointer and the guest_debug
 * value (printed as "flags") passed by the caller.
 */
TRACE_EVENT(kvm_arm_setup_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
/*
 * kvm_arm_clear_debug: records the guest_debug value (printed as "flags")
 * passed by the caller.
 */
TRACE_EVENT(kvm_arm_clear_debug,
TP_PROTO(__u32 guest_debug),
TP_ARGS(guest_debug),
TP_STRUCT__entry(
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->guest_debug = guest_debug;
),
TP_printk("flags: 0x%08x", __entry->guest_debug)
);
/*
 * kvm_arm_set_dreg32: records a (name, 32-bit value) pair.
 * Only the name pointer is stored in the trace entry, not a copy of the
 * string. NOTE(review): presumably callers pass string literals so the
 * pointer is still valid when the buffer is read -- confirm.
 */
TRACE_EVENT(kvm_arm_set_dreg32,
TP_PROTO(const char *name, __u32 value),
TP_ARGS(name, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(__u32, value)
),
TP_fast_assign(
__entry->name = name;
__entry->value = value;
),
TP_printk("%s: 0x%08x", __entry->name, __entry->value)
);
/* sizeof(__u64) is used in the TP_printk() of kvm_arm_set_regset below. */
TRACE_DEFINE_SIZEOF(__u64);
/*
 * kvm_arm_set_regset: records a set of control/value register pairs.
 * @type is stored by pointer (as "name"); @len is the number of 64-bit
 * entries: len << 3 bytes are copied from each of @control and @value
 * into the 16-entry ctrls[] and values[] arrays.
 * NOTE(review): nothing here bounds len to 16 -- confirm that every
 * caller guarantees len <= 16.
 */
TRACE_EVENT(kvm_arm_set_regset,
TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
TP_ARGS(type, len, control, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(int, len)
__array(u64, ctrls, 16)
__array(u64, values, 16)
),
TP_fast_assign(
__entry->name = type;
__entry->len = len;
memcpy(__entry->ctrls, control, len << 3);
memcpy(__entry->values, value, len << 3);
),
TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
__print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
__print_array(__entry->values, __entry->len, sizeof(__u64)))
);
/*
 * trap_reg: records an access to register number @reg on behalf of @fn
 * (stored by pointer), whether it was a write or a read, and the
 * write_value supplied by the caller.
 */
TRACE_EVENT(trap_reg,
TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
TP_ARGS(fn, reg, is_write, write_value),
TP_STRUCT__entry(
__field(const char *, fn)
__field(int, reg)
__field(bool, is_write)
__field(u64, write_value)
),
TP_fast_assign(
__entry->fn = fn;
__entry->reg = reg;
__entry->is_write = is_write;
__entry->write_value = write_value;
),
TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
);
/*
 * kvm_handle_sys_reg: records the hsr value passed by the caller,
 * printed as "HSR 0x...".
 */
TRACE_EVENT(kvm_handle_sys_reg,
TP_PROTO(unsigned long hsr),
TP_ARGS(hsr),
TP_STRUCT__entry(
__field(unsigned long, hsr)
),
TP_fast_assign(
__entry->hsr = hsr;
),
TP_printk("HSR 0x%08lx", __entry->hsr)
);
/*
 * kvm_sys_access: records a system register access.
 * Captures the guest PC, the access direction from @params->is_write, and
 * the descriptor's name and Op0/Op1/CRn/CRm/Op2 encoding from @reg.
 * The name is stored by pointer; a NULL name is printed as "UNKN".
 */
TRACE_EVENT(kvm_sys_access,
	TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg),
	TP_ARGS(vcpu_pc, params, reg),

	TP_STRUCT__entry(
		__field(unsigned long,			vcpu_pc)
		__field(bool,				is_write)
		__field(const char *,			name)
		__field(u8,				Op0)
		__field(u8,				Op1)
		__field(u8,				CRn)
		__field(u8,				CRm)
		__field(u8,				Op2)
	),

	TP_fast_assign(
		__entry->vcpu_pc = vcpu_pc;
		__entry->is_write = params->is_write;
		__entry->name = reg->name;
		__entry->Op0 = reg->Op0;
		__entry->Op1 = reg->Op1;
		__entry->CRn = reg->CRn;
		__entry->CRm = reg->CRm;
		__entry->Op2 = reg->Op2;
	),

	TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s",
		  __entry->vcpu_pc, __entry->name ?: "UNKN",
		  __entry->Op0, __entry->Op1, __entry->CRn,
		  __entry->CRm, __entry->Op2,
		  __entry->is_write ? "write" : "read")
);
/*
 * kvm_set_guest_debug: records the vcpu pointer and the guest_debug
 * value (printed as "flags") passed by the caller.
 */
TRACE_EVENT(kvm_set_guest_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
#endif /* _TRACE_HANDLE_EXIT_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace_handle_exit
/* This part must be outside protection */
#include <trace/define_trace.h>

View file

@ -7,7 +7,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include "vgic.h"
#include "vgic/vgic.h"
#include "sys_regs.h"
static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,

View file

@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending,
#endif /* _TRACE_VGIC_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic
#define TRACE_INCLUDE_PATH ../../arch/arm64/kvm/vgic
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace

View file

@ -56,7 +56,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
cpuif->vgic_hcr &= ~GICH_HCR_UIE;
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr];
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
@ -120,7 +120,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_put_irq(vcpu->kvm, irq);
}
vgic_cpu->used_lrs = 0;
cpuif->used_lrs = 0;
}
/*
@ -427,7 +427,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u64 used_lrs = cpu_if->used_lrs;
u64 elrsr;
int i;
@ -448,7 +448,7 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
void vgic_v2_save_state(struct kvm_vcpu *vcpu)
{
void __iomem *base = kvm_vgic_global_state.vctrl_base;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
if (!base)
return;
@ -463,7 +463,7 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
void __iomem *base = kvm_vgic_global_state.vctrl_base;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u64 used_lrs = cpu_if->used_lrs;
int i;
if (!base)

View file

@ -39,7 +39,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
cpuif->vgic_hcr &= ~ICH_HCR_UIE;
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
for (lr = 0; lr < cpuif->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
u32 intid, cpuid;
struct vgic_irq *irq;
@ -111,7 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_put_irq(vcpu->kvm, irq);
}
vgic_cpu->used_lrs = 0;
cpuif->used_lrs = 0;
}
/* Requires the irq to be locked already */
@ -587,7 +587,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
int ret;
/*
* The ListRegs field is 5 bits, but there is a architectural
* The ListRegs field is 5 bits, but there is an architectural
* maximum of 16 list registers. Just ignore bit 4...
*/
kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
@ -630,12 +630,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (kvm_vgic_global_state.vcpu_base == 0)
kvm_info("disabling GICv2 emulation\n");
#ifdef CONFIG_ARM64
if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;
}
#endif
if (group0_trap || group1_trap || common_trap) {
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n",
@ -664,10 +662,10 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
if (likely(cpu_if->vgic_sre))
kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
kvm_call_hyp(__vgic_v3_restore_aprs, vcpu);
kvm_call_hyp(__vgic_v3_restore_aprs, kern_hyp_va(cpu_if));
if (has_vhe())
__vgic_v3_activate_traps(vcpu);
__vgic_v3_activate_traps(cpu_if);
WARN_ON(vgic_v4_load(vcpu));
}
@ -682,12 +680,14 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
WARN_ON(vgic_v4_put(vcpu, false));
vgic_v3_vmcr_sync(vcpu);
kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
kvm_call_hyp(__vgic_v3_save_aprs, kern_hyp_va(cpu_if));
if (has_vhe())
__vgic_v3_deactivate_traps(vcpu);
__vgic_v3_deactivate_traps(cpu_if);
}

View file

@ -786,6 +786,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
int count;
bool multi_sgi;
u8 prio = 0xff;
int i = 0;
lockdep_assert_held(&vgic_cpu->ap_list_lock);
@ -827,11 +828,14 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
}
}
vcpu->arch.vgic_cpu.used_lrs = count;
/* Nuke remaining LRs */
for ( ; count < kvm_vgic_global_state.nr_lr; count++)
vgic_clear_lr(vcpu, count);
for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
vgic_clear_lr(vcpu, i);
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
else
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
}
static inline bool can_access_vgic_from_kernel(void)
@ -849,13 +853,13 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_save_state(vcpu);
else
__vgic_v3_save_state(vcpu);
__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
}
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
int used_lrs;
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
@ -864,7 +868,12 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
if (can_access_vgic_from_kernel())
vgic_save_state(vcpu);
if (vgic_cpu->used_lrs)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
else
used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
if (used_lrs)
vgic_fold_lr_state(vcpu);
vgic_prune_ap_list(vcpu);
}
@ -874,7 +883,7 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_restore_state(vcpu);
else
__vgic_v3_restore_state(vcpu);
__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}
/* Flush our emulation state into the GIC hardware before entering the guest. */

View file

@ -274,6 +274,8 @@ struct vgic_v2_cpu_if {
u32 vgic_vmcr;
u32 vgic_apr;
u32 vgic_lr[VGIC_V2_MAX_LRS];
unsigned int used_lrs;
};
struct vgic_v3_cpu_if {
@ -291,6 +293,8 @@ struct vgic_v3_cpu_if {
* linking the Linux IRQ subsystem and the ITS together.
*/
struct its_vpe its_vpe;
unsigned int used_lrs;
};
struct vgic_cpu {
@ -300,7 +304,6 @@ struct vgic_cpu {
struct vgic_v3_cpu_if vgic_v3;
};
unsigned int used_lrs;
struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];
raw_spinlock_t ap_list_lock; /* Protects the ap_list */

View file

@ -119,7 +119,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
/*
* We're using this spinlock to sync access to the coalesced ring.
* The list doesn't need it's own lock since device registration and
* The list doesn't need its own lock since device registration and
* unregistration should only happen when kvm->slots_lock is held.
*/
spin_lock_init(&kvm->ring_lock);

View file

@ -116,7 +116,7 @@ irqfd_shutdown(struct work_struct *work)
struct kvm *kvm = irqfd->kvm;
u64 cnt;
/* Make sure irqfd has been initalized in assign path. */
/* Make sure irqfd has been initialized in assign path. */
synchronize_srcu(&kvm->irq_srcu);
/*

View file

@ -2825,7 +2825,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
*
* (a) VCPU which has not done pl-exit or cpu relax intercepted recently
* (preempted lock holder), indicated by @in_spin_loop.
* Set at the beiginning and cleared at the end of interception/PLE handler.
* Set at the beginning and cleared at the end of interception/PLE handler.
*
* (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
* chance last time (mostly it has become eligible now since we have probably