Merge branch kvm-arm64/timer-vm-offsets into kvmarm-master/next

* kvm-arm64/timer-vm-offsets: (21 commits)
  : .
  : This series aims at satisfying multiple goals:
  :
  : - allow a VMM to atomically restore a timer offset for a whole VM
  :   instead of updating the offset each time a vcpu gets its counter
  :   written
  :
  : - allow a VMM to save/restore the physical timer context, something
  :   that we cannot do at the moment due to the lack of offsetting
  :
  : - provide a framework that is suitable for NV support, where we get
  :   both global and per timer, per vcpu offsetting, and manage
  :   interrupts in a less braindead way.
  :
  : Conflict resolution involves using the new per-vcpu config lock instead
  : of the home-grown timer lock.
  : .
  KVM: arm64: Handle 32bit CNTPCTSS traps
  KVM: arm64: selftests: Augment existing timer test to handle variable offset
  KVM: arm64: selftests: Deal with spurious timer interrupts
  KVM: arm64: selftests: Add physical timer registers to the sysreg list
  KVM: arm64: nv: timers: Support hyp timer emulation
  KVM: arm64: nv: timers: Add a per-timer, per-vcpu offset
  KVM: arm64: Document KVM_ARM_SET_CNT_OFFSETS and co
  KVM: arm64: timers: Abstract the number of valid timers per vcpu
  KVM: arm64: timers: Fast-track CNTPCT_EL0 trap handling
  KVM: arm64: Elide kern_hyp_va() in VHE-specific parts of the hypervisor
  KVM: arm64: timers: Move the timer IRQs into arch_timer_vm_data
  KVM: arm64: timers: Abstract per-timer IRQ access
  KVM: arm64: timers: Rationalise per-vcpu timer init
  KVM: arm64: timers: Allow save/restoring of the physical timer
  KVM: arm64: timers: Allow userspace to set the global counter offset
  KVM: arm64: Expose {un,}lock_all_vcpus() to the rest of KVM
  KVM: arm64: timers: Allow physical offset without CNTPOFF_EL2
  KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer
  arm64: Add HAS_ECV_CNTPOFF capability
  arm64: Add CNTPOFF_EL2 register definition
  ...

Signed-off-by: Marc Zyngier <maz@kernel.org>

commit b22498c484
Author: Marc Zyngier
Date:   2023-04-21 09:31:17 +01:00

25 changed files with 759 additions and 205 deletions


@ -6029,6 +6029,44 @@ delivery must be provided via the "reg_aen" struct.
The "pad" and "reserved" fields may be used for future extensions and should be
set to 0s by userspace.

4.138 KVM_ARM_SET_COUNTER_OFFSET
--------------------------------

:Capability: KVM_CAP_COUNTER_OFFSET
:Architectures: arm64
:Type: vm ioctl
:Parameters: struct kvm_arm_counter_offset (in)
:Returns: 0 on success, < 0 on error

This capability indicates that userspace is able to apply a single VM-wide
offset to both the virtual and physical counters as viewed by the guest
using the KVM_ARM_SET_COUNTER_OFFSET ioctl and the following data structure:

::

        struct kvm_arm_counter_offset {
                __u64 counter_offset;
                __u64 reserved;
        };

The offset describes a number of counter cycles that are subtracted from
both virtual and physical counter views (similar to the effects of the
CNTVOFF_EL2 and CNTPOFF_EL2 system registers, but only global). The offset
always applies to all vcpus (already created or created after this ioctl)
for this VM.

It is userspace's responsibility to compute the offset based, for example,
on previous values of the guest counters.

Any value other than 0 for the "reserved" field may result in an error
(-EINVAL) being returned. This ioctl can also return -EBUSY if any vcpu
ioctl is issued concurrently.

Note that using this ioctl results in KVM ignoring subsequent userspace
writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG
interface. No error will be returned, but the resulting offset will not be
applied.

5. The kvm_run structure
========================
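
For illustration, a minimal userspace sketch of the ioctl documented in
4.138 above; the helper name is hypothetical, the structure and constants
are the ones added by this series, and error handling is deliberately
simplified.

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical helper: apply a VM-wide counter offset before any vcpu runs. */
    static int set_counter_offset(int vm_fd, __u64 cycles)
    {
            struct kvm_arm_counter_offset off = {
                    .counter_offset = cycles,
                    .reserved       = 0,    /* must be zero, or -EINVAL */
            };

            if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_COUNTER_OFFSET) <= 0)
                    return -1;              /* capability not advertised */

            return ioctl(vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &off);
    }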


@ -224,6 +224,10 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_EL1_32BIT 4
/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
/* VM counter offset */
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6
/* Timer PPIs made immutable */
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7
unsigned long flags;
@ -368,6 +372,10 @@ enum vcpu_sysreg {
TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
CNTHP_CTL_EL2,
CNTHP_CVAL_EL2,
CNTHV_CTL_EL2,
CNTHV_CVAL_EL2,
NR_SYS_REGS /* Nothing after this line! */
};
@ -926,6 +934,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
int __init kvm_sys_reg_table_init(void);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
@ -1011,6 +1022,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
struct kvm_arm_copy_mte_tags *copy_tags);
int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
struct kvm_arm_counter_offset *offset);
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);


@ -63,6 +63,7 @@
* specific registers encoded in the instructions).
*/
.macro kern_hyp_va reg
#ifndef __KVM_VHE_HYPERVISOR__
alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
and \reg, \reg, #1 /* mask with va_mask */
ror \reg, \reg, #1 /* rotate to the first tag bit */
@ -70,6 +71,7 @@ alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
ror \reg, \reg, #63 /* rotate back */
alternative_cb_end
#endif
.endm
/*
@ -127,6 +129,7 @@ void kvm_apply_hyp_relocations(void);
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
#ifndef __KVM_VHE_HYPERVISOR__
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
"ror %0, %0, #1\n"
"add %0, %0, #0\n"
@ -135,6 +138,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
ARM64_ALWAYS_SYSTEM,
kvm_update_va_mask)
: "+r" (v));
#endif
return v;
}


@ -388,6 +388,7 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@ -400,7 +401,9 @@
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))


@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags {
__u64 reserved[2];
};
/*
* Counter/Timer offset structure. Describe the virtual/physical offset.
* To be used with KVM_ARM_SET_COUNTER_OFFSET.
*/
struct kvm_arm_counter_offset {
__u64 counter_offset;
__u64 reserved;
};
#define KVM_ARM_TAGS_TO_GUEST 0
#define KVM_ARM_TAGS_FROM_GUEST 1
@ -411,6 +420,8 @@ enum {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2
#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0
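
As a sketch of how the new per-vcpu attribute indices might be used, a
hypothetical helper routing the guest's EL2 virtual timer PPI; the helper
name is invented, while KVM_SET_DEVICE_ATTR and the TIMER_CTRL group are
existing KVM interfaces, and the arch_timer changes below make the PPIs
immutable once any vcpu has run.

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical helper: pick the PPI used for the guest hyp virtual timer. */
    static int set_hvtimer_ppi(int vcpu_fd, int intid)  /* PPI range 16..31 */
    {
            struct kvm_device_attr attr = {
                    .group = KVM_ARM_VCPU_TIMER_CTRL,
                    .attr  = KVM_ARM_VCPU_TIMER_IRQ_HVTIMER,
                    .addr  = (__u64)(unsigned long)&intid,
            };

            /* Must happen before the first KVM_RUN; -EBUSY afterwards. */
            return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
    }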


@ -2223,6 +2223,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
{
.desc = "Enhanced Counter Virtualization (CNTPOFF)",
.capability = ARM64_HAS_ECV_CNTPOFF,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR0_EL1,
.field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF,
},
#ifdef CONFIG_ARM64_PAN
{
.desc = "Privileged Access Never",
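
A short note on what the new capability actually matches, written as a
comment for context (the field encoding is per the Arm ARM, not taken from
the patch itself):

    /*
     * ID_AA64MMFR0_EL1.ECV == 0b0001 advertises the base Enhanced Counter
     * Virtualization features (e.g. the self-synchronised CNTPCTSS_EL0 and
     * CNTVCTSS_EL0 views); 0b0010 additionally implements CNTPOFF_EL2.
     * The capability above only matches the latter, which is what
     * has_cntpoff() in the KVM timer code further down relies on.
     */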


@ -16,6 +16,7 @@
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
@ -30,14 +31,11 @@ static u32 host_ptimer_irq_flags;
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
static const struct kvm_irq_level default_ptimer_irq = {
.irq = 30,
.level = 1,
};
static const struct kvm_irq_level default_vtimer_irq = {
.irq = 27,
.level = 1,
static const u8 default_ppi[] = {
[TIMER_PTIMER] = 30,
[TIMER_VTIMER] = 27,
[TIMER_HPTIMER] = 26,
[TIMER_HVTIMER] = 28,
};
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
@ -51,6 +49,24 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
struct arch_timer_context *timer,
enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);
static struct irq_ops arch_timer_irq_ops = {
.get_input_level = kvm_arch_timer_get_input_level,
};
static bool has_cntpoff(void)
{
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}
static int nr_timers(struct kvm_vcpu *vcpu)
{
if (!vcpu_has_nv(vcpu))
return NR_KVM_EL0_TIMERS;
return NR_KVM_TIMERS;
}
u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
@ -61,6 +77,10 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt)
return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
case TIMER_PTIMER:
return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
case TIMER_HVTIMER:
return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
case TIMER_HPTIMER:
return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
default:
WARN_ON(1);
return 0;
@ -76,6 +96,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
case TIMER_PTIMER:
return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
case TIMER_HVTIMER:
return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
case TIMER_HPTIMER:
return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
default:
WARN_ON(1);
return 0;
@ -84,10 +108,17 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
if (ctxt->offset.vm_offset)
return *ctxt->offset.vm_offset;
u64 offset = 0;
return 0;
if (!ctxt)
return 0;
if (ctxt->offset.vm_offset)
offset += *ctxt->offset.vm_offset;
if (ctxt->offset.vcpu_offset)
offset += *ctxt->offset.vcpu_offset;
return offset;
}
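
To make the new composition in timer_get_offset() concrete, a small worked
example with illustrative numbers (the nested-virt wiring referred to here
is set up in kvm_timer_vcpu_reset() further down):

    guest_count = host_count - (*vm_offset + *vcpu_offset)   /* NULL => 0 */

    For the virtual timer of a vEL1 guest under a nested hypervisor,
    vm_offset points at the VM-wide physical offset and vcpu_offset at the
    vcpu's CNTVOFF_EL2, so host CNTPCT = 1000, poffset = 300 and
    CNTVOFF_EL2 = 50 yield a guest-visible virtual count of 650.
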
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
@ -101,6 +132,12 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
case TIMER_PTIMER:
__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
break;
case TIMER_HVTIMER:
__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
break;
case TIMER_HPTIMER:
__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
break;
default:
WARN_ON(1);
}
@ -117,6 +154,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
case TIMER_PTIMER:
__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
break;
case TIMER_HVTIMER:
__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
break;
case TIMER_HPTIMER:
__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
break;
default:
WARN_ON(1);
}
@ -139,13 +182,27 @@ u64 kvm_phys_timer_read(void)
static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
if (has_vhe()) {
if (vcpu_has_nv(vcpu)) {
if (is_hyp_ctxt(vcpu)) {
map->direct_vtimer = vcpu_hvtimer(vcpu);
map->direct_ptimer = vcpu_hptimer(vcpu);
map->emul_vtimer = vcpu_vtimer(vcpu);
map->emul_ptimer = vcpu_ptimer(vcpu);
} else {
map->direct_vtimer = vcpu_vtimer(vcpu);
map->direct_ptimer = vcpu_ptimer(vcpu);
map->emul_vtimer = vcpu_hvtimer(vcpu);
map->emul_ptimer = vcpu_hptimer(vcpu);
}
} else if (has_vhe()) {
map->direct_vtimer = vcpu_vtimer(vcpu);
map->direct_ptimer = vcpu_ptimer(vcpu);
map->emul_vtimer = NULL;
map->emul_ptimer = NULL;
} else {
map->direct_vtimer = vcpu_vtimer(vcpu);
map->direct_ptimer = NULL;
map->emul_vtimer = NULL;
map->emul_ptimer = vcpu_ptimer(vcpu);
}
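
The resulting mapping is easier to see laid out flat; this is a summary of
the branches above rather than text from the patch:

    NV guest, vEL2 context:    direct = hv/hp timers,  emulated = v/p timers
    NV guest, vEL1/0 context:  direct = v/p timers,    emulated = hv/hp timers
    VHE, no NV:                direct = v/p timers,    nothing emulated
    nVHE:                      direct = vtimer only,   ptimer emulated
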
@ -212,7 +269,7 @@ static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
ns = cyclecounter_cyc2ns(timecounter->cc,
val - now,
timecounter->mask,
&timecounter->frac);
&timer_ctx->ns_frac);
return ns;
}
@ -240,8 +297,11 @@ static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctx = vcpu_vtimer(vcpu);
u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
struct arch_timer_context *ctx;
ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
: vcpu_vtimer(vcpu);
return kvm_counter_compute_delta(ctx, val);
}
@ -255,7 +315,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
u64 min_delta = ULLONG_MAX;
int i;
for (i = 0; i < NR_KVM_TIMERS; i++) {
for (i = 0; i < nr_timers(vcpu); i++) {
struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
WARN(ctx->loaded, "timer %d loaded\n", i);
@ -338,9 +398,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
switch (index) {
case TIMER_VTIMER:
case TIMER_HVTIMER:
cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
case TIMER_HPTIMER:
cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
@ -392,12 +454,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
int ret;
timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
timer_ctx->irq.level);
if (!userspace_irqchip(vcpu->kvm)) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
timer_irq(timer_ctx),
timer_ctx->irq.level,
timer_ctx);
WARN_ON(ret);
@ -432,6 +494,12 @@ static void set_cntvoff(u64 cntvoff)
kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}
static void set_cntpoff(u64 cntpoff)
{
if (has_cntpoff())
write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}
static void timer_save_state(struct arch_timer_context *ctx)
{
struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
@ -447,7 +515,10 @@ static void timer_save_state(struct arch_timer_context *ctx)
goto out;
switch (index) {
u64 cval;
case TIMER_VTIMER:
case TIMER_HVTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
@ -473,13 +544,20 @@ static void timer_save_state(struct arch_timer_context *ctx)
set_cntvoff(0);
break;
case TIMER_PTIMER:
case TIMER_HPTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL));
cval = read_sysreg_el0(SYS_CNTP_CVAL);
if (!has_cntpoff())
cval -= timer_get_offset(ctx);
timer_set_cval(ctx, cval);
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTP_CTL);
isb();
set_cntpoff(0);
break;
case NR_KVM_TIMERS:
BUG();
@ -510,6 +588,7 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
*/
if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
!kvm_timer_irq_can_fire(map.direct_ptimer) &&
!kvm_timer_irq_can_fire(map.emul_vtimer) &&
!kvm_timer_irq_can_fire(map.emul_ptimer) &&
!vcpu_has_wfit_active(vcpu))
return;
@ -543,14 +622,23 @@ static void timer_restore_state(struct arch_timer_context *ctx)
goto out;
switch (index) {
u64 cval, offset;
case TIMER_VTIMER:
case TIMER_HVTIMER:
set_cntvoff(timer_get_offset(ctx));
write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL);
case TIMER_HPTIMER:
cval = timer_get_cval(ctx);
offset = timer_get_offset(ctx);
set_cntpoff(offset);
if (!has_cntpoff())
cval += offset;
write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
break;
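
A worked example of the +/- offset adjustment in the save and restore
paths above, with made-up numbers:

    VM physical offset = 300, hardware CNTPCT = 1000  =>  the guest sees 700.
    The guest programs CVAL = 750 ("fire in 50 guest cycles"):
      - without CNTPOFF_EL2, restore writes 750 + 300 = 1050 to the real
        comparator and save subtracts the 300 again, so the saved state
        stays in the guest's timeline;
      - with CNTPOFF_EL2, the hardware applies the offset itself and both
        adjustments vanish (set_cntpoff() programs the register instead).
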
@ -586,7 +674,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
if (irqchip_in_kernel(vcpu->kvm))
phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
phys_active |= ctx->irq.level;
@ -621,6 +709,128 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set) \
do { \
if (_pred) \
(_set) |= (_bit); \
else \
(_clr) |= (_bit); \
} while (0)
static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
struct timer_map *map)
{
int hw, ret;
if (!irqchip_in_kernel(vcpu->kvm))
return;
/*
* We only ever unmap the vtimer irq on a VHE system that runs nested
* virtualization, in which case we have a valid emul_vtimer,
* emul_ptimer, direct_vtimer, and direct_ptimer.
*
* Since this is called from kvm_timer_vcpu_load(), a change between
* vEL2 and vEL1/0 will have just happened, and the timer_map will
* represent this, and therefore we switch the emul/direct mappings
* below.
*/
hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
if (hw < 0) {
kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));
ret = kvm_vgic_map_phys_irq(vcpu,
map->direct_vtimer->host_timer_irq,
timer_irq(map->direct_vtimer),
&arch_timer_irq_ops);
WARN_ON_ONCE(ret);
ret = kvm_vgic_map_phys_irq(vcpu,
map->direct_ptimer->host_timer_irq,
timer_irq(map->direct_ptimer),
&arch_timer_irq_ops);
WARN_ON_ONCE(ret);
/*
* The virtual offset behaviour is "interesting", as it
* always applies when HCR_EL2.E2H==0, but only when
* accessed from EL1 when HCR_EL2.E2H==1. So make sure we
* track E2H when putting the HV timer in "direct" mode.
*/
if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
struct arch_timer_offset *offs = &map->direct_vtimer->offset;
if (vcpu_el2_e2h_is_set(vcpu))
offs->vcpu_offset = NULL;
else
offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
}
}
}
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
bool tpt, tpc;
u64 clr, set;
/*
* No trapping gets configured here with nVHE. See
* __timer_enable_traps(), which is where the stuff happens.
*/
if (!has_vhe())
return;
/*
* Our default policy is not to trap anything. As we progress
* within this function, reality kicks in and we start adding
* traps based on emulation requirements.
*/
tpt = tpc = false;
/*
* We have two possibilities to deal with a physical offset:
*
* - Either we have CNTPOFF (yay!) or the offset is 0:
* we let the guest freely access the HW
*
* - or neither of these conditions applies:
* we trap accesses to the HW, but still use it
* after correcting the physical offset
*/
if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
tpt = tpc = true;
/*
* Apply the enable bits that the guest hypervisor has requested for
* its own guest. We can only add traps that wouldn't have been set
* above.
*/
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
/* Use the VHE format for mental sanity */
if (!vcpu_el2_e2h_is_set(vcpu))
val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
}
/*
* Now that we have collected our requirements, compute the
* trap and enable bits.
*/
set = 0;
clr = 0;
assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
/* This only happens on VHE, so use the CNTKCTL_EL1 accessor */
sysreg_clear_set(cntkctl_el1, clr, set);
}
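
For reference, the register layout behind the "<< 10" used above (bit
positions per the Arm ARM, not taken from the patch):

    CNTHCTL_EL2, HCR_EL2.E2H == 0:  EL1PCTEN = bit 0,  EL1PCEN = bit 1
    CNTHCTL_EL2, HCR_EL2.E2H == 1:  EL1PCTEN = bit 10, EL1PCEN = bit 11

    A guest hypervisor running with E2H clear therefore has its enable bits
    shifted into the VHE layout before tpt/tpc are folded in and the result
    is applied through the CNTKCTL_EL1 accessor (which reaches CNTHCTL_EL2
    when running VHE).
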
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@ -632,6 +842,9 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
get_timer_map(vcpu, &map);
if (static_branch_likely(&has_gic_active_state)) {
if (vcpu_has_nv(vcpu))
kvm_timer_vcpu_load_nested_switch(vcpu, &map);
kvm_timer_vcpu_load_gic(map.direct_vtimer);
if (map.direct_ptimer)
kvm_timer_vcpu_load_gic(map.direct_ptimer);
@ -644,9 +857,12 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
timer_restore_state(map.direct_vtimer);
if (map.direct_ptimer)
timer_restore_state(map.direct_ptimer);
if (map.emul_vtimer)
timer_emulate(map.emul_vtimer);
if (map.emul_ptimer)
timer_emulate(map.emul_ptimer);
timer_set_traps(vcpu, &map);
}
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@ -689,6 +905,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
* In any case, we re-schedule the hrtimer for the physical timer when
* coming back to the VCPU thread in kvm_timer_vcpu_load().
*/
if (map.emul_vtimer)
soft_timer_cancel(&map.emul_vtimer->hrtimer);
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
@ -738,56 +956,89 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
* resets the timer to be disabled and unmasked and is compliant with
* the ARMv7 architecture.
*/
timer_set_ctl(vcpu_vtimer(vcpu), 0);
timer_set_ctl(vcpu_ptimer(vcpu), 0);
for (int i = 0; i < nr_timers(vcpu); i++)
timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
/*
* A vcpu running at EL2 is in charge of the offset applied to
* the virtual timer, so use the physical VM offset, and point
* the vcpu offset to CNTVOFF_EL2.
*/
if (vcpu_has_nv(vcpu)) {
struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
}
if (timer->enabled) {
kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
for (int i = 0; i < nr_timers(vcpu); i++)
kvm_timer_update_irq(vcpu, false,
vcpu_get_timer(vcpu, i));
if (irqchip_in_kernel(vcpu->kvm)) {
kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
if (map.direct_ptimer)
kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq);
kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
}
}
if (map.emul_vtimer)
soft_timer_cancel(&map.emul_vtimer->hrtimer);
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
return 0;
}
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
struct kvm *kvm = vcpu->kvm;
ctxt->vcpu = vcpu;
if (timerid == TIMER_VTIMER)
ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
else
ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
ctxt->hrtimer.function = kvm_hrtimer_expire;
switch (timerid) {
case TIMER_PTIMER:
case TIMER_HPTIMER:
ctxt->host_timer_irq = host_ptimer_irq;
break;
case TIMER_VTIMER:
case TIMER_HVTIMER:
ctxt->host_timer_irq = host_vtimer_irq;
break;
}
}
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
vtimer->vcpu = vcpu;
vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset;
ptimer->vcpu = vcpu;
for (int i = 0; i < NR_KVM_TIMERS; i++)
timer_context_init(vcpu, i);
/* Synchronize cntvoff across all vtimers of a VM. */
timer_set_offset(vtimer, kvm_phys_timer_read());
timer_set_offset(ptimer, 0);
/* Synchronize offsets across timers of a VM if not already provided */
if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
timer_set_offset(vcpu_ptimer(vcpu), 0);
}
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
timer->bg_timer.function = kvm_bg_timer_expire;
}
hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
vtimer->hrtimer.function = kvm_hrtimer_expire;
ptimer->hrtimer.function = kvm_hrtimer_expire;
vtimer->irq.irq = default_vtimer_irq.irq;
ptimer->irq.irq = default_ptimer_irq.irq;
vtimer->host_timer_irq = host_vtimer_irq;
ptimer->host_timer_irq = host_ptimer_irq;
vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
void kvm_timer_init_vm(struct kvm *kvm)
{
for (int i = 0; i < NR_KVM_TIMERS; i++)
kvm->arch.timer_data.ppi[i] = default_ppi[i];
}
void kvm_timer_cpu_up(void)
@ -814,8 +1065,11 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
break;
case KVM_REG_ARM_TIMER_CNT:
timer = vcpu_vtimer(vcpu);
timer_set_offset(timer, kvm_phys_timer_read() - value);
if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
&vcpu->kvm->arch.flags)) {
timer = vcpu_vtimer(vcpu);
timer_set_offset(timer, kvm_phys_timer_read() - value);
}
break;
case KVM_REG_ARM_TIMER_CVAL:
timer = vcpu_vtimer(vcpu);
@ -825,6 +1079,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
timer = vcpu_ptimer(vcpu);
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
break;
case KVM_REG_ARM_PTIMER_CNT:
if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
&vcpu->kvm->arch.flags)) {
timer = vcpu_ptimer(vcpu);
timer_set_offset(timer, kvm_phys_timer_read() - value);
}
break;
case KVM_REG_ARM_PTIMER_CVAL:
timer = vcpu_ptimer(vcpu);
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
@ -902,6 +1163,10 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
val = kvm_phys_timer_read() - timer_get_offset(timer);
break;
case TIMER_REG_VOFF:
val = *timer->offset.vcpu_offset;
break;
default:
BUG();
}
@ -920,7 +1185,7 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
get_timer_map(vcpu, &map);
timer = vcpu_get_timer(vcpu, tmr);
if (timer == map.emul_ptimer)
if (timer == map.emul_vtimer || timer == map.emul_ptimer)
return kvm_arm_timer_read(vcpu, timer, treg);
preempt_disable();
@ -952,6 +1217,10 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
timer_set_cval(timer, val);
break;
case TIMER_REG_VOFF:
*timer->offset.vcpu_offset = val;
break;
default:
BUG();
}
@ -967,7 +1236,7 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
get_timer_map(vcpu, &map);
timer = vcpu_get_timer(vcpu, tmr);
if (timer == map.emul_ptimer) {
if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
soft_timer_cancel(&timer->hrtimer);
kvm_arm_timer_write(vcpu, timer, treg, val);
timer_emulate(timer);
@ -1047,10 +1316,6 @@ static const struct irq_domain_ops timer_domain_ops = {
.free = timer_irq_domain_free,
};
static struct irq_ops arch_timer_irq_ops = {
.get_input_level = kvm_arch_timer_get_input_level,
};
static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
*flags = irq_get_trigger_type(virq);
@ -1192,44 +1457,56 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
int vtimer_irq, ptimer_irq, ret;
unsigned long i;
u32 ppis = 0;
bool valid;
vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
if (ret)
return false;
mutex_lock(&vcpu->kvm->arch.config_lock);
ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
if (ret)
return false;
for (int i = 0; i < nr_timers(vcpu); i++) {
struct arch_timer_context *ctx;
int irq;
kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
return false;
ctx = vcpu_get_timer(vcpu, i);
irq = timer_irq(ctx);
if (kvm_vgic_set_owner(vcpu, irq, ctx))
break;
/*
* We know by construction that we only have PPIs, so
* all values are less than 32.
*/
ppis |= BIT(irq);
}
return true;
valid = hweight32(ppis) == nr_timers(vcpu);
if (valid)
set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
mutex_unlock(&vcpu->kvm->arch.config_lock);
return valid;
}
bool kvm_arch_timer_get_input_level(int vintid)
static bool kvm_arch_timer_get_input_level(int vintid)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer;
if (WARN(!vcpu, "No vcpu context!\n"))
return false;
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
timer = vcpu_vtimer(vcpu);
else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
timer = vcpu_ptimer(vcpu);
else
BUG();
for (int i = 0; i < nr_timers(vcpu); i++) {
struct arch_timer_context *ctx;
return kvm_timer_should_fire(timer);
ctx = vcpu_get_timer(vcpu, i);
if (timer_irq(ctx) == vintid)
return kvm_timer_should_fire(ctx);
}
/* A timer IRQ has fired, but no matching timer was found? */
WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
return false;
}
int kvm_timer_enable(struct kvm_vcpu *vcpu)
@ -1258,7 +1535,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_vtimer->host_timer_irq,
map.direct_vtimer->irq.irq,
timer_irq(map.direct_vtimer),
&arch_timer_irq_ops);
if (ret)
return ret;
@ -1266,7 +1543,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (map.direct_ptimer) {
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_ptimer->host_timer_irq,
map.direct_ptimer->irq.irq,
timer_irq(map.direct_ptimer),
&arch_timer_irq_ops);
}
@ -1278,45 +1555,17 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return 0;
}
/*
* On VHE system, we only need to configure the EL2 timer trap register once,
* not for every world switch.
* The host kernel runs at EL2 with HCR_EL2.TGE == 1,
* and this makes those bits have no effect for the host kernel execution.
*/
/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
/* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */
u32 cnthctl_shift = 10;
u64 val;
/*
* VHE systems allow the guest direct access to the EL1 physical
* timer/counter.
*/
val = read_sysreg(cnthctl_el2);
val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
write_sysreg(val, cnthctl_el2);
}
static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
{
struct kvm_vcpu *vcpu;
unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
}
if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV);
}
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
int __user *uaddr = (int __user *)(long)attr->addr;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
int irq;
int irq, idx, ret = 0;
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
@ -1327,21 +1576,42 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!(irq_is_ppi(irq)))
return -EINVAL;
if (vcpu->arch.timer_cpu.enabled)
return -EBUSY;
mutex_lock(&vcpu->kvm->arch.config_lock);
if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
&vcpu->kvm->arch.flags)) {
ret = -EBUSY;
goto out;
}
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
idx = TIMER_VTIMER;
break;
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
idx = TIMER_PTIMER;
break;
case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
idx = TIMER_HVTIMER;
break;
case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
idx = TIMER_HPTIMER;
break;
default:
return -ENXIO;
ret = -ENXIO;
goto out;
}
return 0;
/*
* We cannot validate the IRQ unicity before we run, so take it at
* face value. The verdict will be given on first vcpu run, for each
* vcpu. Yes this is late. Blame it on the stupid API.
*/
vcpu->kvm->arch.timer_data.ppi[idx] = irq;
out:
mutex_unlock(&vcpu->kvm->arch.config_lock);
return ret;
}
int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
@ -1357,11 +1627,17 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
timer = vcpu_ptimer(vcpu);
break;
case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
timer = vcpu_hvtimer(vcpu);
break;
case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
timer = vcpu_hptimer(vcpu);
break;
default:
return -ENXIO;
}
irq = timer->irq.irq;
irq = timer_irq(timer);
return put_user(irq, uaddr);
}
@ -1370,8 +1646,42 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
return 0;
}
return -ENXIO;
}
int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
struct kvm_arm_counter_offset *offset)
{
int ret = 0;
if (offset->reserved)
return -EINVAL;
mutex_lock(&kvm->lock);
if (lock_all_vcpus(kvm)) {
set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
/*
* If userspace decides to set the offset using this
* API rather than merely restoring the counter
* values, the offset applies to both the virtual and
* physical views.
*/
kvm->arch.timer_data.voffset = offset->counter_offset;
kvm->arch.timer_data.poffset = offset->counter_offset;
unlock_all_vcpus(kvm);
} else {
ret = -EBUSY;
}
mutex_unlock(&kvm->lock);
return ret;
}


@ -158,6 +158,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
kvm_timer_init_vm(kvm);
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->max_vcpus = kvm_arm_default_max_vcpus();
@ -230,6 +232,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_PTP_KVM:
case KVM_CAP_ARM_SYSTEM_SUSPEND:
case KVM_CAP_COUNTER_OFFSET:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@ -1514,11 +1517,61 @@ long kvm_arch_vm_ioctl(struct file *filp,
return -EFAULT;
return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
}
case KVM_ARM_SET_COUNTER_OFFSET: {
struct kvm_arm_counter_offset offset;
if (copy_from_user(&offset, argp, sizeof(offset)))
return -EFAULT;
return kvm_vm_ioctl_set_counter_offset(kvm, &offset);
}
default:
return -EINVAL;
}
}
/* unlocks vcpus from @vcpu_lock_idx and smaller */
static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
{
struct kvm_vcpu *tmp_vcpu;
for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
mutex_unlock(&tmp_vcpu->mutex);
}
}
void unlock_all_vcpus(struct kvm *kvm)
{
lockdep_assert_held(&kvm->lock);
unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
}
/* Returns true if all vcpus were locked, false otherwise */
bool lock_all_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *tmp_vcpu;
unsigned long c;
lockdep_assert_held(&kvm->lock);
/*
* Any time a vcpu is in an ioctl (including running), the
* core KVM code tries to grab the vcpu->mutex.
*
* By grabbing the vcpu->mutex of all VCPUs we ensure that no
* other VCPUs can fiddle with the state while we access it.
*/
kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
if (!mutex_trylock(&tmp_vcpu->mutex)) {
unlock_vcpus(kvm, c - 1);
return false;
}
}
return true;
}
static unsigned long nvhe_percpu_size(void)
{
return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -


@ -590,11 +590,16 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
return copy_core_reg_indices(vcpu, NULL);
}
/**
* ARM64 versions of the TIMER registers, always available on arm64
*/
static const u64 timer_reg_list[] = {
KVM_REG_ARM_TIMER_CTL,
KVM_REG_ARM_TIMER_CNT,
KVM_REG_ARM_TIMER_CVAL,
KVM_REG_ARM_PTIMER_CTL,
KVM_REG_ARM_PTIMER_CNT,
KVM_REG_ARM_PTIMER_CVAL,
};
#define NUM_TIMER_REGS 3
#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list)
static bool is_timer_reg(u64 index)
{
@ -602,6 +607,9 @@ static bool is_timer_reg(u64 index)
case KVM_REG_ARM_TIMER_CTL:
case KVM_REG_ARM_TIMER_CNT:
case KVM_REG_ARM_TIMER_CVAL:
case KVM_REG_ARM_PTIMER_CTL:
case KVM_REG_ARM_PTIMER_CNT:
case KVM_REG_ARM_PTIMER_CVAL:
return true;
}
return false;
@ -609,14 +617,11 @@ static bool is_timer_reg(u64 index)
static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
return -EFAULT;
uindices++;
if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
return -EFAULT;
uindices++;
if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
return -EFAULT;
for (int i = 0; i < NUM_TIMER_REGS; i++) {
if (put_user(timer_reg_list[i], uindices))
return -EFAULT;
uindices++;
}
return 0;
}
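
With the physical timer registers now exposed, saving the guest's physical
counter view from userspace could look roughly like this; the helper is
hypothetical, while KVM_GET_ONE_REG and KVM_REG_ARM_PTIMER_CNT are existing
uapi definitions used elsewhere in this series.

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical helper: read the guest's view of CNTPCT_EL0. */
    static int get_guest_pcount(int vcpu_fd, __u64 *val)
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_ARM_PTIMER_CNT,
                    .addr = (__u64)(unsigned long)val,
            };

            return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }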


@ -26,6 +26,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
@ -326,6 +327,55 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;
u32 sysreg;
u64 val;
/*
* We only get here for 64bit guests, 32bit guests will hit
* the long and winding road all the way to the standard
* handling. Yes, it sucks to be irrelevant.
*/
sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
switch (sysreg) {
case SYS_CNTPCT_EL0:
case SYS_CNTPCTSS_EL0:
if (vcpu_has_nv(vcpu)) {
if (is_hyp_ctxt(vcpu)) {
ctxt = vcpu_hptimer(vcpu);
break;
}
/* Check for guest hypervisor trapping */
val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
if (!vcpu_el2_e2h_is_set(vcpu))
val = (val & CNTHCTL_EL1PCTEN) << 10;
if (!(val & (CNTHCTL_EL1PCTEN << 10)))
return false;
}
ctxt = vcpu_ptimer(vcpu);
break;
default:
return false;
}
val = arch_timer_read_cntpct_el0();
if (ctxt->offset.vm_offset)
val -= *kern_hyp_va(ctxt->offset.vm_offset);
if (ctxt->offset.vcpu_offset)
val -= *kern_hyp_va(ctxt->offset.vcpu_offset);
vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
__kvm_skip_instr(vcpu);
return true;
}
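
A brief note on the kern_hyp_va() calls in the fast path above, written as
a comment since it ties two parts of this commit together:

    /*
     * The offset pointers refer to kernel objects (kvm->arch.timer_data and
     * the vcpu sysreg array). On nVHE the hypervisor runs in its own VA
     * space, so they must go through kern_hyp_va() before being dereferenced
     * at EL2; on VHE the __KVM_VHE_HYPERVISOR__ change to kvm_mmu.h earlier
     * in this commit turns kern_hyp_va() into the identity, so the same
     * shared code compiles down to a plain load.
     */
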
static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
@ -339,6 +389,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
return kvm_hyp_handle_ptrauth(vcpu, exit_code);
if (kvm_hyp_handle_cntpct(vcpu))
return true;
return false;
}


@ -9,6 +9,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
void __kvm_timer_set_cntvoff(u64 cntvoff)
{
@ -35,14 +36,19 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu)
*/
void __timer_enable_traps(struct kvm_vcpu *vcpu)
{
u64 val;
u64 clr = 0, set = 0;
/*
* Disallow physical timer access for the guest
* Physical counter access is allowed
* Physical counter access is allowed if no offset is enforced
* or running protected (we don't offset anything in this case).
*/
val = read_sysreg(cnthctl_el2);
val &= ~CNTHCTL_EL1PCEN;
val |= CNTHCTL_EL1PCTEN;
write_sysreg(val, cnthctl_el2);
clr = CNTHCTL_EL1PCEN;
if (is_protected_kvm_enabled() ||
!kern_hyp_va(vcpu->kvm)->arch.timer_data.poffset)
set |= CNTHCTL_EL1PCTEN;
else
clr |= CNTHCTL_EL1PCTEN;
sysreg_clear_set(cnthctl_el2, clr, set);
}


@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
break;
case KVM_PTP_PHYS_COUNTER:
cycles = systime_snapshot.cycles;
cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset;
break;
default:
return;


@ -1139,6 +1139,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
tmr = TIMER_PTIMER;
treg = TIMER_REG_CVAL;
break;
case SYS_CNTPCT_EL0:
case SYS_CNTPCTSS_EL0:
case SYS_AARCH32_CNTPCT:
tmr = TIMER_PTIMER;
treg = TIMER_REG_CNT;
break;
default:
print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
kvm_inject_undefined(vcpu);
@ -2075,6 +2081,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
AMU_AMEVTYPER1_EL0(14),
AMU_AMEVTYPER1_EL0(15),
{ SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer },
@ -2525,10 +2533,12 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
{ CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
{ SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
{ SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer },
};
static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,


@ -206,6 +206,7 @@ TRACE_EVENT(kvm_get_timer_map,
__field( unsigned long, vcpu_id )
__field( int, direct_vtimer )
__field( int, direct_ptimer )
__field( int, emul_vtimer )
__field( int, emul_ptimer )
),
@ -214,14 +215,17 @@ TRACE_EVENT(kvm_get_timer_map,
__entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer);
__entry->direct_ptimer =
(map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1;
__entry->emul_vtimer =
(map->emul_vtimer) ? arch_timer_ctx_index(map->emul_vtimer) : -1;
__entry->emul_ptimer =
(map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1;
),
TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d",
TP_printk("VCPU: %ld, dv: %d, dp: %d, ev: %d, ep: %d",
__entry->vcpu_id,
__entry->direct_vtimer,
__entry->direct_ptimer,
__entry->emul_vtimer,
__entry->emul_ptimer)
);


@ -345,44 +345,6 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
return 0;
}
/* unlocks vcpus from @vcpu_lock_idx and smaller */
static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
{
struct kvm_vcpu *tmp_vcpu;
for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
mutex_unlock(&tmp_vcpu->mutex);
}
}
void unlock_all_vcpus(struct kvm *kvm)
{
unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
}
/* Returns true if all vcpus were locked, false otherwise */
bool lock_all_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *tmp_vcpu;
unsigned long c;
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the
* vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
* that no other VCPUs are run and fiddle with the vgic state while we
* access it.
*/
kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
if (!mutex_trylock(&tmp_vcpu->mutex)) {
unlock_vcpus(kvm, c - 1);
return false;
}
}
return true;
}
/**
* vgic_v2_attr_regs_access - allows user space to access VGIC v2 state
*


@ -575,6 +575,21 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
return 0;
}
int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
unsigned long flags;
int ret = -1;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw)
ret = irq->hwintid;
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
return ret;
}
/**
* kvm_vgic_set_owner - Set the owner of an interrupt for a VM
*


@ -273,9 +273,6 @@ int vgic_init(struct kvm *kvm);
void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;


@ -23,6 +23,7 @@ HAS_DCPOP
HAS_DIT
HAS_E0PD
HAS_ECV
HAS_ECV_CNTPOFF
HAS_EPAN
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3


@ -1952,6 +1952,10 @@ Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
Sysreg CNTPOFF_EL2 3 4 14 0 6
Field 63:0 PhysicalOffset
EndSysreg
Sysreg CPACR_EL12 3 5 1 0 2
Fields CPACR_ELx
EndSysreg


@ -21,6 +21,7 @@
#define CNTHCTL_EVNTEN (1 << 2)
#define CNTHCTL_EVNTDIR (1 << 3)
#define CNTHCTL_EVNTI (0xF << 4)
#define CNTHCTL_ECV (1 << 12)
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,


@ -13,6 +13,9 @@
enum kvm_arch_timers {
TIMER_PTIMER,
TIMER_VTIMER,
NR_KVM_EL0_TIMERS,
TIMER_HVTIMER = NR_KVM_EL0_TIMERS,
TIMER_HPTIMER,
NR_KVM_TIMERS
};
@ -21,6 +24,7 @@ enum kvm_arch_timer_regs {
TIMER_REG_CVAL,
TIMER_REG_TVAL,
TIMER_REG_CTL,
TIMER_REG_VOFF,
};
struct arch_timer_offset {
@ -29,21 +33,29 @@ struct arch_timer_offset {
* structure. If NULL, assume a zero offset.
*/
u64 *vm_offset;
/*
* If set, pointer to one of the offsets in the vcpu's sysreg
* array. If NULL, assume a zero offset.
*/
u64 *vcpu_offset;
};
struct arch_timer_vm_data {
/* Offset applied to the virtual timer/counter */
u64 voffset;
/* Offset applied to the physical timer/counter */
u64 poffset;
/* The PPI for each timer, global to the VM */
u8 ppi[NR_KVM_TIMERS];
};
struct arch_timer_context {
struct kvm_vcpu *vcpu;
/* Timer IRQ */
struct kvm_irq_level irq;
/* Emulated Timer (may be unused) */
struct hrtimer hrtimer;
u64 ns_frac;
/* Offset for this counter/timer */
struct arch_timer_offset offset;
@ -54,14 +66,19 @@ struct arch_timer_context {
*/
bool loaded;
/* Output level of the timer IRQ */
struct {
bool level;
} irq;
/* Duplicated state from arch_timer.c for convenience */
u32 host_timer_irq;
u32 host_timer_irq_flags;
};
struct timer_map {
struct arch_timer_context *direct_vtimer;
struct arch_timer_context *direct_ptimer;
struct arch_timer_context *emul_vtimer;
struct arch_timer_context *emul_ptimer;
};
@ -84,6 +101,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu);
void kvm_timer_update_run(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
void kvm_timer_init_vm(struct kvm *kvm);
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
@ -98,15 +117,18 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
void kvm_timer_init_vhe(void);
bool kvm_arch_timer_get_input_level(int vintid);
#define vcpu_timer(v) (&(v)->arch.timer_cpu)
#define vcpu_get_timer(v,t) (&vcpu_timer(v)->timers[(t)])
#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER])
#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER])
#define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER])
#define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER])
#define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers)
#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data)
#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
enum kvm_arch_timers tmr,
enum kvm_arch_timer_regs treg);
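
As a quick illustration of the new accessors (the expansion below is
illustrative; the macros are the ones defined above):

    timer_irq(vcpu_hptimer(vcpu))
        -> timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]
        -> vcpu->kvm->arch.timer_data.ppi[TIMER_HPTIMER]    /* 26 by default */

    i.e. the interrupt number now lives once per VM in arch_timer_vm_data
    rather than being duplicated in every vcpu's arch_timer_context.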


@ -380,6 +380,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
u32 vintid, struct irq_ops *ops);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);


@ -1184,6 +1184,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
#define KVM_CAP_COUNTER_OFFSET 227
#ifdef KVM_CAP_IRQ_ROUTING
@ -1543,6 +1544,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
/* Available with KVM_CAP_COUNTER_OFFSET */
#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)


@ -47,6 +47,7 @@ struct test_args {
int nr_iter;
int timer_period_ms;
int migration_freq_ms;
struct kvm_arm_counter_offset offset;
};
static struct test_args test_args = {
@ -54,6 +55,7 @@ static struct test_args test_args = {
.nr_iter = NR_TEST_ITERS_DEF,
.timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
.migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
.offset = { .reserved = 1 },
};
#define msecs_to_usecs(msec) ((msec) * 1000LL)
@ -121,25 +123,35 @@ static void guest_validate_irq(unsigned int intid,
uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
unsigned long xctl = 0;
unsigned int timer_irq = 0;
unsigned int accessor;
if (stage == GUEST_STAGE_VTIMER_CVAL ||
stage == GUEST_STAGE_VTIMER_TVAL) {
xctl = timer_get_ctl(VIRTUAL);
timer_set_ctl(VIRTUAL, CTL_IMASK);
xcnt = timer_get_cntct(VIRTUAL);
cval = timer_get_cval(VIRTUAL);
if (intid == IAR_SPURIOUS)
return;
switch (stage) {
case GUEST_STAGE_VTIMER_CVAL:
case GUEST_STAGE_VTIMER_TVAL:
accessor = VIRTUAL;
timer_irq = vtimer_irq;
} else if (stage == GUEST_STAGE_PTIMER_CVAL ||
stage == GUEST_STAGE_PTIMER_TVAL) {
xctl = timer_get_ctl(PHYSICAL);
timer_set_ctl(PHYSICAL, CTL_IMASK);
xcnt = timer_get_cntct(PHYSICAL);
cval = timer_get_cval(PHYSICAL);
break;
case GUEST_STAGE_PTIMER_CVAL:
case GUEST_STAGE_PTIMER_TVAL:
accessor = PHYSICAL;
timer_irq = ptimer_irq;
} else {
break;
default:
GUEST_ASSERT(0);
return;
}
xctl = timer_get_ctl(accessor);
if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
return;
timer_set_ctl(accessor, CTL_IMASK);
xcnt = timer_get_cntct(accessor);
cval = timer_get_cval(accessor);
xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
/* Make sure we are dealing with the correct timer IRQ */
@ -148,6 +160,8 @@ static void guest_validate_irq(unsigned int intid,
/* Basic 'timer condition met' check */
GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
}
static void guest_irq_handler(struct ex_regs *regs)
@ -158,8 +172,6 @@ static void guest_irq_handler(struct ex_regs *regs)
guest_validate_irq(intid, shared_data);
WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
gic_set_eoi(intid);
}
@ -372,6 +384,13 @@ static struct kvm_vm *test_vm_create(void)
vm_init_descriptor_tables(vm);
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
if (!test_args.offset.reserved) {
if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET))
vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset);
else
TEST_FAIL("no support for global offset\n");
}
for (i = 0; i < nr_vcpus; i++)
vcpu_init_descriptor_tables(vcpus[i]);
@ -403,6 +422,7 @@ static void test_print_help(char *name)
TIMER_TEST_PERIOD_MS_DEF);
pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
TIMER_TEST_MIGRATION_FREQ_MS);
pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n");
pr_info("\t-h: print this help screen\n");
}
@ -410,7 +430,7 @@ static bool parse_args(int argc, char *argv[])
{
int opt;
while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) {
while ((opt = getopt(argc, argv, "hn:i:p:m:o:")) != -1) {
switch (opt) {
case 'n':
test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
@ -429,6 +449,10 @@ static bool parse_args(int argc, char *argv[])
case 'm':
test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
break;
case 'o':
test_args.offset.counter_offset = strtol(optarg, NULL, 0);
test_args.offset.reserved = 0;
break;
case 'h':
default:
goto err;
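
A hypothetical invocation exercising the new option (binary name and
numbers are illustrative):

    ./arch_timer -n 2 -i 100 -o 0x1000000000

    which spawns two vCPUs, runs 100 timer iterations each, and applies a
    2^36-cycle counter offset via KVM_ARM_SET_COUNTER_OFFSET before any
    vCPU runs (the test fails early if KVM_CAP_COUNTER_OFFSET is absent).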


@ -651,7 +651,7 @@ int main(int ac, char **av)
* The current blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
*
* The blessed list is up to date with kernel version v5.13-rc3
* The blessed list is up to date with kernel version v6.4 (or so we hope)
*/
static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@ -858,6 +858,9 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */
ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */
ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */