KVM: x86: KVM_CAP_SYNC_REGS

This commit implements an enhanced x86 version of the S390
KVM_CAP_SYNC_REGS functionality. KVM_CAP_SYNC_REGS "allow[s]
userspace to access certain guest registers without having
to call SET/GET_*REGS". This reduces ioctl overhead, which
is particularly important when userspace is making synchronous
guest state modifications (e.g. when emulating and/or intercepting
instructions).

Originally implemented upstream for s390, the feature differs on x86
as follows:
- userspace selects the register sets to be synchronized with kvm_run
  using bit-flags in the kvm_valid_regs and kvm_dirty_regs fields (see
  the usage sketch below).
- vcpu_events is available in addition to the regs and sregs register
  sets.
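
A rough userspace sketch of probing which register sets this exposes
(illustration only, not part of this commit; the helper name is made up
and error handling is omitted):

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* On x86, KVM_CHECK_EXTENSION(KVM_CAP_SYNC_REGS) returns a bit-array of
 * the register sets that can be mirrored in kvm_run (KVM_SYNC_X86_REGS,
 * KVM_SYNC_X86_SREGS, KVM_SYNC_X86_EVENTS) rather than a plain 0/1. */
static int sync_regs_supported_sets(void)
{
        int kvm_fd = open("/dev/kvm", O_RDWR);
        int sets = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_REGS);

        close(kvm_fd);
        return sets;
}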

Signed-off-by: Ken Hofsass <hofsass@google.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
[Removed wrapper around check for reserved kvm_valid_regs. - Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>

Authored by Ken Hofsass, 2018-01-31 16:03:36 -08:00; committed by Radim Krčmář
parent 7b7e39522a
commit 01643c51bf
3 changed files with 145 additions and 16 deletions

Documentation/virtual/kvm/api.txt

@@ -4110,6 +4110,46 @@ Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
the guest.

6.74 KVM_CAP_SYNC_REGS

Architectures: s390, x86
Target: s390: always enabled, x86: vcpu
Parameters: none
Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register
sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h).

As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
without having to call SET/GET_*REGS". This reduces overhead by eliminating
repeated ioctl calls for setting and/or getting register values. This is
particularly important when userspace is making synchronous guest state
modifications, e.g. when emulating and/or intercepting instructions in
userspace.

For s390 specifics, please refer to the source code.

For x86:
- the register sets to be copied out to kvm_run are selectable
  by userspace (rather than all sets being copied out for every exit).
- vcpu_events is available in addition to regs and sregs.

For x86, the 'kvm_valid_regs' field of struct kvm_run is overloaded to
function as an input bit-array field set by userspace to indicate the
specific register sets to be copied out on the next exit.

To indicate when userspace has modified values that should be copied into
the vCPU, the architecture-independent 'kvm_dirty_regs' bit-array field must
be set, using the same bit-flags as for the 'kvm_valid_regs' field.
If the dirty bit is not set, then the register set values will not be copied
into the vCPU even if they have been modified.

Unused bitfields in the bit-arrays must be set to zero.

struct kvm_sync_regs {
        struct kvm_regs regs;
        struct kvm_sregs sregs;
        struct kvm_vcpu_events events;
};
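
A minimal usage sketch (illustration only; 'vcpu_fd' and 'run' are assumed
to be an open vCPU fd and its mmap'd struct kvm_run mapping, error handling
omitted):

    /* Ask KVM to mirror GPRs and sregs into kvm_run on the next exit. */
    run->kvm_valid_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
    ioctl(vcpu_fd, KVM_RUN, 0);

    /* Guest state can now be read straight from the shared page,
     * with no KVM_GET_REGS/KVM_GET_SREGS ioctls. */
    __u64 rip = run->s.regs.regs.rip;

    /* Modify state and mark it dirty so KVM loads it back into the
     * vCPU on the next KVM_RUN; without the dirty flag the change
     * would be ignored. */
    run->s.regs.regs.rip = rip + 2;    /* e.g. skip a 2-byte instruction */
    run->kvm_dirty_regs = KVM_SYNC_X86_REGS;
    ioctl(vcpu_fd, KVM_RUN, 0);
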
7. Capabilities that can be enabled on VMs
------------------------------------------

arch/x86/include/uapi/asm/kvm.h

@@ -354,8 +354,25 @@ struct kvm_xcrs {
        __u64 padding[16];
};

/* definition of registers in kvm_run */
#define KVM_SYNC_X86_REGS      (1UL << 0)
#define KVM_SYNC_X86_SREGS     (1UL << 1)
#define KVM_SYNC_X86_EVENTS    (1UL << 2)

#define KVM_SYNC_X86_VALID_FIELDS \
        (KVM_SYNC_X86_REGS| \
         KVM_SYNC_X86_SREGS| \
         KVM_SYNC_X86_EVENTS)

/* kvm_sync_regs struct included by kvm_run struct */
struct kvm_sync_regs {
        /* Members of this structure are potentially malicious.
         * Care must be taken by code reading, esp. interpreting,
         * data fields from them inside KVM to prevent TOCTOU and
         * double-fetch types of vulnerabilities.
         */
        struct kvm_regs regs;
        struct kvm_sregs sregs;
        struct kvm_vcpu_events events;
};
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
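
A purely illustrative sketch (not code from this commit) of the defensive
pattern the struct comment above asks for: snapshot the userspace-shared
field once inside the kernel and validate/use only that local copy.

        /* Illustrative only: read the userspace-writable bitmask a single
         * time, so it cannot change between the validity check and its
         * later use (avoids double-fetch/TOCTOU problems). */
        u64 dirty = READ_ONCE(vcpu->run->kvm_dirty_regs);

        if (dirty & ~KVM_SYNC_X86_VALID_FIELDS)
                return -EINVAL;
        if (dirty & KVM_SYNC_X86_REGS)
                __set_regs(vcpu, &vcpu->run->s.regs.regs);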

arch/x86/kvm/x86.c

@@ -102,6 +102,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);
struct kvm_x86_ops *kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -2829,6 +2831,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
r = KVM_SYNC_X86_VALID_FIELDS;
break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
@@ -7510,7 +7515,6 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -7536,6 +7540,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
        if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
                r = -EINVAL;
                goto out;
        }

        if (vcpu->run->kvm_dirty_regs) {
                r = sync_regs(vcpu);
                if (r != 0)
                        goto out;
        }

/* re-sync apic's tpr */
if (!lapic_in_kernel(vcpu)) {
if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
@@ -7560,6 +7575,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
out:
kvm_put_guest_fpu(vcpu);
if (vcpu->run->kvm_valid_regs)
store_regs(vcpu);
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -7567,10 +7584,8 @@ out:
return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
/*
* We are here if userspace calls get_regs() in the middle of
@@ -7603,15 +7618,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_get_rflags(vcpu);
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
__get_regs(vcpu, regs);
vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
@@ -7640,7 +7658,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.exception.pending = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
__set_regs(vcpu, regs);
vcpu_put(vcpu);
return 0;
}
@@ -7655,13 +7678,10 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct desc_ptr dt;
vcpu_load(vcpu);
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -7692,7 +7712,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
set_bit(vcpu->arch.interrupt.nr,
(unsigned long *)sregs->interrupt_bitmap);
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
vcpu_load(vcpu);
__get_sregs(vcpu, sregs);
vcpu_put(vcpu);
return 0;
}
@@ -7787,8 +7813,7 @@ int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
@@ -7796,8 +7821,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct desc_ptr dt;
int ret = -EINVAL;
vcpu_load(vcpu);
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
goto out;
@@ -7876,6 +7899,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
ret = 0;
out:
return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
int ret;
vcpu_load(vcpu);
ret = __set_sregs(vcpu, sregs);
vcpu_put(vcpu);
return ret;
}
@@ -8002,6 +8035,45 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
static void store_regs(struct kvm_vcpu *vcpu)
{
        BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);

        if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
                __get_regs(vcpu, &vcpu->run->s.regs.regs);

        if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
                __get_sregs(vcpu, &vcpu->run->s.regs.sregs);

        if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
                kvm_vcpu_ioctl_x86_get_vcpu_events(
                                vcpu, &vcpu->run->s.regs.events);
}

static int sync_regs(struct kvm_vcpu *vcpu)
{
        if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
                return -EINVAL;

        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
                __set_regs(vcpu, &vcpu->run->s.regs.regs);
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
        }
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
                if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
                        return -EINVAL;
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
        }
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
                if (kvm_vcpu_ioctl_x86_set_vcpu_events(
                                vcpu, &vcpu->run->s.regs.events))
                        return -EINVAL;
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
        }

        return 0;
}

static void fx_init(struct kvm_vcpu *vcpu)
{
fpstate_init(&vcpu->arch.guest_fpu.state);