KVM: arm64: nv: Add trap forwarding infrastructure

A significant part of what an NV hypervisor needs to do is to decide
whether a trap from an L2+ guest has to be forwarded to an L1 guest
or handled locally. This is done by checking the trap bits that
the guest hypervisor has set and acting accordingly, as described by
the architecture.
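
As a purely illustrative, standalone sketch of that decision (not part of
this patch; the helper and its standalone form are made up here), forwarding
an ACTLR_EL1 access boils down to checking whether the L1 guest hypervisor
has set HCR_EL2.TACR in its virtual HCR_EL2:

#include <stdbool.h>
#include <stdint.h>

#define HCR_TACR	(UINT64_C(1) << 21)	/* HCR_EL2.TACR: trap EL1 accesses to ACTLR_EL1 */

/* Illustrative only: forward the trap iff L1 asked for it via its virtual HCR_EL2 */
static bool forward_actlr_trap(uint64_t l1_hcr_el2)
{
	return (l1_hcr_el2 & HCR_TACR) != 0;
}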

A previous approach was to sprinkle a bunch of checks in all the
system register accessors, but this is pretty error-prone and doesn't
give a good overview of what is happening.

Instead, implement a set of global tables that describe a trap bit,
combinations of trap bits, behaviours on trap, and what bits must
be evaluated on a system register trap.
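
For illustration only, here is what populated entries could look like, reusing
the structures and macros introduced in the diff below. This patch leaves the
tables empty; CGT_HCR_TACR is a hypothetical group id standing in for a later
addition, while HCR_TACR, SYS_ACTLR_EL1 and the HCR_EL2 vcpu_sysreg index are
existing kernel definitions:

/* Hypothetical example entries -- not added by this patch */
static const struct trap_bits coarse_trap_bits[] = {
	[CGT_HCR_TACR] = {
		.index		= HCR_EL2,		/* vcpu_sysreg holding the control bits */
		.value		= HCR_TACR,		/* trap when HCR_EL2.TACR is set... */
		.mask		= HCR_TACR,		/* ...looking only at that bit */
		.behaviour	= BEHAVE_FORWARD_ANY,	/* forward both reads and writes */
	},
};

static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
	SR_TRAP(SYS_ACTLR_EL1, CGT_HCR_TACR),
};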

Although this is painful to describe, it allows each and every
control bit to be specified in a static manner. To make it efficient,
the table is inserted into an xarray that is global to the system,
and checked each time we trap a system register while running
an L2 guest.
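
A minimal sketch of the resulting xarray usage, assuming the trap_config
layout from this patch and the hypothetical CGT_HCR_TACR id from the example
above. Note that xa_mk_value() shifts the payload left by one bit and uses
bit 0 as a tag, so only 63 bits survive the round trip; this is why bit 63
of the configuration word is declared MBZ and rejected when the table is
populated:

/* Boot time: key the packed configuration by sysreg encoding */
union trap_config tc = { .cgt = CGT_HCR_TACR };
xa_store(&sr_forward_xa, SYS_ACTLR_EL1, xa_mk_value(tc.val), GFP_KERNEL);

/* Trap time: retrieve the configuration for the trapped encoding */
tc.val = xa_to_value(xa_load(&sr_forward_xa, SYS_ACTLR_EL1));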

Add the basic infrastructure for now; additional patches will provide
the actual trap configuration for the individual registers.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Jing Zhang <jingzhangos@google.com>
Reviewed-by: Miguel Luis <miguel.luis@oracle.com>
Link: https://lore.kernel.org/r/20230815183903.2735724-15-maz@kernel.org
Author: Marc Zyngier
Date:   2023-08-15 19:38:48 +01:00
Parent: e930694e61
Commit: e58ec47bf6
5 changed files with 317 additions and 0 deletions


@@ -988,6 +988,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
int __init kvm_sys_reg_table_init(void);
int __init populate_nv_trap_config(void);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);


@@ -11,6 +11,8 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
}
extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
struct sys_reg_params;
struct sys_reg_desc;


@@ -14,6 +14,288 @@
#include "trace.h"
enum trap_behaviour {
BEHAVE_HANDLE_LOCALLY = 0,
BEHAVE_FORWARD_READ = BIT(0),
BEHAVE_FORWARD_WRITE = BIT(1),
BEHAVE_FORWARD_ANY = BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE,
};
struct trap_bits {
const enum vcpu_sysreg index;
const enum trap_behaviour behaviour;
const u64 value;
const u64 mask;
};
/* Coarse Grained Trap definitions */
enum cgt_group_id {
/* Indicates no coarse trap control */
__RESERVED__,
/*
 * The first batch of IDs denotes coarse trap controls that are used
 * on their own instead of being part of a combination of
 * trap controls.
 */
/*
* Anything after this point is a combination of coarse trap
* controls, which must all be evaluated to decide what to do.
*/
__MULTIPLE_CONTROL_BITS__,
/*
* Anything after this point requires a callback evaluating a
* complex trap condition. Hopefully we'll never need this...
*/
__COMPLEX_CONDITIONS__,
/* Must be last */
__NR_CGT_GROUP_IDS__
};
static const struct trap_bits coarse_trap_bits[] = {
};
#define MCB(id, ...) \
[id - __MULTIPLE_CONTROL_BITS__] = \
(const enum cgt_group_id[]){ \
__VA_ARGS__, __RESERVED__ \
}
static const enum cgt_group_id *coarse_control_combo[] = {
};
typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);
#define CCC(id, fn) \
[id - __COMPLEX_CONDITIONS__] = fn
static const complex_condition_check ccc[] = {
};
/*
* Bit assignment for the trap controls. We use a 64bit word with the
* following layout for each trapped sysreg:
*
* [9:0] enum cgt_group_id (10 bits)
* [62:10] Unused (53 bits)
* [63] RES0 - Must be zero, as lost on insertion in the xarray
*/
#define TC_CGT_BITS 10
union trap_config {
u64 val;
struct {
unsigned long cgt:TC_CGT_BITS; /* Coarse Grained Trap id */
unsigned long unused:53; /* Unused, should be zero */
unsigned long mbz:1; /* Must Be Zero */
};
};
struct encoding_to_trap_config {
const u32 encoding;
const u32 end;
const union trap_config tc;
const unsigned int line;
};
#define SR_RANGE_TRAP(sr_start, sr_end, trap_id) \
{ \
.encoding = sr_start, \
.end = sr_end, \
.tc = { \
.cgt = trap_id, \
}, \
.line = __LINE__, \
}
#define SR_TRAP(sr, trap_id) SR_RANGE_TRAP(sr, sr, trap_id)
/*
 * Map encoding to trap bits for exceptions reported with EC=0x18.
 * These must only be evaluated when running a nested hypervisor, and
 * only when the current context is not a hypervisor context. When the
 * trapped access matches one of the trap controls, the exception is
 * re-injected in the nested hypervisor.
 */
static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
};
static DEFINE_XARRAY(sr_forward_xa);
static union trap_config get_trap_config(u32 sysreg)
{
return (union trap_config) {
.val = xa_to_value(xa_load(&sr_forward_xa, sysreg)),
};
}
static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc,
const char *type, int err)
{
kvm_err("%s line %d encoding range "
"(%d, %d, %d, %d, %d) - (%d, %d, %d, %d, %d) (err=%d)\n",
type, tc->line,
sys_reg_Op0(tc->encoding), sys_reg_Op1(tc->encoding),
sys_reg_CRn(tc->encoding), sys_reg_CRm(tc->encoding),
sys_reg_Op2(tc->encoding),
sys_reg_Op0(tc->end), sys_reg_Op1(tc->end),
sys_reg_CRn(tc->end), sys_reg_CRm(tc->end),
sys_reg_Op2(tc->end),
err);
}
int __init populate_nv_trap_config(void)
{
int ret = 0;
BUILD_BUG_ON(sizeof(union trap_config) != sizeof(void *));
BUILD_BUG_ON(__NR_CGT_GROUP_IDS__ > BIT(TC_CGT_BITS));
for (int i = 0; i < ARRAY_SIZE(encoding_to_cgt); i++) {
const struct encoding_to_trap_config *cgt = &encoding_to_cgt[i];
void *prev;
if (cgt->tc.val & BIT(63)) {
kvm_err("CGT[%d] has MBZ bit set\n", i);
ret = -EINVAL;
}
if (cgt->encoding != cgt->end) {
prev = xa_store_range(&sr_forward_xa,
cgt->encoding, cgt->end,
xa_mk_value(cgt->tc.val),
GFP_KERNEL);
} else {
prev = xa_store(&sr_forward_xa, cgt->encoding,
xa_mk_value(cgt->tc.val), GFP_KERNEL);
if (prev && !xa_is_err(prev)) {
ret = -EINVAL;
print_nv_trap_error(cgt, "Duplicate CGT", ret);
}
}
if (xa_is_err(prev)) {
ret = xa_err(prev);
print_nv_trap_error(cgt, "Failed CGT insertion", ret);
}
}
kvm_info("nv: %ld coarse grained trap handlers\n",
ARRAY_SIZE(encoding_to_cgt));
for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) {
const enum cgt_group_id *cgids;
cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
for (int i = 0; cgids[i] != __RESERVED__; i++) {
if (cgids[i] >= __MULTIPLE_CONTROL_BITS__) {
kvm_err("Recursive MCB %d/%d\n", id, cgids[i]);
ret = -EINVAL;
}
}
}
if (ret)
xa_destroy(&sr_forward_xa);
return ret;
}
static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
const struct trap_bits *tb)
{
enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY;
u64 val;
val = __vcpu_sys_reg(vcpu, tb->index);
if ((val & tb->mask) == tb->value)
b |= tb->behaviour;
return b;
}
static enum trap_behaviour __compute_trap_behaviour(struct kvm_vcpu *vcpu,
const enum cgt_group_id id,
enum trap_behaviour b)
{
switch (id) {
const enum cgt_group_id *cgids;
case __RESERVED__ ... __MULTIPLE_CONTROL_BITS__ - 1:
if (likely(id != __RESERVED__))
b |= get_behaviour(vcpu, &coarse_trap_bits[id]);
break;
case __MULTIPLE_CONTROL_BITS__ ... __COMPLEX_CONDITIONS__ - 1:
/* Yes, this is recursive. Don't do anything stupid. */
cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
for (int i = 0; cgids[i] != __RESERVED__; i++)
b |= __compute_trap_behaviour(vcpu, cgids[i], b);
break;
default:
if (ARRAY_SIZE(ccc))
b |= ccc[id - __COMPLEX_CONDITIONS__](vcpu);
break;
}
return b;
}
static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu,
const union trap_config tc)
{
enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY;
return __compute_trap_behaviour(vcpu, tc.cgt, b);
}
bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
{
union trap_config tc;
enum trap_behaviour b;
bool is_read;
u32 sysreg;
u64 esr;
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
return false;
esr = kvm_vcpu_get_esr(vcpu);
sysreg = esr_sys64_to_sysreg(esr);
is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
tc = get_trap_config(sysreg);
/*
* A value of 0 for the whole entry means that we know nothing
* for this sysreg, and that it cannot be re-injected into the
* nested hypervisor. In this situation, let's cut it short.
*
* Note that ultimately, we could also make use of the xarray
* to store the index of the sysreg in the local descriptor
* array, avoiding another search... Hint, hint...
*/
if (!tc.val)
return false;
b = compute_trap_behaviour(vcpu, tc);
if (((b & BEHAVE_FORWARD_READ) && is_read) ||
((b & BEHAVE_FORWARD_WRITE) && !is_read))
goto inject;
return false;
inject:
trace_kvm_forward_sysreg_trap(vcpu, sysreg, is_read);
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return true;
}
static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
{
u64 mode = spsr & PSR_MODE_MASK;
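
To make the "multiple control bits" (MCB) combination and the recursion in
__compute_trap_behaviour() above concrete, here is a hypothetical illustration
of how a register trapped by two different HCR_EL2 bits could be wired up once
later patches populate the tables. The CGT_HCR_TVM, CGT_HCR_TRVM and
CGT_HCR_TVM_TRVM ids are illustrative; HCR_TVM, HCR_TRVM and SYS_SCTLR_EL1 are
existing kernel definitions:

/* Two individual controls, one per HCR_EL2 bit */
[CGT_HCR_TVM] = {
	.index		= HCR_EL2,
	.value		= HCR_TVM,
	.mask		= HCR_TVM,
	.behaviour	= BEHAVE_FORWARD_WRITE,	/* TVM only traps writes */
},
[CGT_HCR_TRVM] = {
	.index		= HCR_EL2,
	.value		= HCR_TRVM,
	.mask		= HCR_TRVM,
	.behaviour	= BEHAVE_FORWARD_READ,	/* TRVM only traps reads */
},

/* One combined id: evaluating it ORs the behaviour of both controls */
MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM),

/* SCTLR_EL1 is then described once, via the combined id */
SR_TRAP(SYS_SCTLR_EL1, CGT_HCR_TVM_TRVM),

With such entries in place, a write to SCTLR_EL1 from L2 would be forwarded
when L1 has set TVM, and a read would be forwarded when L1 has set TRVM.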


@@ -3177,6 +3177,9 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
trace_kvm_handle_sys_reg(esr);
if (__check_nv_sr_forward(vcpu))
return 1;
params = esr_sys64_to_params(esr);
params.regval = vcpu_get_reg(vcpu, Rt);
@@ -3594,5 +3597,8 @@ int __init kvm_sys_reg_table_init(void)
if (!first_idreg)
return -EINVAL;
if (kvm_get_mode() == KVM_MODE_NV)
return populate_nv_trap_config();
return 0;
}


@@ -364,6 +364,32 @@ TRACE_EVENT(kvm_inject_nested_exception,
__entry->hcr_el2)
);
TRACE_EVENT(kvm_forward_sysreg_trap,
TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read),
TP_ARGS(vcpu, sysreg, is_read),
TP_STRUCT__entry(
__field(u64, pc)
__field(u32, sysreg)
__field(bool, is_read)
),
TP_fast_assign(
__entry->pc = *vcpu_pc(vcpu);
__entry->sysreg = sysreg;
__entry->is_read = is_read;
),
TP_printk("%llx %c (%d,%d,%d,%d,%d)",
__entry->pc,
__entry->is_read ? 'R' : 'W',
sys_reg_Op0(__entry->sysreg),
sys_reg_Op1(__entry->sysreg),
sys_reg_CRn(__entry->sysreg),
sys_reg_CRm(__entry->sysreg),
sys_reg_Op2(__entry->sysreg))
);
#endif /* _TRACE_ARM_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH