mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Merge branch kvm-arm64/s1ptw-write-fault into kvmarm-master/fixes
* kvm-arm64/s1ptw-write-fault:
  : .
  : Fix S1PTW fault handling that was until then always taken
  : as a write. From the cover letter:
  :
  : "Recent developments on the EFI front have resulted in guests that
  : simply won't boot if the page tables are in a read-only memslot and
  : that you're a bit unlucky in the way S2 gets paged in... The core
  : issue is related to the fact that we treat a S1PTW as a write, which
  : is close enough to what needs to be done. Until you get to RO memslots.
  :
  : The first patch fixes this and is definitely a stable candidate. It
  : splits the faulting of page tables in two steps (RO translation fault,
  : followed by a writable permission fault -- should it even happen).
  : The second one documents the slightly odd behaviour of PTW writes to
  : RO memslots, which do not result in a KVM_MMIO exit. The last patch is
  : totally optional, only tangentially related, and randomly repainting
  : stuff (maybe that's contagious, who knows)."
  : .
  KVM: arm64: Convert FSC_* over to ESR_ELx_FSC_*
  KVM: arm64: Document the behaviour of S1PTW faults on RO memslots
  KVM: arm64: Fix S1PTW handling on RO memslots

Signed-off-by: Marc Zyngier <maz@kernel.org>
commit afbb1b1cae
7 changed files with 61 additions and 38 deletions
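The cover letter's failing setup is a guest whose stage-1 page tables live in a read-only memslot. For orientation, here is a minimal userspace sketch of registering such a slot; the VM fd handling, slot index, and all addresses/sizes are hypothetical, not taken from the patches:

/*
 * Minimal sketch: backing guest page tables with a KVM_MEM_READONLY slot.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#define PT_GPA	0x40000000UL	/* hypothetical guest-physical base */
#define PT_SIZE	0x200000UL	/* hypothetical size of the PT region */

int map_pagetables_readonly(int vm_fd)
{
	void *backing = mmap(NULL, PT_SIZE, PROT_READ | PROT_WRITE,
			     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (backing == MAP_FAILED)
		return -1;

	struct kvm_userspace_memory_region region = {
		.slot            = 1,			/* hypothetical */
		.flags           = KVM_MEM_READONLY,
		.guest_phys_addr = PT_GPA,
		.memory_size     = PT_SIZE,
		.userspace_addr  = (unsigned long)backing,
	};

	/*
	 * Pre-fix, a stage-1 walker access to this slot was always treated
	 * as a write, so the fault could never be satisfied; post-fix, the
	 * page is first mapped read-only and only HW AF/DB updates escalate
	 * to a (write) permission fault.
	 */
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}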
Documentation/virt/kvm/api.rst
@@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an
 mmap() that affects the region will be made visible immediately. Another
 example is madvise(MADV_DROP).
 
+Note: On arm64, a write generated by the page-table walker (to update
+the Access and Dirty flags, for example) never results in a
+KVM_EXIT_MMIO exit when the slot has the KVM_MEM_READONLY flag. This
+is because KVM cannot provide the data that would be written by the
+page-table walker, making it impossible to emulate the access.
+Instead, an abort (data abort if the cause of the page-table update
+was a load or a store, instruction abort if it was an instruction
+fetch) is injected in the guest.
+
 4.36 KVM_SET_TSS_ADDR
 ---------------------
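The note above covers the one case where a write to a KVM_MEM_READONLY slot never reaches userspace. For contrast, a sketch of the ordinary path, where guest stores to a read-only slot do surface as KVM_EXIT_MMIO; emulate_ro_write() is a hypothetical VMM helper, stubbed here:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static void emulate_ro_write(unsigned long long gpa,
			     unsigned char *data, unsigned int len)
{
	(void)gpa; (void)data; (void)len;	/* VMM-specific */
}

int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, NULL) < 0)
			return -1;

		switch (run->exit_reason) {
		case KVM_EXIT_MMIO:
			/* set for guest stores to the read-only slot */
			if (run->mmio.is_write)
				emulate_ro_write(run->mmio.phys_addr,
						 run->mmio.data,
						 run->mmio.len);
			break;
		default:
			return 0;	/* hand other exits to the caller */
		}
	}
}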
arch/arm64/include/asm/esr.h
@@ -114,6 +114,15 @@
 #define ESR_ELx_FSC_ACCESS	(0x08)
 #define ESR_ELx_FSC_FAULT	(0x04)
 #define ESR_ELx_FSC_PERM	(0x0C)
+#define ESR_ELx_FSC_SEA_TTW0	(0x14)
+#define ESR_ELx_FSC_SEA_TTW1	(0x15)
+#define ESR_ELx_FSC_SEA_TTW2	(0x16)
+#define ESR_ELx_FSC_SEA_TTW3	(0x17)
+#define ESR_ELx_FSC_SECC	(0x18)
+#define ESR_ELx_FSC_SECC_TTW0	(0x1c)
+#define ESR_ELx_FSC_SECC_TTW1	(0x1d)
+#define ESR_ELx_FSC_SECC_TTW2	(0x1e)
+#define ESR_ELx_FSC_SECC_TTW3	(0x1f)
 
 /* ISS field definitions for Data Aborts */
 #define ESR_ELx_ISV_SHIFT	(24)
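These values live in the 6-bit fault status code at the bottom of ESR_ELx. A standalone sketch (not kernel code; the mask values are copied from esr.h) of how masking with ESR_ELx_FSC_TYPE drops the two level bits, so 0x04-0x07 (translation fault, levels 0-3) all compare equal to ESR_ELx_FSC_FAULT; __get_fault_info() further down relies on the same trick:

#include <stdbool.h>
#include <stdint.h>

#define ESR_ELx_FSC		(0x3F)	/* full 6-bit fault status code */
#define ESR_ELx_FSC_TYPE	(0x3C)	/* status code minus the level bits */
#define ESR_ELx_FSC_FAULT	(0x04)
#define ESR_ELx_FSC_PERM	(0x0C)

static bool is_translation_fault(uint64_t esr)
{
	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
}

static bool is_permission_fault(uint64_t esr)
{
	/* matches permission faults at any lookup level */
	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
}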
arch/arm64/include/asm/kvm_arm.h
@@ -319,21 +319,6 @@
 			 BIT(18) |		\
 			 GENMASK(16, 15))
 
-/* For compatibility with fault code shared with 32-bit */
-#define FSC_FAULT	ESR_ELx_FSC_FAULT
-#define FSC_ACCESS	ESR_ELx_FSC_ACCESS
-#define FSC_PERM	ESR_ELx_FSC_PERM
-#define FSC_SEA		ESR_ELx_FSC_EXTABT
-#define FSC_SEA_TTW0	(0x14)
-#define FSC_SEA_TTW1	(0x15)
-#define FSC_SEA_TTW2	(0x16)
-#define FSC_SEA_TTW3	(0x17)
-#define FSC_SECC	(0x18)
-#define FSC_SECC_TTW0	(0x1c)
-#define FSC_SECC_TTW1	(0x1d)
-#define FSC_SECC_TTW2	(0x1e)
-#define FSC_SECC_TTW3	(0x1f)
-
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK	(~UL(0xf))
 /*
arch/arm64/include/asm/kvm_emulate.h
@@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
 static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
 {
 	switch (kvm_vcpu_trap_get_fault(vcpu)) {
-	case FSC_SEA:
-	case FSC_SEA_TTW0:
-	case FSC_SEA_TTW1:
-	case FSC_SEA_TTW2:
-	case FSC_SEA_TTW3:
-	case FSC_SECC:
-	case FSC_SECC_TTW0:
-	case FSC_SECC_TTW1:
-	case FSC_SECC_TTW2:
-	case FSC_SECC_TTW3:
+	case ESR_ELx_FSC_EXTABT:
+	case ESR_ELx_FSC_SEA_TTW0:
+	case ESR_ELx_FSC_SEA_TTW1:
+	case ESR_ELx_FSC_SEA_TTW2:
+	case ESR_ELx_FSC_SEA_TTW3:
+	case ESR_ELx_FSC_SECC:
+	case ESR_ELx_FSC_SECC_TTW0:
+	case ESR_ELx_FSC_SECC_TTW1:
+	case ESR_ELx_FSC_SECC_TTW2:
+	case ESR_ELx_FSC_SECC_TTW3:
 		return true;
 	default:
 		return false;
@@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 
 static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
 {
-	if (kvm_vcpu_abt_iss1tw(vcpu))
-		return true;
+	if (kvm_vcpu_abt_iss1tw(vcpu)) {
+		/*
+		 * Only a permission fault on a S1PTW should be
+		 * considered as a write. Otherwise, page tables baked
+		 * in a read-only memslot will result in an exception
+		 * being delivered in the guest.
+		 *
+		 * The drawback is that we end-up faulting twice if the
+		 * guest is using any of HW AF/DB: a translation fault
+		 * to map the page containing the PT (read only at
+		 * first), then a permission fault to allow the flags
+		 * to be set.
+		 */
+		switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+		case ESR_ELx_FSC_PERM:
+			return true;
+		default:
+			return false;
+		}
+	}
 
 	if (kvm_vcpu_trap_is_iabt(vcpu))
 		return false;
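To make the two-step sequence in the comment above concrete, a standalone model (not kernel code; all names are invented for illustration) of the post-fix classification:

#include <stdbool.h>
#include <stdio.h>

enum fsc_type { FSC_TRANSLATION, FSC_PERMISSION };

/* s1ptw: the access came from the stage-1 page-table walker;
 * wnr: the WnR "write not read" bit of a plain data abort. */
static bool is_write_fault(bool s1ptw, enum fsc_type fsc, bool wnr)
{
	if (s1ptw)
		return fsc == FSC_PERMISSION;	/* post-fix rule */
	return wnr;
}

int main(void)
{
	/* Step 1: walker touches an unmapped PT page -> read fault, R/O map. */
	printf("S1PTW translation fault is write: %d\n",
	       is_write_fault(true, FSC_TRANSLATION, false));
	/* Step 2: walker sets AF/DB through the R/O map -> now a real write. */
	printf("S1PTW permission fault is write:  %d\n",
	       is_write_fault(true, FSC_PERMISSION, true));
	return 0;
}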
arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
 	 */
 	if (!(esr & ESR_ELx_S1PTW) &&
 	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
-	     (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+	     (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
 		if (!__translate_far_to_hpfar(far, &hpfar))
 			return false;
 	} else {
arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
 	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
 		bool valid;
 
-		valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+		valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
 			kvm_vcpu_dabt_isvalid(vcpu) &&
 			!kvm_vcpu_abt_issea(vcpu) &&
 			!kvm_vcpu_abt_iss1tw(vcpu);
arch/arm64/kvm/mmu.c
@@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
 	VM_BUG_ON(write_fault && exec_fault);
 
-	if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
+	if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
 		kvm_err("Unexpected L2 read permission error\n");
 		return -EFAULT;
 	}
@@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * only exception to this is when dirty logging is enabled at runtime
 	 * and a write fault needs to collapse a block entry into a table.
 	 */
-	if (fault_status != FSC_PERM || (logging_active && write_fault)) {
+	if (fault_status != ESR_ELx_FSC_PERM ||
+	    (logging_active && write_fault)) {
 		ret = kvm_mmu_topup_memory_cache(memcache,
 						 kvm_mmu_cache_min_pages(kvm));
 		if (ret)
@@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * backed by a THP and thus use block mapping if possible.
 	 */
 	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
-		if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+		if (fault_status == ESR_ELx_FSC_PERM &&
+		    fault_granule > PAGE_SIZE)
 			vma_pagesize = fault_granule;
 		else
 			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
@@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 							   &fault_ipa);
 	}
 
-	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+	if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
 		/* Check the VMM hasn't introduced a new disallowed VMA */
 		if (kvm_vma_mte_allowed(vma)) {
 			sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * permissions only if vma_pagesize equals fault_granule. Otherwise,
 	 * kvm_pgtable_stage2_map() should be called to change block size.
 	 */
-	if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
+	if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
 		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
 	else
 		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
@@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
-	if (fault_status == FSC_FAULT) {
+	if (fault_status == ESR_ELx_FSC_FAULT) {
 		/* Beyond sanitised PARange (which is the IPA limit) */
 		if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
 			kvm_inject_size_fault(vcpu);
@@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
 	/* Check the stage-2 fault is trans. fault or write fault */
-	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
-	    fault_status != FSC_ACCESS) {
+	if (fault_status != ESR_ELx_FSC_FAULT &&
+	    fault_status != ESR_ELx_FSC_PERM &&
+	    fault_status != ESR_ELx_FSC_ACCESS) {
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	/* Userspace should not be able to register out-of-bounds IPAs */
 	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
 
-	if (fault_status == FSC_ACCESS) {
+	if (fault_status == ESR_ELx_FSC_ACCESS) {
 		handle_access_fault(vcpu, fault_ipa);
 		ret = 1;
 		goto out_unlock;