KVM/arm64 fixes for 6.2, take #1

- Fix the PMCR_EL0 reset value after the PMU rework
 
 - Correctly handle S2 fault triggered by a S1 page table walk
   by not always classifying it as a write, as this breaks on
   R/O memslots
 
 - Document why we cannot exit with KVM_EXIT_MMIO when taking
   a write fault from a S1 PTW on a R/O memslot
 
 - Put the Apple M2 on the naughty step for not being able to
   correctly implement the vgic SEIS feature, just liek the M1
   before it
 
 - Reviewer updates: Alex is stepping down, replaced by Zenghui
 -----BEGIN PGP SIGNATURE-----
 
 iQJDBAABCgAtFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAmO27gQPHG1hekBrZXJu
 ZWwub3JnAAoJECPQ0LrRPXpDwioP/A0UE7ujSxv3dlBstBhmtzOoX64pRufX01Kr
 1oF24M1VuTVLwl3pp1nWH10SVWv5kukYZJAJ/3tDJOaMt/Q9c0exPCPc95i2p/r7
 OC9j8rZVZnjGN6sAP5zazIT67tSanyLDeCC+j4J1pw20r2tB67LKSOoozEb5How7
 CX+Oa2OiEiI34jp33v3mFQ3VxY3714QUMBUK7n+L29IFXGmQp6dfbhn2iY3uNpoU
 YYrkPzBLUC1H//oCx0qoDDCXXeOKMGuWP1At5GIDz6ZSCBVpKdVbftCC59Dk7dDz
 7BdQ5JoEc15RTZajdopOog4RV4YHP8VszaClhCA1ML0Pd2Mf4UVLlPnn7F+3yR3r
 pMgjlOAlLJwHiwggJZ0EQ0wFdx9LuGeu3OwckGE/JxeEwaMdzGAEfcFoAGZV0ExZ
 7riiKS+NmtrkuE9wJfWOrpDiseymmUbuhHq+F/HDq/SP6UdezAylkcxZRuN/ZCRc
 9XVhTcWu/UPxoaSSd/sB4l9X8Ey/cZe28+kV7eE/m2g79bZKxHd4UUOUymb/aJxj
 og10A6i0B1DOWMtKJ9hEsB6wI6Hllrqcbo8ewX1znKoKbfHZDeU/N5D4ZvTz85sf
 zyqbsSZPDxMOwBPYTqZqG65tEWWw68HIJ9cqQzKDehN1Xm1coNIWSPrUnBMpSsWJ
 qDQNmIzf
 =XBtQ
 -----END PGP SIGNATURE-----

Merge tag 'kvmarm-fixes-6.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-master

KVM/arm64 fixes for 6.2, take #1

- Fix the PMCR_EL0 reset value after the PMU rework

- Correctly handle S2 fault triggered by a S1 page table walk
  by not always classifying it as a write, as this breaks on
  R/O memslots

- Document why we cannot exit with KVM_EXIT_MMIO when taking
  a write fault from a S1 PTW on a R/O memslot

- Put the Apple M2 on the naughty step for not being able to
  correctly implement the vgic SEIS feature, just liek the M1
  before it

- Reviewer updates: Alex is stepping down, replaced by Zenghui
This commit is contained in:
Paolo Bonzini 2023-01-11 13:31:53 -05:00
commit 71d0393576
11 changed files with 69 additions and 40 deletions

View File

@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an
mmap() that affects the region will be made visible immediately. Another
example is madvise(MADV_DROP).
Note: On arm64, a write generated by the page-table walker (to update
the Access and Dirty flags, for example) never results in a
KVM_EXIT_MMIO exit when the slot has the KVM_MEM_READONLY flag. This
is because KVM cannot provide the data that would be written by the
page-table walker, making it impossible to emulate the access.
Instead, an abort (data abort if the cause of the page-table update
was a load or a store, instruction abort if it was an instruction
fetch) is injected in the guest.
4.36 KVM_SET_TSS_ADDR
---------------------

View File

@ -11356,9 +11356,9 @@ F: virt/kvm/*
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M: Marc Zyngier <maz@kernel.org>
R: James Morse <james.morse@arm.com>
R: Alexandru Elisei <alexandru.elisei@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Oliver Upton <oliver.upton@linux.dev>
R: Zenghui Yu <yuzenghui@huawei.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.linux.dev
L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)

View File

@ -124,6 +124,8 @@
#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define APPLE_CPU_PART_M2_BLIZZARD 0x032
#define APPLE_CPU_PART_M2_AVALANCHE 0x033
#define AMPERE_CPU_PART_AMPERE1 0xAC3
@ -177,6 +179,8 @@
#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */

View File

@ -114,6 +114,15 @@
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
#define ESR_ELx_FSC_SEA_TTW0 (0x14)
#define ESR_ELx_FSC_SEA_TTW1 (0x15)
#define ESR_ELx_FSC_SEA_TTW2 (0x16)
#define ESR_ELx_FSC_SEA_TTW3 (0x17)
#define ESR_ELx_FSC_SECC (0x18)
#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)

View File

@ -319,21 +319,6 @@
BIT(18) | \
GENMASK(16, 15))
/* For compatibility with fault code shared with 32-bit */
#define FSC_FAULT ESR_ELx_FSC_FAULT
#define FSC_ACCESS ESR_ELx_FSC_ACCESS
#define FSC_PERM ESR_ELx_FSC_PERM
#define FSC_SEA ESR_ELx_FSC_EXTABT
#define FSC_SEA_TTW0 (0x14)
#define FSC_SEA_TTW1 (0x15)
#define FSC_SEA_TTW2 (0x16)
#define FSC_SEA_TTW3 (0x17)
#define FSC_SECC (0x18)
#define FSC_SECC_TTW0 (0x1c)
#define FSC_SECC_TTW1 (0x1d)
#define FSC_SECC_TTW2 (0x1e)
#define FSC_SECC_TTW3 (0x1f)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
/*

View File

@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *v
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
case FSC_SEA:
case FSC_SEA_TTW0:
case FSC_SEA_TTW1:
case FSC_SEA_TTW2:
case FSC_SEA_TTW3:
case FSC_SECC:
case FSC_SECC_TTW0:
case FSC_SECC_TTW1:
case FSC_SECC_TTW2:
case FSC_SECC_TTW3:
case ESR_ELx_FSC_EXTABT:
case ESR_ELx_FSC_SEA_TTW0:
case ESR_ELx_FSC_SEA_TTW1:
case ESR_ELx_FSC_SEA_TTW2:
case ESR_ELx_FSC_SEA_TTW3:
case ESR_ELx_FSC_SECC:
case ESR_ELx_FSC_SECC_TTW0:
case ESR_ELx_FSC_SECC_TTW1:
case ESR_ELx_FSC_SECC_TTW2:
case ESR_ELx_FSC_SECC_TTW3:
return true;
default:
return false;
@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_abt_iss1tw(vcpu))
return true;
if (kvm_vcpu_abt_iss1tw(vcpu)) {
/*
* Only a permission fault on a S1PTW should be
* considered as a write. Otherwise, page tables baked
* in a read-only memslot will result in an exception
* being delivered in the guest.
*
* The drawback is that we end-up faulting twice if the
* guest is using any of HW AF/DB: a translation fault
* to map the page containing the PT (read only at
* first), then a permission fault to allow the flags
* to be set.
*/
switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
case ESR_ELx_FSC_PERM:
return true;
default:
return false;
}
}
if (kvm_vcpu_trap_is_iabt(vcpu))
return false;

View File

@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
*/
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
(esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {

View File

@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu);

View File

@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault);
if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* only exception to this is when dirty logging is enabled at runtime
* and a write fault needs to collapse a block entry into a table.
*/
if (fault_status != FSC_PERM || (logging_active && write_fault)) {
if (fault_status != ESR_ELx_FSC_PERM ||
(logging_active && write_fault)) {
ret = kvm_mmu_topup_memory_cache(memcache,
kvm_mmu_cache_min_pages(kvm));
if (ret)
@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* backed by a THP and thus use block mapping if possible.
*/
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
if (fault_status == ESR_ELx_FSC_PERM &&
fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule;
else
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
&fault_ipa);
}
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
if (kvm_vma_mte_allowed(vma)) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* permissions only if vma_pagesize equals fault_granule. Otherwise,
* kvm_pgtable_stage2_map() should be called to change block size.
*/
if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
else
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
if (fault_status == FSC_FAULT) {
if (fault_status == ESR_ELx_FSC_FAULT) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
fault_status != FSC_ACCESS) {
if (fault_status != ESR_ELx_FSC_FAULT &&
fault_status != ESR_ELx_FSC_PERM &&
fault_status != ESR_ELx_FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
/* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
if (fault_status == FSC_ACCESS) {
if (fault_status == ESR_ELx_FSC_ACCESS) {
handle_access_fault(vcpu, fault_ipa);
ret = 1;
goto out_unlock;

View File

@ -646,7 +646,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return;
/* Only preserve PMCR_EL0.N, and reset the rest to 0 */
pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK;
pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
if (!kvm_supports_32bit_el0())
pmcr |= ARMV8_PMU_PMCR_LC;

View File

@ -616,6 +616,8 @@ static const struct midr_range broken_seis[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
{},
};