- Fix compilation without RISCV_ISA_ZICBOM
 
 - Fix kvm_riscv_vcpu_timer_pending() for Sstc
 
 ARM:
 
 - Fix a bug preventing restoring an ITS containing mappings
   for very large and very sparse device topology
 
 - Work around a relocation handling error when compiling
   the nVHE object with profile optimisation
 
 - Fix for stage-2 invalidation holding the VM MMU lock
   for too long by limiting the walk to the largest
   block mapping size
 
 - Enable stack protection and branch profiling for VHE
 
 - Two selftest fixes
 
 x86:
 
 - add compat implementation for KVM_X86_SET_MSR_FILTER ioctl
 
 selftests:
 
 - synchronize includes between include/uapi and tools/include/uapi
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmNT2hQUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroNeVwgAkGGk2F2SF5s+MQUQ9tDPxyuRbddN
 NPo/YRTKszKc8rK6d1TCbQi56I3e8Oa7kNkMF7CiBlAekB7B1r1ySg5qc+3lQebx
 moME30Ru4nmfqPcZ7971MA8Me7zZxGzvIviL5KIwm1ownGifdTsPZ9jCvu4EPdzv
 3dd10guH3GeBIq8QeQGEqNP4fticziwhE+IA3HZstcWsq96800Le7WNAgklfzdC+
 YTB81QU6whHv6N/7YvRcTbp+tER3VIKdFMmRD1FwC90flhXMbxTymESFXULfHCM2
 x/arGz2E31/QGgJo0/Yy2VPenr5ZMU57dL4SYWR02mwSfJQnJWb1cRdWnw==
 =rxQ7
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "RISC-V:

   - Fix compilation without RISCV_ISA_ZICBOM

   - Fix kvm_riscv_vcpu_timer_pending() for Sstc

  ARM:

   - Fix a bug preventing restoring an ITS containing mappings for very
     large and very sparse device topology

   - Work around a relocation handling error when compiling the nVHE
     object with profile optimisation

   - Fix for stage-2 invalidation holding the VM MMU lock for too long
     by limiting the walk to the largest block mapping size

   - Enable stack protection and branch profiling for VHE

   - Two selftest fixes

  x86:

   - add compat implementation for KVM_X86_SET_MSR_FILTER ioctl

  selftests:

   - synchronize includes between include/uapi and tools/include/uapi"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  tools: include: sync include/api/linux/kvm.h
  KVM: x86: Add compat handler for KVM_X86_SET_MSR_FILTER
  KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter()
  kvm: Add support for arch compat vm ioctls
  RISC-V: KVM: Fix kvm_riscv_vcpu_timer_pending() for Sstc
  RISC-V: Fix compilation without RISCV_ISA_ZICBOM
  KVM: arm64: vgic: Fix exit condition in scan_its_table()
  KVM: arm64: nvhe: Fix build with profile optimization
  KVM: selftests: Fix number of pages for memory slot in memslot_modification_stress_test
  KVM: arm64: selftests: Fix multiple versions of GIC creation
  KVM: arm64: Enable stack protection and branch profiling for VHE
  KVM: arm64: Limit stage2_apply_range() batch size to largest block
  KVM: arm64: Work out supported block level at compile time
This commit is contained in:
Linus Torvalds 2022-10-23 15:00:43 -07:00
commit 05b4ebd2c7
18 changed files with 185 additions and 104 deletions

View File

@ -13,6 +13,18 @@
#define KVM_PGTABLE_MAX_LEVELS 4U
/*
* The largest supported block sizes for KVM (no 52-bit PA support):
* - 4K (level 1): 1GB
* - 16K (level 2): 32MB
* - 64K (level 2): 512MB
*/
/*
 * Lowest-numbered page-table level at which KVM uses block mappings,
 * chosen at compile time from the configured page size.
 */
#ifdef CONFIG_ARM64_4K_PAGES
/* 4K pages: block mappings are allowed from level 1 upwards. */
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1U
#else
/* Any other page size (16K or 64K per the table above): from level 2. */
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U
#endif
static inline u64 kvm_get_parange(u64 mmfr0)
{
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
@ -58,11 +70,7 @@ static inline u64 kvm_granule_size(u32 level)
static inline bool kvm_level_supports_block_mapping(u32 level)
{
/*
* Reject invalid block mappings and don't bother with 4TB mappings for
* 52-bit PAs.
*/
return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
}
/**

View File

@ -10,13 +10,6 @@
#include <linux/pgtable.h>
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
* and depends on the number of levels in the page table. Compute the
* PGDIR_SHIFT for a given number of levels.
*/
#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
/*
* The hardware supports concatenation of up to 16 tables at stage2 entry
* level and we use the feature whenever possible, which means we resolve 4
@ -30,11 +23,6 @@
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */
#define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm))
#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1)
/*
* kvm_mmu_cache_min_pages() is the number of pages required to install
* a stage-2 translation. We pre-allocate the entry level page table at
@ -42,12 +30,4 @@
*/
#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
static inline phys_addr_t
stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
return (boundary - 1 < end - 1) ? boundary : end;
}
#endif /* __ARM64_S2_PGTABLE_H_ */

View File

@ -5,9 +5,6 @@
incdir := $(srctree)/$(src)/include
subdir-asflags-y := -I$(incdir)
subdir-ccflags-y := -I$(incdir) \
-fno-stack-protector \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
subdir-ccflags-y := -I$(incdir)
obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o

View File

@ -10,6 +10,9 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
# will explode instantly (Words of Marc Zyngier). So introduce a generic flag
# __DISABLE_TRACE_MMIO__ to disable MMIO tracing for nVHE KVM.
ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
ccflags-y += -fno-stack-protector \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
hostprogs := gen-hyprel
HOST_EXTRACFLAGS += -I$(objtree)/include
@ -89,6 +92,10 @@ quiet_cmd_hypcopy = HYPCOPY $@
# Remove ftrace, Shadow Call Stack, and CFI CFLAGS.
# This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations.
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS))
# Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile'
# when profile optimization is applied. gen-hyprel does not support SHT_REL and
# causes a build failure. Remove profile optimization flags.
KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
# KVM nVHE code is run at a different exception level with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may

View File

@ -31,6 +31,13 @@ static phys_addr_t hyp_idmap_vector;
static unsigned long io_map_base;
/*
 * Compute where the current chunk of a stage-2 range walk stops.
 *
 * The chunk ends at the next boundary aligned to the size of a block at
 * KVM_PGTABLE_MIN_BLOCK_LEVEL that lies above @addr, or at @end if that
 * comes first.
 */
static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t block = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);
	phys_addr_t next = ALIGN_DOWN(addr + block, block);

	/*
	 * Both sides are compared after subtracting one; presumably this is
	 * so a boundary that wrapped around to 0 is treated as the largest
	 * value and @end is picked instead -- confirm phys_addr_t is unsigned.
	 */
	if (next - 1 < end - 1)
		return next;

	return end;
}
/*
* Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
@ -52,7 +59,7 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
if (!pgt)
return -EINVAL;
next = stage2_pgd_addr_end(kvm, addr, end);
next = stage2_range_addr_end(addr, end);
ret = fn(pgt, addr, next - addr);
if (ret)
break;

View File

@ -2149,7 +2149,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
memset(entry, 0, esz);
while (len > 0) {
while (true) {
int next_offset;
size_t byte_offset;
@ -2162,6 +2162,9 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
return next_offset;
byte_offset = next_offset * esz;
if (byte_offset >= len)
break;
id += next_offset;
gpa += byte_offset;
len -= byte_offset;

View File

@ -42,16 +42,8 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
#endif /* CONFIG_SMP */
/*
* The T-Head CMO errata internally probe the CBOM block size, but otherwise
* don't depend on Zicbom.
*/
extern unsigned int riscv_cbom_block_size;
#ifdef CONFIG_RISCV_ISA_ZICBOM
void riscv_init_cbom_blocksize(void);
#else
static inline void riscv_init_cbom_blocksize(void) { }
#endif
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
void riscv_noncoherent_supported(void);

View File

@ -45,6 +45,7 @@ int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu);
void kvm_riscv_guest_timer_init(struct kvm *kvm);
void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu);
bool kvm_riscv_vcpu_timer_pending(struct kvm_vcpu *vcpu);

View File

@ -708,6 +708,9 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
clear_bit(IRQ_VS_SOFT, &v->irqs_pending);
}
}
/* Sync-up timer CSRs */
kvm_riscv_vcpu_timer_sync(vcpu);
}
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)

View File

@ -320,6 +320,21 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_timer_unblocking(vcpu);
}
/*
 * Refresh the cached next_cycles value of @vcpu's timer from the
 * VSTIMECMP CSR(s).
 *
 * Does nothing unless the timer has sstc_enabled set. On 32-bit builds the
 * upper 32 bits are read from CSR_VSTIMECMPH after the lower word.
 */
void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_timer *timer = &vcpu->arch.timer;

	if (!timer->sstc_enabled)
		return;

	/* The low word is read the same way on every configuration. */
	timer->next_cycles = csr_read(CSR_VSTIMECMP);
#if defined(CONFIG_32BIT)
	/* 32-bit only: merge in the high word from the companion CSR. */
	timer->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32;
#endif
}
void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_timer *t = &vcpu->arch.timer;
@ -327,13 +342,11 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
if (!t->sstc_enabled)
return;
t = &vcpu->arch.timer;
#if defined(CONFIG_32BIT)
t->next_cycles = csr_read(CSR_VSTIMECMP);
t->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32;
#else
t->next_cycles = csr_read(CSR_VSTIMECMP);
#endif
/*
* The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync()
* upon every VM exit so no need to save here.
*/
/* timer should be enabled for the remaining operations */
if (unlikely(!t->init_done))
return;

View File

@ -3,6 +3,7 @@
* Copyright (C) 2017 SiFive
*/
#include <linux/of.h>
#include <asm/cacheflush.h>
#ifdef CONFIG_SMP
@ -86,3 +87,40 @@ void flush_icache_pte(pte_t pte)
flush_icache_all();
}
#endif /* CONFIG_MMU */
unsigned int riscv_cbom_block_size;
EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
/*
 * Probe the CBOM block size from the CPU nodes of the device tree.
 *
 * Every CPU node whose hart ID resolves and which carries a
 * "riscv,cbom-block-size" property is examined. The first non-zero value
 * found is stored in riscv_cbom_block_size; a later node that reports a
 * different value only triggers a warning. If no non-zero value is found,
 * riscv_cbom_block_size is left untouched.
 */
void riscv_init_cbom_blocksize(void)
{
	struct device_node *cpu;
	unsigned long first_hartid;
	u32 block_size = 0;
	u32 cur;

	for_each_of_cpu_node(cpu) {
		unsigned long hartid;

		/* Skip nodes for which no hart ID can be obtained. */
		if (riscv_of_processor_hartid(cpu, &hartid))
			continue;

		/* Skip nodes that do not provide the block-size property. */
		if (of_property_read_u32(cpu, "riscv,cbom-block-size", &cur))
			continue;

		if (!block_size) {
			/* Remember the value and which hart supplied it. */
			block_size = cur;
			first_hartid = hartid;
		} else if (block_size != cur) {
			pr_warn("cbom-block-size mismatched between harts %lu and %lu\n",
				first_hartid, hartid);
		}
	}

	if (block_size)
		riscv_cbom_block_size = block_size;
}

View File

@ -8,13 +8,8 @@
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/mm.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <asm/cacheflush.h>
unsigned int riscv_cbom_block_size;
EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
static bool noncoherent_supported;
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
@ -77,42 +72,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev->dma_coherent = coherent;
}
#ifdef CONFIG_RISCV_ISA_ZICBOM
void riscv_init_cbom_blocksize(void)
{
struct device_node *node;
unsigned long cbom_hartid;
u32 val, probed_block_size;
int ret;
probed_block_size = 0;
for_each_of_cpu_node(node) {
unsigned long hartid;
ret = riscv_of_processor_hartid(node, &hartid);
if (ret)
continue;
/* set block-size for cbom extension if available */
ret = of_property_read_u32(node, "riscv,cbom-block-size", &val);
if (ret)
continue;
if (!probed_block_size) {
probed_block_size = val;
cbom_hartid = hartid;
} else {
if (probed_block_size != val)
pr_warn("cbom-block-size mismatched between harts %lu and %lu\n",
cbom_hartid, hartid);
}
}
if (probed_block_size)
riscv_cbom_block_size = probed_block_size;
}
#endif
void riscv_noncoherent_supported(void)
{
WARN(!riscv_cbom_block_size,

View File

@ -6442,26 +6442,22 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
return 0;
}
static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
struct kvm_msr_filter *filter)
{
struct kvm_msr_filter __user *user_msr_filter = argp;
struct kvm_x86_msr_filter *new_filter, *old_filter;
struct kvm_msr_filter filter;
bool default_allow;
bool empty = true;
int r = 0;
u32 i;
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
return -EFAULT;
if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
empty &= !filter.ranges[i].nmsrs;
for (i = 0; i < ARRAY_SIZE(filter->ranges); i++)
empty &= !filter->ranges[i].nmsrs;
default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY);
if (empty && !default_allow)
return -EINVAL;
@ -6469,8 +6465,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (!new_filter)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) {
r = kvm_add_msr_filter(new_filter, &filter->ranges[i]);
if (r) {
kvm_free_msr_filter(new_filter);
return r;
@ -6493,6 +6489,62 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
return 0;
}
#ifdef CONFIG_KVM_COMPAT
/* for KVM_X86_SET_MSR_FILTER */
/*
 * Compat layout of a single MSR filter range: every member is a __u32.
 * The bitmap member carries a user-space address as a 32-bit value; the
 * compat ioctl handler widens it to a __u8 pointer when it builds the
 * native struct kvm_msr_filter_range.
 */
struct kvm_msr_filter_range_compat {
__u32 flags;
__u32 nmsrs;
__u32 base;
__u32 bitmap;
};
/*
 * Compat layout of the whole filter: the flags word followed by
 * KVM_MSR_FILTER_MAX_RANGES compat range descriptors.
 */
struct kvm_msr_filter_compat {
__u32 flags;
struct kvm_msr_filter_range_compat ranges[KVM_MSR_FILTER_MAX_RANGES];
};
#define KVM_X86_SET_MSR_FILTER_COMPAT _IOW(KVMIO, 0xc6, struct kvm_msr_filter_compat)
long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct kvm *kvm = filp->private_data;
long r = -ENOTTY;
switch (ioctl) {
case KVM_X86_SET_MSR_FILTER_COMPAT: {
struct kvm_msr_filter __user *user_msr_filter = argp;
struct kvm_msr_filter_compat filter_compat;
struct kvm_msr_filter filter;
int i;
if (copy_from_user(&filter_compat, user_msr_filter,
sizeof(filter_compat)))
return -EFAULT;
filter.flags = filter_compat.flags;
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
struct kvm_msr_filter_range_compat *cr;
cr = &filter_compat.ranges[i];
filter.ranges[i] = (struct kvm_msr_filter_range) {
.flags = cr->flags,
.nmsrs = cr->nmsrs,
.base = cr->base,
.bitmap = (__u8 *)(ulong)cr->bitmap,
};
}
r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
break;
}
}
return r;
}
#endif
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
static int kvm_arch_suspend_notifier(struct kvm *kvm)
{
@ -6915,9 +6967,16 @@ set_pit2_out:
case KVM_SET_PMU_EVENT_FILTER:
r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
break;
case KVM_X86_SET_MSR_FILTER:
r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
case KVM_X86_SET_MSR_FILTER: {
struct kvm_msr_filter __user *user_msr_filter = argp;
struct kvm_msr_filter filter;
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
return -EFAULT;
r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
break;
}
default:
r = -ENOTTY;
}

View File

@ -1390,6 +1390,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg);
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);

View File

@ -1177,6 +1177,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
#define KVM_CAP_S390_ZPCI_OP 221
#define KVM_CAP_S390_CPU_TOPOLOGY 222
#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
#ifdef KVM_CAP_IRQ_ROUTING

View File

@ -662,8 +662,8 @@ int test_kvm_device(uint32_t gic_dev_type)
: KVM_DEV_TYPE_ARM_VGIC_V2;
if (!__kvm_test_create_device(v.vm, other)) {
ret = __kvm_test_create_device(v.vm, other);
TEST_ASSERT(ret && (errno == EINVAL || errno == EEXIST),
ret = __kvm_create_device(v.vm, other);
TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
"create GIC device while other version exists");
}

View File

@ -67,7 +67,7 @@ struct memslot_antagonist_args {
static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
uint64_t nr_modifications)
{
const uint64_t pages = 1;
uint64_t pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size;
uint64_t gpa;
int i;

View File

@ -4839,6 +4839,12 @@ struct compat_kvm_clear_dirty_log {
};
};
/*
 * Default (weak) arch hook for compat VM ioctls: it handles nothing and
 * always reports -ENOTTY, which the caller treats as "not handled here".
 */
long __weak kvm_arch_vm_compat_ioctl(struct file *filp,
				     unsigned int ioctl,
				     unsigned long arg)
{
	return -ENOTTY;
}
static long kvm_vm_compat_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@ -4847,6 +4853,11 @@ static long kvm_vm_compat_ioctl(struct file *filp,
if (kvm->mm != current->mm || kvm->vm_dead)
return -EIO;
r = kvm_arch_vm_compat_ioctl(filp, ioctl, arg);
if (r != -ENOTTY)
return r;
switch (ioctl) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
case KVM_CLEAR_DIRTY_LOG: {