linux-stable/arch/x86/kvm/debugfs.c

197 lines
4.9 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel-based Virtual Machine driver for Linux
*
* Copyright 2016 Red Hat, Inc. and/or its affiliates.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kvm_host.h>
#include <linux/debugfs.h>
#include "lapic.h"
#include "mmu.h"
#include "mmu/mmu_internal.h"
static int vcpu_get_timer_advance_ns(void *data, u64 *val)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
*val = vcpu->arch.apic->lapic_timer.timer_advance_ns;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n");
static int vcpu_get_guest_mode(void *data, u64 *val)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
*val = vcpu->stat.guest_mode;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(vcpu_guest_mode_fops, vcpu_get_guest_mode, NULL, "%lld\n");
static int vcpu_get_tsc_offset(void *data, u64 *val)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
*val = vcpu->arch.tsc_offset;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_offset_fops, vcpu_get_tsc_offset, NULL, "%lld\n");
static int vcpu_get_tsc_scaling_ratio(void *data, u64 *val)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
*val = vcpu->arch.tsc_scaling_ratio;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_fops, vcpu_get_tsc_scaling_ratio, NULL, "%llu\n");
static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)
{
*val = kvm_caps.tsc_scaling_ratio_frac_bits;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");
void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
{
debugfs_create_file("guest_mode", 0444, debugfs_dentry, vcpu,
&vcpu_guest_mode_fops);
debugfs_create_file("tsc-offset", 0444, debugfs_dentry, vcpu,
&vcpu_tsc_offset_fops);
if (lapic_in_kernel(vcpu))
debugfs_create_file("lapic_timer_advance_ns", 0444,
debugfs_dentry, vcpu,
&vcpu_timer_advance_ns_fops);
if (kvm_caps.has_tsc_control) {
debugfs_create_file("tsc-scaling-ratio", 0444,
debugfs_dentry, vcpu,
&vcpu_tsc_scaling_fops);
debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
debugfs_dentry, vcpu,
&vcpu_tsc_scaling_frac_fops);
}
}
/*
* This covers statistics <1024 (11=log(1024)+1), which should be enough to
* cover RMAP_RECYCLE_THRESHOLD.
*/
#define RMAP_LOG_SIZE 11
static const char *kvm_lpage_str[KVM_NR_PAGE_SIZES] = { "4K", "2M", "1G" };
static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
{
struct kvm_rmap_head *rmap;
struct kvm *kvm = m->private;
struct kvm_memory_slot *slot;
struct kvm_memslots *slots;
unsigned int lpage_size, index;
/* Still small enough to be on the stack */
unsigned int *log[KVM_NR_PAGE_SIZES], *cur;
int i, j, k, l, ret;
if (!kvm_memslots_have_rmaps(kvm))
return 0;
ret = -ENOMEM;
memset(log, 0, sizeof(log));
for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
log[i] = kcalloc(RMAP_LOG_SIZE, sizeof(unsigned int), GFP_KERNEL);
if (!log[i])
goto out;
}
mutex_lock(&kvm->slots_lock);
write_lock(&kvm->mmu_lock);
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
KVM: Keep memslots in tree-based structures instead of array-based ones The current memslot code uses a (reverse gfn-ordered) memslot array for keeping track of them. Because the memslot array that is currently in use cannot be modified every memslot management operation (create, delete, move, change flags) has to make a copy of the whole array so it has a scratch copy to work on. Strictly speaking, however, it is only necessary to make copy of the memslot that is being modified, copying all the memslots currently present is just a limitation of the array-based memslot implementation. Two memslot sets, however, are still needed so the VM continues to run on the currently active set while the requested operation is being performed on the second, currently inactive one. In order to have two memslot sets, but only one copy of actual memslots it is necessary to split out the memslot data from the memslot sets. The memslots themselves should be also kept independent of each other so they can be individually added or deleted. These two memslot sets should normally point to the same set of memslots. They can, however, be desynchronized when performing a memslot management operation by replacing the memslot to be modified by its copy. After the operation is complete, both memslot sets once again point to the same, common set of memslot data. This commit implements the aforementioned idea. For tracking of gfns an ordinary rbtree is used since memslots cannot overlap in the guest address space and so this data structure is sufficient for ensuring that lookups are done quickly. The "last used slot" mini-caches (both per-slot set one and per-vCPU one), that keep track of the last found-by-gfn memslot, are still present in the new code. Co-developed-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
int bkt;
slots = __kvm_memslots(kvm, i);
KVM: Keep memslots in tree-based structures instead of array-based ones The current memslot code uses a (reverse gfn-ordered) memslot array for keeping track of them. Because the memslot array that is currently in use cannot be modified every memslot management operation (create, delete, move, change flags) has to make a copy of the whole array so it has a scratch copy to work on. Strictly speaking, however, it is only necessary to make copy of the memslot that is being modified, copying all the memslots currently present is just a limitation of the array-based memslot implementation. Two memslot sets, however, are still needed so the VM continues to run on the currently active set while the requested operation is being performed on the second, currently inactive one. In order to have two memslot sets, but only one copy of actual memslots it is necessary to split out the memslot data from the memslot sets. The memslots themselves should be also kept independent of each other so they can be individually added or deleted. These two memslot sets should normally point to the same set of memslots. They can, however, be desynchronized when performing a memslot management operation by replacing the memslot to be modified by its copy. After the operation is complete, both memslot sets once again point to the same, common set of memslot data. This commit implements the aforementioned idea. For tracking of gfns an ordinary rbtree is used since memslots cannot overlap in the guest address space and so this data structure is sufficient for ensuring that lookups are done quickly. The "last used slot" mini-caches (both per-slot set one and per-vCPU one), that keep track of the last found-by-gfn memslot, are still present in the new code. Co-developed-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
kvm_for_each_memslot(slot, bkt, slots)
for (k = 0; k < KVM_NR_PAGE_SIZES; k++) {
rmap = slot->arch.rmap[k];
lpage_size = kvm_mmu_slot_lpages(slot, k + 1);
cur = log[k];
for (l = 0; l < lpage_size; l++) {
index = ffs(pte_list_count(&rmap[l]));
if (WARN_ON_ONCE(index >= RMAP_LOG_SIZE))
index = RMAP_LOG_SIZE - 1;
cur[index]++;
}
}
}
write_unlock(&kvm->mmu_lock);
mutex_unlock(&kvm->slots_lock);
/* index=0 counts no rmap; index=1 counts 1 rmap */
seq_printf(m, "Rmap_Count:\t0\t1\t");
for (i = 2; i < RMAP_LOG_SIZE; i++) {
j = 1 << (i - 1);
k = (1 << i) - 1;
seq_printf(m, "%d-%d\t", j, k);
}
seq_printf(m, "\n");
for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
seq_printf(m, "Level=%s:\t", kvm_lpage_str[i]);
cur = log[i];
for (j = 0; j < RMAP_LOG_SIZE; j++)
seq_printf(m, "%d\t", cur[j]);
seq_printf(m, "\n");
}
ret = 0;
out:
for (i = 0; i < KVM_NR_PAGE_SIZES; i++)
kfree(log[i]);
return ret;
}
static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
{
struct kvm *kvm = inode->i_private;
int r;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
if (r < 0)
kvm_put_kvm(kvm);
return r;
}
static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
{
struct kvm *kvm = inode->i_private;
kvm_put_kvm(kvm);
return single_release(inode, file);
}
static const struct file_operations mmu_rmaps_stat_fops = {
KVM: Set file_operations.owner appropriately for all such structures Set .owner for all KVM-owned filed types so that the KVM module is pinned until any files with callbacks back into KVM are completely freed. Using "struct kvm" as a proxy for the module, i.e. keeping KVM-the-module alive while there are active VMs, doesn't provide full protection. Userspace can invoke delete_module() the instant the last reference to KVM is put. If KVM itself puts the last reference, e.g. via kvm_destroy_vm(), then it's possible for KVM to be preempted and deleted/unloaded before KVM fully exits, e.g. when the task running kvm_destroy_vm() is scheduled back in, it will jump to a code page that is no longer mapped. Note, file types that can call into sub-module code, e.g. kvm-intel.ko or kvm-amd.ko on x86, must use the module pointer passed to kvm_init(), not THIS_MODULE (which points at kvm.ko). KVM assumes that if /dev/kvm is reachable, e.g. VMs are active, then the vendor module is loaded. To reduce the probability of forgetting to set .owner entirely, use THIS_MODULE for stats files where KVM does not call back into vendor code. This reverts commit 70375c2d8fa3fb9b0b59207a9c5df1e2e1205c10, and fixes several other file types that have been buggy since their introduction. Fixes: 70375c2d8fa3 ("Revert "KVM: set owner of cpu and vm file operations"") Fixes: 3bcd0662d66f ("KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file") Reported-by: Al Viro <viro@zeniv.linux.org.uk> Link: https://lore.kernel.org/all/20231010003746.GN800259@ZenIV Link: https://lore.kernel.org/r/20231018204624.1905300-2-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
2023-10-18 20:46:22 +00:00
.owner = THIS_MODULE,
.open = kvm_mmu_rmaps_stat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = kvm_mmu_rmaps_stat_release,
};
void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm,
&mmu_rmaps_stat_fops);
}