Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git, synced 2024-09-29 22:02:02 +00:00
KVM x86 MMU changes for 6.3:
- Fix and cleanup the range-based TLB flushing code, used when KVM is
  running on Hyper-V

- A few one-off cleanups

Merge tag 'kvm-x86-mmu-6.3' of https://github.com/kvm-x86/linux into HEAD
commit 1c5ec0d433
7 changed files with 63 additions and 48 deletions
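The common thread in the hunks below is replacing open-coded "start gfn + number of pages" flush arguments with gfn/level-based helpers (gfn_round_for_level(), kvm_flush_remote_tlbs_gfn(), kvm_flush_remote_tlbs_sptep()). As a standalone illustration (not kernel code; it only mirrors the rounding math visible in the diff, assuming 4KiB base pages and 9 gfn bits per paging level), the new `gfn & -KVM_PAGES_PER_HPAGE(level)` round-down is the same operation as the `gfn & ~(pages - 1)` expression the old call sites used:

/* Standalone illustration only -- not kernel code. It mirrors the
 * gfn_round_for_level()/KVM_PAGES_PER_HPAGE() math from the diff,
 * assuming 4KiB base pages and 9 bits of gfn per page-table level.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

#define PT64_LEVEL_BITS 9
/* Number of 4KiB pages mapped by one entry at the given level (level 1 = 4K). */
#define PAGES_PER_HPAGE(level) (1ULL << (((level) - 1) * PT64_LEVEL_BITS))

static gfn_t gfn_round_for_level(gfn_t gfn, int level)
{
	/* For a power-of-two size, -size == ~(size - 1), so this rounds down. */
	return gfn & -PAGES_PER_HPAGE(level);
}

int main(void)
{
	gfn_t gfn = 0x12345;

	for (int level = 1; level <= 4; level++) {
		gfn_t pages = PAGES_PER_HPAGE(level);
		gfn_t old_style = gfn & ~(pages - 1);	/* old call sites */
		gfn_t new_style = gfn_round_for_level(gfn, level);

		assert(old_style == new_style);
		printf("level %d: base gfn 0x%llx, %llu pages\n", level,
		       (unsigned long long)new_style, (unsigned long long)pages);
	}
	return 0;
}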
arch/x86/kvm/mmu/mmu.c
@@ -44,6 +44,7 @@
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/kern_levels.h>
+#include <linux/kstrtox.h>
 #include <linux/kthread.h>
 
 #include <asm/page.h>
@@ -269,6 +270,17 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
 	kvm_flush_remote_tlbs_with_range(kvm, &range);
 }
 
+static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
+
+/* Flush the range of guest memory mapped by the given SPTE. */
+static void kvm_flush_remote_tlbs_sptep(struct kvm *kvm, u64 *sptep)
+{
+	struct kvm_mmu_page *sp = sptep_to_sp(sptep);
+	gfn_t gfn = kvm_mmu_page_get_gfn(sp, spte_index(sptep));
+
+	kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level);
+}
+
 static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
 			   unsigned int access)
 {
@@ -813,7 +825,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
 	if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
-		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
+		kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K);
 }
 
 void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -1187,8 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
 	drop_spte(kvm, sptep);
 
 	if (flush)
-		kvm_flush_remote_tlbs_with_address(kvm, sp->gfn,
-			KVM_PAGES_PER_HPAGE(sp->role.level));
+		kvm_flush_remote_tlbs_sptep(kvm, sptep);
 }
 
 /*
@@ -1469,7 +1480,7 @@ static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 	}
 
 	if (need_flush && kvm_available_flush_tlb_with_range()) {
-		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
+		kvm_flush_remote_tlbs_gfn(kvm, gfn, level);
 		return false;
 	}
 
@@ -1639,8 +1650,7 @@ static void __rmap_add(struct kvm *kvm,
 		kvm->stat.max_mmu_rmap_size = rmap_count;
 	if (rmap_count > RMAP_RECYCLE_THRESHOLD) {
 		kvm_zap_all_rmap_sptes(kvm, rmap_head);
-		kvm_flush_remote_tlbs_with_address(
-				kvm, sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));
+		kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level);
 	}
 }
 
@@ -2405,7 +2415,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			return;
 
 		drop_parent_pte(child, sptep);
-		kvm_flush_remote_tlbs_with_address(vcpu->kvm, child->gfn, 1);
+		kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
 	}
 }
 
@@ -2889,8 +2899,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 	}
 
 	if (flush)
-		kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn,
-				KVM_PAGES_PER_HPAGE(level));
+		kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
 
 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
 
@@ -3169,7 +3178,7 @@ static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		if (fault->nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(fault, *it.sptep, it.level);
 
-		base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+		base_gfn = gfn_round_for_level(fault->gfn, it.level);
 		if (it.level == fault->goal_level)
 			break;
 
@@ -4440,7 +4449,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
 			int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
-			gfn_t base = fault->gfn & ~(page_num - 1);
+			gfn_t base = gfn_round_for_level(fault->gfn,
+							 fault->max_level);
 
 			if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
 				break;
@@ -4556,10 +4566,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	union kvm_mmu_page_role new_role = mmu->root_role;
 
-	if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) {
-		/* kvm_mmu_ensure_valid_pgd will set up a new root. */
+	/*
+	 * Return immediately if no usable root was found, kvm_mmu_reload()
+	 * will establish a valid root prior to the next VM-Enter.
+	 */
+	if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role))
 		return;
-	}
 
 	/*
 	 * It's possible that the cached previous root page is obsolete because
@@ -6518,8 +6530,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 			kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
 
 			if (kvm_available_flush_tlb_with_range())
-				kvm_flush_remote_tlbs_with_address(kvm, sp->gfn,
-					KVM_PAGES_PER_HPAGE(sp->role.level));
+				kvm_flush_remote_tlbs_sptep(kvm, sptep);
 			else
 				need_tlb_flush = 1;
 
@@ -6752,7 +6763,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
 		new_val = 1;
 	else if (sysfs_streq(val, "auto"))
 		new_val = get_nx_auto_mode();
-	else if (strtobool(val, &new_val) < 0)
+	else if (kstrtobool(val, &new_val) < 0)
 		return -EINVAL;
 
 	__set_nx_huge_pages(new_val);
arch/x86/kvm/mmu/mmu_internal.h
@@ -156,6 +156,11 @@ static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
 	return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode;
 }
 
+static inline gfn_t gfn_round_for_level(gfn_t gfn, int level)
+{
+	return gfn & -KVM_PAGES_PER_HPAGE(level);
+}
+
 int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
 			    gfn_t gfn, bool can_unsync, bool prefetch);
 
@@ -164,8 +169,17 @@ void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn,
 				    int min_level);
+
 void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
 					u64 start_gfn, u64 pages);
+
+/* Flush the given page (huge or not) of guest memory. */
+static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
+{
+	kvm_flush_remote_tlbs_with_address(kvm, gfn_round_for_level(gfn, level),
+					   KVM_PAGES_PER_HPAGE(level));
+}
+
 unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
 
 extern int nx_huge_pages;
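A rough sketch of how the two new flush helpers layer on top of the pre-existing range-based flush. The struct, function names, and printf tracing below are simplified stand-ins for illustration only, not the real KVM definitions:

/* Toy model of the helper layering added by this series -- not kernel code. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

#define PAGES_PER_HPAGE(level) (1ULL << (((level) - 1) * 9))

static gfn_t gfn_round_for_level(gfn_t gfn, int level)
{
	return gfn & -PAGES_PER_HPAGE(level);
}

/* Lowest layer: the pre-existing range flush, here just traced. */
static void flush_tlbs_with_address(uint64_t start_gfn, uint64_t pages)
{
	printf("flush gfn range [0x%llx, 0x%llx)\n",
	       (unsigned long long)start_gfn,
	       (unsigned long long)(start_gfn + pages));
}

/* New middle layer: flush the (possibly huge) page containing @gfn. */
static void flush_tlbs_gfn(gfn_t gfn, int level)
{
	flush_tlbs_with_address(gfn_round_for_level(gfn, level),
				PAGES_PER_HPAGE(level));
}

/* Toy shadow page: the real kvm_mmu_page carries gfn and role.level. */
struct shadow_page {
	gfn_t gfn;
	int level;
};

/* New top layer: flush the range mapped by one SPTE of @sp. */
static void flush_tlbs_sptep(const struct shadow_page *sp, gfn_t spte_gfn)
{
	flush_tlbs_gfn(spte_gfn, sp->level);
}

int main(void)
{
	struct shadow_page sp = { .gfn = 0x40000, .level = 2 };

	flush_tlbs_gfn(0x12345, 1);	/* old: flush_with_address(gfn, 1) */
	flush_tlbs_sptep(&sp, 0x40123);	/* old: open-coded gfn/HPAGE math  */
	return 0;
}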
arch/x86/kvm/mmu/paging_tmpl.h
@@ -642,12 +642,12 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
 		goto out_gpte_changed;
 
-	for (shadow_walk_init(&it, vcpu, fault->addr);
-	     shadow_walk_okay(&it) && it.level > gw->level;
-	     shadow_walk_next(&it)) {
+	for_each_shadow_entry(vcpu, fault->addr, it) {
 		gfn_t table_gfn;
 
 		clear_sp_write_flooding_count(it.sptep);
+		if (it.level == gw->level)
+			break;
 
 		table_gfn = gw->table_gfn[it.level - 2];
 		access = gw->pt_access[it.level - 2];
@@ -692,8 +692,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	trace_kvm_mmu_spte_requested(fault);
 
 	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
-		clear_sp_write_flooding_count(it.sptep);
-
 		/*
 		 * We cannot overwrite existing page tables with an NX
 		 * large page, as the leaf could be executable.
@@ -701,7 +699,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 		if (fault->nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(fault, *it.sptep, it.level);
 
-		base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+		base_gfn = gfn_round_for_level(fault->gfn, it.level);
 		if (it.level == fault->goal_level)
 			break;
 
@@ -929,8 +927,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
 
 		mmu_page_zap_pte(vcpu->kvm, sp, sptep, NULL);
 		if (is_shadow_present_pte(old_spte))
-			kvm_flush_remote_tlbs_with_address(vcpu->kvm,
-				sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));
+			kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
 
 		if (!rmap_can_add(vcpu))
 			break;
arch/x86/kvm/mmu/spte.c
@@ -147,9 +147,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	WARN_ON_ONCE(!pte_access && !shadow_present_mask);
 
 	if (sp->role.ad_disabled)
-		spte |= SPTE_TDP_AD_DISABLED_MASK;
+		spte |= SPTE_TDP_AD_DISABLED;
 	else if (kvm_mmu_page_ad_need_write_protect(sp))
-		spte |= SPTE_TDP_AD_WRPROT_ONLY_MASK;
+		spte |= SPTE_TDP_AD_WRPROT_ONLY;
 
 	/*
 	 * For the EPT case, shadow_present_mask is 0 if hardware
@@ -317,7 +317,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
 	       shadow_user_mask | shadow_x_mask | shadow_me_value;
 
 	if (ad_disabled)
-		spte |= SPTE_TDP_AD_DISABLED_MASK;
+		spte |= SPTE_TDP_AD_DISABLED;
 	else
 		spte |= shadow_accessed_mask;
 
arch/x86/kvm/mmu/spte.h
@@ -28,10 +28,10 @@
  */
 #define SPTE_TDP_AD_SHIFT		52
 #define SPTE_TDP_AD_MASK		(3ULL << SPTE_TDP_AD_SHIFT)
-#define SPTE_TDP_AD_ENABLED_MASK	(0ULL << SPTE_TDP_AD_SHIFT)
-#define SPTE_TDP_AD_DISABLED_MASK	(1ULL << SPTE_TDP_AD_SHIFT)
-#define SPTE_TDP_AD_WRPROT_ONLY_MASK	(2ULL << SPTE_TDP_AD_SHIFT)
-static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
+#define SPTE_TDP_AD_ENABLED		(0ULL << SPTE_TDP_AD_SHIFT)
+#define SPTE_TDP_AD_DISABLED		(1ULL << SPTE_TDP_AD_SHIFT)
+#define SPTE_TDP_AD_WRPROT_ONLY	(2ULL << SPTE_TDP_AD_SHIFT)
+static_assert(SPTE_TDP_AD_ENABLED == 0);
 
 #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
 #define SPTE_BASE_ADDR_MASK	(physical_mask & ~(u64)(PAGE_SIZE-1))
@@ -164,7 +164,7 @@ extern u64 __read_mostly shadow_me_value;
 extern u64 __read_mostly shadow_me_mask;
 
 /*
- * SPTEs in MMUs without A/D bits are marked with SPTE_TDP_AD_DISABLED_MASK;
+ * SPTEs in MMUs without A/D bits are marked with SPTE_TDP_AD_DISABLED;
  * shadow_acc_track_mask is the set of bits to be cleared in non-accessed
  * pages.
  */
@@ -266,18 +266,18 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
 static inline bool spte_ad_enabled(u64 spte)
 {
 	MMU_WARN_ON(!is_shadow_present_pte(spte));
-	return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED_MASK;
+	return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED;
 }
 
 static inline bool spte_ad_need_write_protect(u64 spte)
 {
 	MMU_WARN_ON(!is_shadow_present_pte(spte));
 	/*
-	 * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED_MASK is '0',
+	 * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0',
 	 * and non-TDP SPTEs will never set these bits.  Optimize for 64-bit
 	 * TDP and do the A/D type check unconditionally.
 	 */
-	return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED_MASK;
+	return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED;
 }
 
 static inline u64 spte_shadow_accessed_mask(u64 spte)
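The renamed constants are values of the 2-bit A/D type field at bit 52 rather than standalone masks, which is presumably why the `_MASK` suffix was dropped. A self-contained sketch (userspace C, not kernel code) exercising the same checks as spte_ad_enabled() and spte_ad_need_write_protect():

/* Standalone sketch of the 2-bit TDP A/D type field at bit 52, using the
 * renamed value macros from the diff above. Not kernel code. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define SPTE_TDP_AD_SHIFT		52
#define SPTE_TDP_AD_MASK		(3ULL << SPTE_TDP_AD_SHIFT)
#define SPTE_TDP_AD_ENABLED		(0ULL << SPTE_TDP_AD_SHIFT)
#define SPTE_TDP_AD_DISABLED		(1ULL << SPTE_TDP_AD_SHIFT)
#define SPTE_TDP_AD_WRPROT_ONLY	(2ULL << SPTE_TDP_AD_SHIFT)

static bool spte_ad_enabled(uint64_t spte)
{
	/* A/D bits are usable unless the SPTE is marked AD-disabled. */
	return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED;
}

static bool spte_ad_need_write_protect(uint64_t spte)
{
	/* Anything other than the fully-enabled type needs write-protection. */
	return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED;
}

int main(void)
{
	uint64_t pfn_bits = 0xabcd000;	/* arbitrary non-A/D payload */

	assert(spte_ad_enabled(pfn_bits | SPTE_TDP_AD_ENABLED));
	assert(!spte_ad_enabled(pfn_bits | SPTE_TDP_AD_DISABLED));
	assert(spte_ad_enabled(pfn_bits | SPTE_TDP_AD_WRPROT_ONLY));

	assert(!spte_ad_need_write_protect(pfn_bits | SPTE_TDP_AD_ENABLED));
	assert(spte_ad_need_write_protect(pfn_bits | SPTE_TDP_AD_WRPROT_ONLY));
	return 0;
}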
arch/x86/kvm/mmu/tdp_iter.c
@@ -16,11 +16,6 @@ static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
 	iter->old_spte = kvm_tdp_mmu_read_spte(iter->sptep);
 }
 
-static gfn_t round_gfn_for_level(gfn_t gfn, int level)
-{
-	return gfn & -KVM_PAGES_PER_HPAGE(level);
-}
-
 /*
  * Return the TDP iterator to the root PT and allow it to continue its
  * traversal over the paging structure from there.
@@ -31,7 +26,7 @@ void tdp_iter_restart(struct tdp_iter *iter)
 	iter->yielded_gfn = iter->next_last_level_gfn;
 	iter->level = iter->root_level;
 
-	iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+	iter->gfn = gfn_round_for_level(iter->next_last_level_gfn, iter->level);
 	tdp_iter_refresh_sptep(iter);
 
 	iter->valid = true;
@@ -98,7 +93,7 @@ static bool try_step_down(struct tdp_iter *iter)
 
 	iter->level--;
 	iter->pt_path[iter->level - 1] = child_pt;
-	iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+	iter->gfn = gfn_round_for_level(iter->next_last_level_gfn, iter->level);
 	tdp_iter_refresh_sptep(iter);
 
 	return true;
@@ -140,7 +135,7 @@ static bool try_step_up(struct tdp_iter *iter)
 		return false;
 
 	iter->level++;
-	iter->gfn = round_gfn_for_level(iter->gfn, iter->level);
+	iter->gfn = gfn_round_for_level(iter->gfn, iter->level);
 	tdp_iter_refresh_sptep(iter);
 
 	return true;
arch/x86/kvm/mmu/tdp_mmu.c
@@ -680,8 +680,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 	if (ret)
 		return ret;
 
-	kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
-					   KVM_PAGES_PER_HPAGE(iter->level));
+	kvm_flush_remote_tlbs_gfn(kvm, iter->gfn, iter->level);
 
 	/*
 	 * No other thread can overwrite the removed SPTE as they must either
@@ -1080,8 +1079,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 		return RET_PF_RETRY;
 	else if (is_shadow_present_pte(iter->old_spte) &&
 		 !is_last_spte(iter->old_spte, iter->level))
-		kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
-						   KVM_PAGES_PER_HPAGE(iter->level + 1));
+		kvm_flush_remote_tlbs_gfn(vcpu->kvm, iter->gfn, iter->level);
 
 	/*
	 * If the page fault was caused by a write but the page is write