mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-29 13:53:33 +00:00
KVM x86 MMU changes for 6.10:
- Process TDP MMU SPTEs that are are zapped while holding mmu_lock for read after replacing REMOVED_SPTE with '0' and flushing remote TLBs, which allows vCPU tasks to repopulate the zapped region while the zapper finishes tearing down the old, defunct page tables. - Fix a longstanding, likely benign-in-practice race where KVM could fail to detect a write from kvm_mmu_track_write() to a shadowed GPTE if the GPTE is first page table being shadowed. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmY+pUMACgkQOlYIJqCj N/2U6A//T3twYSURCUhM/3QYHDoH2RSldxQFs9i4+wJvXdvu4/VK08q1jPltTifm 6QoloLzJq34rSPPsYAvKSicfhC9Trxz+Cks6oe2wJrDvNNzco+mksC0owj2FsdeO 8pLh2VGqdmRU64afpnjTRneONJCsxTxHsoVdVEDSMhWiiFX9jj74QS2AbMB/XIli rFHK70kpEBTHGzg9E84xcjZb5DBB9+8jIGryWMtXfTAWHC0IO9gSAybLEoVAHZFL lUUGpeAs4P97mX28fQFqMm3ZffKE3hfHRfjEoW5BefnZeXYaABwF586I/w7QTjQI yHLgvh10a0a0X1hcCsDQFgy81uOLkbVDPUcBOTTY59DXT7Zp2il5bwcMvNBfaaUZ olR0auaeOxjPz4/WXd9JOZLaNJYCZqhEQnbEnt0RYcJ4MDULOocbD+D//+3yWPNp Dd6t8x73qXqa6GbtwOYWkMENwiDObTZaYBxTUhTd1z6gWpIeXx2fK8RRZ7/+/psF Pf/dzSvwOrXUpISQEVn6Q5sRlBS5nzd1vIWRoVe+pze2WYM3SX9E/3SksMCm+TRz Is8e+05HvjiaMpZeEjRjbUbBgpQakZYJ1TEwGbC6GLP/PUkssUluiDaQDxCwLPoQ bDb/I4NxDUbr0TaEvPszJuA1we8jGpQceq6wUo7n/mX2jC78Syo= =Izml -----END PGP SIGNATURE----- Merge tag 'kvm-x86-mmu-6.10' of https://github.com/kvm-x86/linux into HEAD KVM x86 MMU changes for 6.10: - Process TDP MMU SPTEs that are are zapped while holding mmu_lock for read after replacing REMOVED_SPTE with '0' and flushing remote TLBs, which allows vCPU tasks to repopulate the zapped region while the zapper finishes tearing down the old, defunct page tables. - Fix a longstanding, likely benign-in-practice race where KVM could fail to detect a write from kvm_mmu_track_write() to a shadowed GPTE if the GPTE is first page table being shadowed.
This commit is contained in:
commit
5a1c72e07e
2 changed files with 66 additions and 29 deletions
|
@ -831,6 +831,15 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|||
gfn_t gfn;
|
||||
|
||||
kvm->arch.indirect_shadow_pages++;
|
||||
/*
|
||||
* Ensure indirect_shadow_pages is elevated prior to re-reading guest
|
||||
* child PTEs in FNAME(gpte_changed), i.e. guarantee either in-flight
|
||||
* emulated writes are visible before re-reading guest PTEs, or that
|
||||
* an emulated write will see the elevated count and acquire mmu_lock
|
||||
* to update SPTEs. Pairs with the smp_mb() in kvm_mmu_track_write().
|
||||
*/
|
||||
smp_mb();
|
||||
|
||||
gfn = sp->gfn;
|
||||
slots = kvm_memslots_for_spte_role(kvm, sp->role);
|
||||
slot = __gfn_to_memslot(slots, gfn);
|
||||
|
@ -5787,10 +5796,15 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
|||
bool flush = false;
|
||||
|
||||
/*
|
||||
* If we don't have indirect shadow pages, it means no page is
|
||||
* write-protected, so we can exit simply.
|
||||
* When emulating guest writes, ensure the written value is visible to
|
||||
* any task that is handling page faults before checking whether or not
|
||||
* KVM is shadowing a guest PTE. This ensures either KVM will create
|
||||
* the correct SPTE in the page fault handler, or this task will see
|
||||
* a non-zero indirect_shadow_pages. Pairs with the smp_mb() in
|
||||
* account_shadowed().
|
||||
*/
|
||||
if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
|
||||
smp_mb();
|
||||
if (!vcpu->kvm->arch.indirect_shadow_pages)
|
||||
return;
|
||||
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
|
|
@ -530,6 +530,31 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
|||
kvm_set_pfn_accessed(spte_to_pfn(old_spte));
|
||||
}
|
||||
|
||||
static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
|
||||
{
|
||||
u64 *sptep = rcu_dereference(iter->sptep);
|
||||
|
||||
/*
|
||||
* The caller is responsible for ensuring the old SPTE is not a REMOVED
|
||||
* SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
|
||||
* and pre-checking before inserting a new SPTE is advantageous as it
|
||||
* avoids unnecessary work.
|
||||
*/
|
||||
WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
|
||||
|
||||
/*
|
||||
* Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
|
||||
* does not hold the mmu_lock. On failure, i.e. if a different logical
|
||||
* CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
|
||||
* the current value, so the caller operates on fresh data, e.g. if it
|
||||
* retries tdp_mmu_set_spte_atomic()
|
||||
*/
|
||||
if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
|
||||
return -EBUSY;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
|
||||
* and handle the associated bookkeeping. Do not mark the page dirty
|
||||
|
@ -551,27 +576,13 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
|
|||
struct tdp_iter *iter,
|
||||
u64 new_spte)
|
||||
{
|
||||
u64 *sptep = rcu_dereference(iter->sptep);
|
||||
|
||||
/*
|
||||
* The caller is responsible for ensuring the old SPTE is not a REMOVED
|
||||
* SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
|
||||
* and pre-checking before inserting a new SPTE is advantageous as it
|
||||
* avoids unnecessary work.
|
||||
*/
|
||||
WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held_read(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
* Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
|
||||
* does not hold the mmu_lock. On failure, i.e. if a different logical
|
||||
* CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
|
||||
* the current value, so the caller operates on fresh data, e.g. if it
|
||||
* retries tdp_mmu_set_spte_atomic()
|
||||
*/
|
||||
if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
|
||||
return -EBUSY;
|
||||
ret = __tdp_mmu_set_spte_atomic(iter, new_spte);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
|
||||
new_spte, iter->level, true);
|
||||
|
@ -584,13 +595,17 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
|
|||
{
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held_read(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
* Freeze the SPTE by setting it to a special,
|
||||
* non-present value. This will stop other threads from
|
||||
* immediately installing a present entry in its place
|
||||
* before the TLBs are flushed.
|
||||
* Freeze the SPTE by setting it to a special, non-present value. This
|
||||
* will stop other threads from immediately installing a present entry
|
||||
* in its place before the TLBs are flushed.
|
||||
*
|
||||
* Delay processing of the zapped SPTE until after TLBs are flushed and
|
||||
* the REMOVED_SPTE is replaced (see below).
|
||||
*/
|
||||
ret = tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE);
|
||||
ret = __tdp_mmu_set_spte_atomic(iter, REMOVED_SPTE);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -599,12 +614,20 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
|
|||
/*
|
||||
* No other thread can overwrite the removed SPTE as they must either
|
||||
* wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
|
||||
* overwrite the special removed SPTE value. No bookkeeping is needed
|
||||
* here since the SPTE is going from non-present to non-present. Use
|
||||
* the raw write helper to avoid an unnecessary check on volatile bits.
|
||||
* overwrite the special removed SPTE value. Use the raw write helper to
|
||||
* avoid an unnecessary check on volatile bits.
|
||||
*/
|
||||
__kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE);
|
||||
|
||||
/*
|
||||
* Process the zapped SPTE after flushing TLBs, and after replacing
|
||||
* REMOVED_SPTE with 0. This minimizes the amount of time vCPUs are
|
||||
* blocked by the REMOVED_SPTE and reduces contention on the child
|
||||
* SPTEs.
|
||||
*/
|
||||
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
|
||||
0, iter->level, true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue