mmu_notifiers: rename invalidate_range notifier

There are two main use cases for mmu notifiers.  One is by KVM which uses
mmu_notifier_invalidate_range_start()/end() to manage a software TLB.

The other is to manage hardware TLBs which need to use the
invalidate_range() callback because HW can establish new TLB entries at
any time.  Hence using start/end() can lead to memory corruption as these
callbacks happen too soon/late during page unmap.

mmu notifier users should therefore either use the start()/end() callbacks
or the invalidate_range() callbacks.  To make this usage clearer rename
the invalidate_range() callback to arch_invalidate_secondary_tlbs() and
update the documentation.

Link: https://lkml.kernel.org/r/6f77248cd25545c8020a54b4e567e8b72be4dca1.1690292440.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Andrew Donnellan <ajd@linux.ibm.com>
Cc: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Cc: Frederic Barrat <fbarrat@linux.ibm.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Nicolin Chen <nicolinc@nvidia.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Will Deacon <will@kernel.org>
Cc: Zhi Wang <zhi.wang.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Alistair Popple 2023-07-25 23:42:07 +10:00 committed by Andrew Morton
parent ec8832d007
commit 1af5a81099
13 changed files with 76 additions and 63 deletions

View File

@ -253,7 +253,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
__tlbi(aside1is, asid); __tlbi(aside1is, asid);
__tlbi_user(aside1is, asid); __tlbi_user(aside1is, asid);
dsb(ish); dsb(ish);
mmu_notifier_invalidate_range(mm, 0, -1UL); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
} }
static inline void __flush_tlb_page_nosync(struct mm_struct *mm, static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
@ -265,7 +265,7 @@ static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
addr = __TLBI_VADDR(uaddr, ASID(mm)); addr = __TLBI_VADDR(uaddr, ASID(mm));
__tlbi(vale1is, addr); __tlbi(vale1is, addr);
__tlbi_user(vale1is, addr); __tlbi_user(vale1is, addr);
mmu_notifier_invalidate_range(mm, uaddr & PAGE_MASK, mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
(uaddr & PAGE_MASK) + PAGE_SIZE); (uaddr & PAGE_MASK) + PAGE_SIZE);
} }
@ -400,7 +400,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
scale++; scale++;
} }
dsb(ish); dsb(ish);
mmu_notifier_invalidate_range(vma->vm_mm, start, end); mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
} }
static inline void flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_range(struct vm_area_struct *vma,

View File

@ -39,7 +39,7 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, end, psize); radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, end, psize);
else else
radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize); radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
mmu_notifier_invalidate_range(vma->vm_mm, start, end); mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
} }
void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,

View File

@ -987,7 +987,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
} }
} }
preempt_enable(); preempt_enable();
mmu_notifier_invalidate_range(mm, 0, -1UL); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
} }
EXPORT_SYMBOL(radix__flush_tlb_mm); EXPORT_SYMBOL(radix__flush_tlb_mm);
@ -1021,7 +1021,7 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
} }
preempt_enable(); preempt_enable();
mmu_notifier_invalidate_range(mm, 0, -1UL); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
} }
void radix__flush_all_mm(struct mm_struct *mm) void radix__flush_all_mm(struct mm_struct *mm)
@ -1230,7 +1230,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
} }
out: out:
preempt_enable(); preempt_enable();
mmu_notifier_invalidate_range(mm, start, end); mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
} }
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
@ -1395,7 +1395,7 @@ static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
} }
out: out:
preempt_enable(); preempt_enable();
mmu_notifier_invalidate_range(mm, start, end); mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
} }
void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,

View File

@ -283,7 +283,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
{ {
inc_mm_tlb_gen(mm); inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
mmu_notifier_invalidate_range(mm, 0, -1UL); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
} }
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm) static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)

View File

@ -1037,7 +1037,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
put_flush_tlb_info(); put_flush_tlb_info();
put_cpu(); put_cpu();
mmu_notifier_invalidate_range(mm, start, end); mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
} }

View File

@ -355,9 +355,9 @@ static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
return container_of(mn, struct pasid_state, mn); return container_of(mn, struct pasid_state, mn);
} }
static void mn_invalidate_range(struct mmu_notifier *mn, static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm, struct mm_struct *mm,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
struct pasid_state *pasid_state; struct pasid_state *pasid_state;
struct device_state *dev_state; struct device_state *dev_state;
@ -391,8 +391,8 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
} }
static const struct mmu_notifier_ops iommu_mn = { static const struct mmu_notifier_ops iommu_mn = {
.release = mn_release, .release = mn_release,
.invalidate_range = mn_invalidate_range, .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs,
}; };
static void set_pri_tag_status(struct pasid_state *pasid_state, static void set_pri_tag_status(struct pasid_state *pasid_state,

View File

@ -186,9 +186,10 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
} }
} }
static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm, struct mm_struct *mm,
unsigned long start, unsigned long end) unsigned long start,
unsigned long end)
{ {
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
struct arm_smmu_domain *smmu_domain = smmu_mn->domain; struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
@ -247,9 +248,9 @@ static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn)
} }
static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = {
.invalidate_range = arm_smmu_mm_invalidate_range, .arch_invalidate_secondary_tlbs = arm_smmu_mm_arch_invalidate_secondary_tlbs,
.release = arm_smmu_mm_release, .release = arm_smmu_mm_release,
.free_notifier = arm_smmu_mmu_notifier_free, .free_notifier = arm_smmu_mmu_notifier_free,
}; };
/* Allocate or get existing MMU notifier for this {domain, mm} pair */ /* Allocate or get existing MMU notifier for this {domain, mm} pair */

View File

@ -219,9 +219,9 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
} }
/* Pages have been freed at this point */ /* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn, static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm, struct mm_struct *mm,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
@ -256,7 +256,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static const struct mmu_notifier_ops intel_mmuops = { static const struct mmu_notifier_ops intel_mmuops = {
.release = intel_mm_release, .release = intel_mm_release,
.invalidate_range = intel_invalidate_range, .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
}; };
static DEFINE_MUTEX(pasid_mutex); static DEFINE_MUTEX(pasid_mutex);

View File

@ -491,9 +491,9 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle)
} }
EXPORT_SYMBOL_GPL(ocxl_link_release); EXPORT_SYMBOL_GPL(ocxl_link_release);
static void invalidate_range(struct mmu_notifier *mn, static void arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm, struct mm_struct *mm,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier); struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier);
struct ocxl_link *link = pe_data->link; struct ocxl_link *link = pe_data->link;
@ -509,7 +509,7 @@ static void invalidate_range(struct mmu_notifier *mn,
} }
static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = { static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = {
.invalidate_range = invalidate_range, .arch_invalidate_secondary_tlbs = arch_invalidate_secondary_tlbs,
}; };
static u64 calculate_cfg_state(bool kernel) static u64 calculate_cfg_state(bool kernel)

View File

@ -187,27 +187,27 @@ struct mmu_notifier_ops {
const struct mmu_notifier_range *range); const struct mmu_notifier_range *range);
/* /*
* invalidate_range() is either called between * arch_invalidate_secondary_tlbs() is used to manage a non-CPU TLB
* invalidate_range_start() and invalidate_range_end() when the * which shares page-tables with the CPU. The
* VM has to free pages that where unmapped, but before the * invalidate_range_start()/end() callbacks should not be implemented as
* pages are actually freed, or outside of _start()/_end() when * invalidate_secondary_tlbs() already catches the points in time when
* a (remote) TLB is necessary. * an external TLB needs to be flushed.
* *
* If invalidate_range() is used to manage a non-CPU TLB with * This requires arch_invalidate_secondary_tlbs() to be called while
* shared page-tables, it not necessary to implement the * holding the ptl spin-lock and therefore this callback is not allowed
* invalidate_range_start()/end() notifiers, as * to sleep.
* invalidate_range() already catches the points in time when an
* external TLB range needs to be flushed. For more in depth
* discussion on this see Documentation/mm/mmu_notifier.rst
* *
* Note that this function might be called with just a sub-range * This is called by architecture code whenever invalidating a TLB
* of what was passed to invalidate_range_start()/end(), if * entry. It is assumed that any secondary TLB has the same rules for
* called between those functions. * when invalidations are required. If this is not the case architecture
* code will need to call this explicitly when required for secondary
* TLB invalidation.
*/ */
void (*invalidate_range)(struct mmu_notifier *subscription, void (*arch_invalidate_secondary_tlbs)(
struct mm_struct *mm, struct mmu_notifier *subscription,
unsigned long start, struct mm_struct *mm,
unsigned long end); unsigned long start,
unsigned long end);
/* /*
* These callbacks are used with the get/put interface to manage the * These callbacks are used with the get/put interface to manage the
@ -396,8 +396,8 @@ extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte); unsigned long address, pte_t pte);
extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r); extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
unsigned long start, unsigned long end); unsigned long start, unsigned long end);
extern bool extern bool
mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range); mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range);
@ -483,11 +483,11 @@ mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
__mmu_notifier_invalidate_range_end(range); __mmu_notifier_invalidate_range_end(range);
} }
static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
if (mm_has_notifiers(mm)) if (mm_has_notifiers(mm))
__mmu_notifier_invalidate_range(mm, start, end); __mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
} }
static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm)
@ -664,7 +664,7 @@ void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
{ {
} }
static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
} }

View File

@ -2120,8 +2120,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (is_huge_zero_pmd(*pmd)) { if (is_huge_zero_pmd(*pmd)) {
/* /*
* FIXME: Do we want to invalidate secondary mmu by calling * FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below
* __split_huge_pmd() ? * inside __split_huge_pmd() ?
* *
* We are going from a zero huge page write protected to zero * We are going from a zero huge page write protected to zero
* small page also write protected so it does not seems useful * small page also write protected so it does not seems useful

View File

@ -6649,8 +6649,9 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
else else
flush_hugetlb_tlb_range(vma, start, end); flush_hugetlb_tlb_range(vma, start, end);
/* /*
* No need to call mmu_notifier_invalidate_range() we are downgrading * No need to call mmu_notifier_arch_invalidate_secondary_tlbs() we are
* page table protection not changing it to point to a new page. * downgrading page table protection not changing it to point to a new
* page.
* *
* See Documentation/mm/mmu_notifier.rst * See Documentation/mm/mmu_notifier.rst
*/ */
@ -7294,7 +7295,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
i_mmap_unlock_write(vma->vm_file->f_mapping); i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma); hugetlb_vma_unlock_write(vma);
/* /*
* No need to call mmu_notifier_invalidate_range(), see * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst. * Documentation/mm/mmu_notifier.rst.
*/ */
mmu_notifier_invalidate_range_end(&range); mmu_notifier_invalidate_range_end(&range);

View File

@ -585,8 +585,8 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
lock_map_release(&__mmu_notifier_invalidate_range_start_map); lock_map_release(&__mmu_notifier_invalidate_range_start_map);
} }
void __mmu_notifier_invalidate_range(struct mm_struct *mm, void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
struct mmu_notifier *subscription; struct mmu_notifier *subscription;
int id; int id;
@ -595,9 +595,10 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
hlist_for_each_entry_rcu(subscription, hlist_for_each_entry_rcu(subscription,
&mm->notifier_subscriptions->list, hlist, &mm->notifier_subscriptions->list, hlist,
srcu_read_lock_held(&srcu)) { srcu_read_lock_held(&srcu)) {
if (subscription->ops->invalidate_range) if (subscription->ops->arch_invalidate_secondary_tlbs)
subscription->ops->invalidate_range(subscription, mm, subscription->ops->arch_invalidate_secondary_tlbs(
start, end); subscription, mm,
start, end);
} }
srcu_read_unlock(&srcu, id); srcu_read_unlock(&srcu, id);
} }
@ -616,6 +617,16 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
mmap_assert_write_locked(mm); mmap_assert_write_locked(mm);
BUG_ON(atomic_read(&mm->mm_users) <= 0); BUG_ON(atomic_read(&mm->mm_users) <= 0);
/*
* Subsystems should only implement arch_invalidate_secondary_tlbs() or
* the invalidate_range_start()/end() callbacks, not both.
*/
if (WARN_ON_ONCE(subscription &&
(subscription->ops->arch_invalidate_secondary_tlbs &&
(subscription->ops->invalidate_range_start ||
subscription->ops->invalidate_range_end))))
return -EINVAL;
if (!mm->notifier_subscriptions) { if (!mm->notifier_subscriptions) {
/* /*
* kmalloc cannot be called under mm_take_all_locks(), but we * kmalloc cannot be called under mm_take_all_locks(), but we