diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 01f6c2ca7ecd..9c567d243f61 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -202,7 +202,24 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
 				     pte_t *ptep, pte_t pte, int percpu)
 {
 	*ptep = pte;
-	asm volatile("ptesync" : : : "memory");
+
+	/*
+	 * The architecture suggests a ptesync after setting the pte, which
+	 * orders the store that updates the pte with subsequent page table
+	 * walk accesses which may load the pte. Without this it may be
+	 * possible for a subsequent access to result in a spurious fault.
+	 *
+	 * This is not necessary for correctness, because a spurious fault
+	 * is tolerated by the page fault handler, and this store will
+	 * eventually be seen. In testing, there was no noticeable increase
+	 * in user faults on POWER9. Avoiding ptesync here is a significant
+	 * win for things like fork. If a future microarchitecture benefits
+	 * from ptesync, it should probably go into update_mmu_cache, rather
+	 * than set_pte_at (which is used to set ptes unrelated to faults).
+	 *
+	 * Spurious faults to the vmalloc region are not tolerated, so there
+	 * is a ptesync in flush_cache_vmap.
+	 */
 }
 
 static inline int radix__pmd_bad(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 11843e37d9cf..e9662648e72d 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -26,6 +26,18 @@
-#define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Book3s has no ptesync after setting a pte, so without this ptesync it's
+ * possible for a kernel virtual mapping access to return a spurious fault
+ * if it's accessed right after the pte is set. The page fault handler does
+ * not expect this type of fault. flush_cache_vmap is not exactly the right
+ * place to put this, but it seems to work well enough.
+ */
+#define flush_cache_vmap(start, end)	do { asm volatile("ptesync" ::: "memory"); } while (0)
+#else
+#define flush_cache_vmap(start, end)	do { } while (0)
+#endif
+
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE	1
 extern void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index d6f74cbf0fed..96f68c5aa1f5 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -1115,5 +1115,5 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 		 * an access fault, which is defined by the architecture.
 		 */
 	}
-	asm volatile("ptesync" : : : "memory");
+	/* See ptesync comment in radix__set_pte_at */
 }
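
For illustration only (not part of this diff): the comment added to radix__set_pte_at() suggests that if a future microarchitecture did benefit from the barrier on the user fault path, it should live in update_mmu_cache() rather than in set_pte_at(). A minimal hypothetical sketch of that idea, assuming the generic update_mmu_cache(vma, address, ptep) hook, might look like:

	/* Hypothetical sketch, not part of this patch. */
	static inline void update_mmu_cache(struct vm_area_struct *vma,
					    unsigned long address, pte_t *ptep)
	{
		/*
		 * Order the PTE store done earlier via set_pte_at() before any
		 * subsequent hardware page table walk that may load it, so the
		 * access that faulted does not fault again spuriously.
		 */
		asm volatile("ptesync" : : : "memory");
	}

Hosting the barrier there would confine its cost to the fault path, instead of paying it on every set_pte_at() call (for example the fork/copy path this patch is optimising).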