linux-stable/arch/mips/mm/cache.c

218 lines
6.1 KiB
C
Raw Normal View History

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org)
* Copyright (C) 2007 MIPS Technologies, Inc.
*/
#include <linux/fs.h>
#include <linux/fcntl.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <asm/bcache.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/cpu.h>
#include <asm/cpu-features.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
/* Cache operations. */
void (*flush_cache_all)(void);
void (*__flush_cache_all)(void);
EXPORT_SYMBOL_GPL(__flush_cache_all);
void (*flush_cache_mm)(struct mm_struct *mm);
void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,
unsigned long pfn);
void (*flush_icache_range)(unsigned long start, unsigned long end);
EXPORT_SYMBOL_GPL(flush_icache_range);
void (*local_flush_icache_range)(unsigned long start, unsigned long end);
EXPORT_SYMBOL_GPL(local_flush_icache_range);
void (*__flush_icache_user_range)(unsigned long start, unsigned long end);
void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end);
EXPORT_SYMBOL_GPL(__local_flush_icache_user_range);
void (*__flush_cache_vmap)(void);
void (*__flush_cache_vunmap)(void);
void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size);
EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range);
/* MIPS specific cache operations */
void (*flush_data_cache_page)(unsigned long addr);
void (*flush_icache_all)(void);
EXPORT_SYMBOL(flush_data_cache_page);
EXPORT_SYMBOL(flush_icache_all);
/*
* Dummy cache handling routine
*/
void cache_noop(void) {}
#ifdef CONFIG_BOARD_SCACHE
static struct bcache_ops no_sc_ops = {
.bc_enable = (void *)cache_noop,
.bc_disable = (void *)cache_noop,
.bc_wback_inv = (void *)cache_noop,
.bc_inv = (void *)cache_noop
};
struct bcache_ops *bcops = &no_sc_ops;
#endif
#ifdef CONFIG_DMA_NONCOHERENT
/* DMA cache operations. */
void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
void (*_dma_cache_wback)(unsigned long start, unsigned long size);
void (*_dma_cache_inv)(unsigned long start, unsigned long size);
#endif /* CONFIG_DMA_NONCOHERENT */
/*
* We could optimize the case where the cache argument is not BCACHE but
* that seems very atypical use ...
*/
SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
unsigned int, cache)
{
if (bytes == 0)
return 0;
Remove 'type' argument from access_ok() function Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases: - csky still had the old "verify_area()" name as an alias. - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it) - microblaze used the type argument for a debug printout but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 02:57:57 +00:00
if (!access_ok((void __user *) addr, bytes))
return -EFAULT;
__flush_icache_user_range(addr, addr + bytes);
return 0;
}
void __flush_dcache_pages(struct page *page, unsigned int nr)
{
struct folio *folio = page_folio(page);
struct address_space *mapping = folio_flush_mapping(folio);
unsigned long addr;
unsigned int i;
if (mapping && !mapping_mapped(mapping)) {
folio_set_dcache_dirty(folio);
return;
}
/*
* We could delay the flush for the !page_mapping case too. But that
* case is for exec env/arg pages and those are %99 certainly going to
* get faulted into the tlb (and thus flushed) anyways.
*/
for (i = 0; i < nr; i++) {
addr = (unsigned long)kmap_local_page(nth_page(page, i));
flush_data_cache_page(addr);
kunmap_local((void *)addr);
}
}
EXPORT_SYMBOL(__flush_dcache_pages);
void __flush_anon_page(struct page *page, unsigned long vmaddr)
{
unsigned long addr = (unsigned long) page_address(page);
struct folio *folio = page_folio(page);
if (pages_do_alias(addr, vmaddr)) {
if (folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
void *kaddr;
kaddr = kmap_coherent(page, vmaddr);
flush_data_cache_page((unsigned long)kaddr);
kunmap_coherent();
} else
flush_data_cache_page(addr);
}
}
EXPORT_SYMBOL(__flush_anon_page);
MIPS: Sync icache & dcache in set_pte_at It's possible for pages to become visible prior to update_mmu_cache running if a thread within the same address space preempts the current thread or runs simultaneously on another CPU. That is, the following scenario is possible: CPU0 CPU1 write to page flush_dcache_page flush_icache_page set_pte_at map page update_mmu_cache If CPU1 maps the page in between CPU0's set_pte_at, which marks it valid & visible, and update_mmu_cache where the dcache flush occurs then CPU1s icache will fill from stale data (unless it fills from the dcache, in which case all is good, but most MIPS CPUs don't have this property). Commit 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.") attempted to fix that by performing the dcache flush in flush_icache_page such that it occurs before the set_pte_at call makes the page visible. However it has the problem that not all code that writes to pages exposed to userland call flush_icache_page. There are many callers of set_pte_at under mm/ and only 2 of them do call flush_icache_page. Thus the race window between a page becoming visible & being coherent between the icache & dcache remains open in some cases. To illustrate some of the cases, a WARN was added to __update_cache with this patch applied that triggered in cases where a page about to be flushed from the dcache was not the last page provided to flush_icache_page. That is, backtraces were obtained for cases in which the race window is left open without this patch. The 2 standout examples follow. When forking a process: [ 15.271842] [<80417630>] __update_cache+0xcc/0x188 [ 15.277274] [<80530394>] copy_page_range+0x56c/0x6ac [ 15.282861] [<8042936c>] copy_process.part.54+0xd40/0x17ac [ 15.289028] [<80429f80>] do_fork+0xe4/0x420 [ 15.293747] [<80413808>] handle_sys+0x128/0x14c When exec'ing an ELF binary: [ 14.445964] [<80417630>] __update_cache+0xcc/0x188 [ 14.451369] [<80538d88>] move_page_tables+0x414/0x498 [ 14.457075] [<8055d848>] setup_arg_pages+0x220/0x318 [ 14.462685] [<805b0f38>] load_elf_binary+0x530/0x12a0 [ 14.468374] [<8055ec3c>] search_binary_handler+0xbc/0x214 [ 14.474444] [<8055f6c0>] do_execveat_common+0x43c/0x67c [ 14.480324] [<8055f938>] do_execve+0x38/0x44 [ 14.485137] [<80413808>] handle_sys+0x128/0x14c These code paths write into a page, call flush_dcache_page then call set_pte_at without flush_icache_page inbetween. The end result is that the icache can become corrupted & userland processes may execute unexpected or invalid code, typically resulting in a reserved instruction exception, a trap or a segfault. Fix this race condition fully by performing any cache maintenance required to keep the icache & dcache in sync in set_pte_at, before the page is made valid. This has the added bonus of ensuring the cache maintenance always happens in one location, rather than being duplicated in flush_icache_page & update_mmu_cache. It also matches the way other architectures solve the same problem (see arm, ia64 & powerpc). Signed-off-by: Paul Burton <paul.burton@imgtec.com> Reported-by: Ionela Voinescu <ionela.voinescu@imgtec.com> Cc: Lars Persson <lars.persson@axis.com> Fixes: 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.") Cc: Steven J. Hill <sjhill@realitydiluted.com> Cc: David Daney <david.daney@cavium.com> Cc: Huacai Chen <chenhc@lemote.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable <stable@vger.kernel.org> # v4.1+ Patchwork: https://patchwork.linux-mips.org/patch/12722/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-03-01 02:37:59 +00:00
void __update_cache(unsigned long address, pte_t pte)
{
struct folio *folio;
unsigned long pfn, addr;
MIPS: Sync icache & dcache in set_pte_at It's possible for pages to become visible prior to update_mmu_cache running if a thread within the same address space preempts the current thread or runs simultaneously on another CPU. That is, the following scenario is possible: CPU0 CPU1 write to page flush_dcache_page flush_icache_page set_pte_at map page update_mmu_cache If CPU1 maps the page in between CPU0's set_pte_at, which marks it valid & visible, and update_mmu_cache where the dcache flush occurs then CPU1s icache will fill from stale data (unless it fills from the dcache, in which case all is good, but most MIPS CPUs don't have this property). Commit 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.") attempted to fix that by performing the dcache flush in flush_icache_page such that it occurs before the set_pte_at call makes the page visible. However it has the problem that not all code that writes to pages exposed to userland call flush_icache_page. There are many callers of set_pte_at under mm/ and only 2 of them do call flush_icache_page. Thus the race window between a page becoming visible & being coherent between the icache & dcache remains open in some cases. To illustrate some of the cases, a WARN was added to __update_cache with this patch applied that triggered in cases where a page about to be flushed from the dcache was not the last page provided to flush_icache_page. That is, backtraces were obtained for cases in which the race window is left open without this patch. The 2 standout examples follow. When forking a process: [ 15.271842] [<80417630>] __update_cache+0xcc/0x188 [ 15.277274] [<80530394>] copy_page_range+0x56c/0x6ac [ 15.282861] [<8042936c>] copy_process.part.54+0xd40/0x17ac [ 15.289028] [<80429f80>] do_fork+0xe4/0x420 [ 15.293747] [<80413808>] handle_sys+0x128/0x14c When exec'ing an ELF binary: [ 14.445964] [<80417630>] __update_cache+0xcc/0x188 [ 14.451369] [<80538d88>] move_page_tables+0x414/0x498 [ 14.457075] [<8055d848>] setup_arg_pages+0x220/0x318 [ 14.462685] [<805b0f38>] load_elf_binary+0x530/0x12a0 [ 14.468374] [<8055ec3c>] search_binary_handler+0xbc/0x214 [ 14.474444] [<8055f6c0>] do_execveat_common+0x43c/0x67c [ 14.480324] [<8055f938>] do_execve+0x38/0x44 [ 14.485137] [<80413808>] handle_sys+0x128/0x14c These code paths write into a page, call flush_dcache_page then call set_pte_at without flush_icache_page inbetween. The end result is that the icache can become corrupted & userland processes may execute unexpected or invalid code, typically resulting in a reserved instruction exception, a trap or a segfault. Fix this race condition fully by performing any cache maintenance required to keep the icache & dcache in sync in set_pte_at, before the page is made valid. This has the added bonus of ensuring the cache maintenance always happens in one location, rather than being duplicated in flush_icache_page & update_mmu_cache. It also matches the way other architectures solve the same problem (see arm, ia64 & powerpc). Signed-off-by: Paul Burton <paul.burton@imgtec.com> Reported-by: Ionela Voinescu <ionela.voinescu@imgtec.com> Cc: Lars Persson <lars.persson@axis.com> Fixes: 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.") Cc: Steven J. Hill <sjhill@realitydiluted.com> Cc: David Daney <david.daney@cavium.com> Cc: Huacai Chen <chenhc@lemote.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable <stable@vger.kernel.org> # v4.1+ Patchwork: https://patchwork.linux-mips.org/patch/12722/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-03-01 02:37:59 +00:00
int exec = !pte_no_exec(pte) && !cpu_has_ic_fills_f_dc;
unsigned int i;
pfn = pte_pfn(pte);
[MIPS] Retire flush_icache_page from mm use. On the 34K the redundant cache operations were causing excessive stalls resulting in realtime code running on the second VPE missing its deadline. For all other platforms this patch is just a significant performance improvment as illustrated by below benchmark numbers. Processor, Processes - times in microseconds - smaller is better ------------------------------------------------------------------------------ Host OS Mhz null null open slct sig sig fork exec sh call I/O stat clos TCP inst hndl proc proc proc --------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 25Kf 2.6.18-rc4 533 0.49 1.16 7.57 33.4 30.5 1.34 12.4 5497 17.K 54.K 25Kf 2.6.18-rc4-p 533 0.49 1.16 6.68 23.0 30.7 1.36 8.55 5030 16.K 48.K 4Kc 2.6.18-rc4 80 4.21 15.0 131. 289. 261. 16.5 258. 18.K 70.K 227K 4Kc 2.6.18-rc4-p 80 4.34 13.1 128. 285. 262. 18.2 258. 12.K 52.K 176K 34Kc 2.6.18-rc4 40 5.01 14.0 61.6 90.0 477. 17.9 94.7 29.K 108K 342K 34Kc 2.6.18-rc4-p 40 4.98 13.9 61.2 89.7 475. 17.6 93.7 8758 44.K 158K BCM1480 2.6.18-rc4 700 0.28 0.60 3.68 5.92 16.0 0.78 5.08 931. 3163 15.K BCM1480 2.6.18-rc4-p 700 0.28 0.61 3.65 5.85 16.0 0.79 5.20 395. 1464 8385 TX49-16K 2.6.18-rc3 197 0.73 2.41 19.0 37.8 82.9 2.94 17.5 4438 14.K 56.K TX49-16K 2.6.18-rc3-p 197 0.73 2.40 19.9 36.3 82.9 2.94 23.4 2577 9103 38.K TX49-32K 2.6.18-rc3 396 0.36 1.19 6.80 11.8 41.0 1.46 8.17 2738 8465 32.K TX49-32K 2.6.18-rc3-p 396 0.36 1.19 6.82 10.2 41.0 1.46 8.18 1330 4638 18.K Original patch by me with enhancements by Atsushi Nemoto. Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
2006-08-12 15:40:08 +00:00
if (unlikely(!pfn_valid(pfn)))
return;
folio = page_folio(pfn_to_page(pfn));
address &= PAGE_MASK;
address -= offset_in_folio(folio, pfn << PAGE_SHIFT);
if (folio_test_dcache_dirty(folio)) {
for (i = 0; i < folio_nr_pages(folio); i++) {
addr = (unsigned long)kmap_local_folio(folio, i);
if (exec || pages_do_alias(addr, address))
flush_data_cache_page(addr);
kunmap_local((void *)addr);
address += PAGE_SIZE;
}
folio_clear_dcache_dirty(folio);
}
}
unsigned long _page_cachable_default;
EXPORT_SYMBOL(_page_cachable_default);
#define PM(p) __pgprot(_page_cachable_default | (p))
mips/mm: enable ARCH_HAS_VM_GET_PAGE_PROT This enables ARCH_HAS_VM_GET_PAGE_PROT on the platform and exports standard vm_get_page_prot() implementation via DECLARE_VM_GET_PAGE_PROT, which looks up a private and static protection_map[] array. Subsequently all __SXXX and __PXXX macros can be dropped which are no longer needed. Link: https://lkml.kernel.org/r/20220711070600.2378316-21-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Brian Cain <bcain@quicinc.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dinh Nguyen <dinguyen@kernel.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guo Ren <guoren@kernel.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Jeff Dike <jdike@addtoit.com> Cc: Jonas Bonn <jonas@southpole.se> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Richard Henderson <rth@twiddle.net> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Sam Ravnborg <sam@ravnborg.org> Cc: Stafford Horne <shorne@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vineet Gupta <vgupta@kernel.org> Cc: WANG Xuerui <kernel@xen0n.name> Cc: Will Deacon <will@kernel.org> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2022-07-11 07:05:54 +00:00
static pgprot_t protection_map[16] __ro_after_init;
DECLARE_VM_GET_PAGE_PROT
static inline void setup_protection_map(void)
{
protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
protection_map[4] = PM(_PAGE_PRESENT);
protection_map[5] = PM(_PAGE_PRESENT);
protection_map[6] = PM(_PAGE_PRESENT);
protection_map[7] = PM(_PAGE_PRESENT);
protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
_PAGE_NO_READ);
protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
protection_map[12] = PM(_PAGE_PRESENT);
protection_map[13] = PM(_PAGE_PRESENT);
protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE);
protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE);
}
#undef PM
MIPS: Delete __cpuinit/__CPUINIT usage from MIPS code commit 3747069b25e419f6b51395f48127e9812abc3596 upstream. The __cpuinit type of throwaway sections might have made sense some time ago when RAM was more constrained, but now the savings do not offset the cost and complications. For example, the fix in commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time") is a good example of the nasty type of bugs that can be created with improper use of the various __init prefixes. After a discussion on LKML[1] it was decided that cpuinit should go the way of devinit and be phased out. Once all the users are gone, we can then finally remove the macros themselves from linux/init.h. Note that some harmless section mismatch warnings may result, since notify_cpu_starting() and cpu_up() are arch independent (kernel/cpu.c) and are flagged as __cpuinit -- so if we remove the __cpuinit from the arch specific callers, we will also get section mismatch warnings. As an intermediate step, we intend to turn the linux/init.h cpuinit related content into no-ops as early as possible, since that will get rid of these warnings. In any case, they are temporary and harmless. Here, we remove all the MIPS __cpuinit from C code and __CPUINIT from asm files. MIPS is interesting in this respect, because there are also uasm users hiding behind their own renamed versions of the __cpuinit macros. [1] https://lkml.org/lkml/2013/5/20/589 [ralf@linux-mips.org: Folded in Paul's followup fix.] Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5494/ Patchwork: https://patchwork.linux-mips.org/patch/5495/ Patchwork: https://patchwork.linux-mips.org/patch/5509/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2013-06-18 13:38:59 +00:00
void cpu_cache_init(void)
{
if (IS_ENABLED(CONFIG_CPU_R3000) && cpu_has_3k_cache)
r3k_cache_init();
if (IS_ENABLED(CONFIG_CPU_R4K_CACHE_TLB) && cpu_has_4k_cache)
r4k_cache_init();
if (IS_ENABLED(CONFIG_CPU_CAVIUM_OCTEON) && cpu_has_octeon_cache)
octeon_cache_init();
setup_protection_map();
}