mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-02 07:04:24 +00:00
6713b8f11a
commit a873dfe103 upstream.

Patch series "Copy-on-write poison recovery", v3.

Part 1 deals with the process that triggered the copy-on-write fault with
a store to a shared read-only page. That process is sent a SIGBUS with
the usual machine check decoration to specify the virtual address of the
lost page, together with the scope.

Part 2 sets up to asynchronously take the page with the uncorrected error
offline to prevent additional machine check faults. H/t to Miaohe Lin
<linmiaohe@huawei.com> and Shuai Xue <xueshuai@linux.alibaba.com> for
pointing me to the existing function to queue a call to memory_failure().

On x86 there is some duplicate reporting (because the error is signalled
both by the memory controller and by the core that triggered the machine
check). Console logs look like this:

This patch (of 2):

If the kernel is copying a page as the result of a copy-on-write fault
and runs into an uncorrectable error, Linux will crash because it does
not have recovery code for this case where poison is consumed by the
kernel. It is easy to set up a test case. Just inject an error into a
private page, fork(2), and have the child process write to the page.

I wrapped that neatly into a test at:

	git://git.kernel.org/pub/scm/linux/kernel/git/aegl/ras-tools.git

just enable ACPI error injection and run:

	# ./einj_mem-uc -f copy-on-write

Add a new copy_mc_user_highpage() function that uses copy_mc_to_kernel()
on architectures where that is available (currently x86 and powerpc).
When an error is detected during the page copy, return VM_FAULT_HWPOISON
to the caller of wp_page_copy(). This propagates up the call stack. Both
x86 and powerpc have code in their fault handlers to deal with this
return code by sending a SIGBUS to the application.

Note that this patch avoids a system crash and signals the process that
triggered the copy-on-write action. It does not take any action for the
memory error that is still in the shared page. To handle that, a call to
memory_failure() is needed. But this cannot be done from wp_page_copy()
because it holds mmap_lock(). Perhaps the architecture fault handlers
can deal with this loose end in a subsequent patch?

On Intel/x86 this loose end will often be handled automatically because
the memory controller provides an additional notification of the h/w
poison in memory; the handler for this will call memory_failure(). This
isn't a 100% solution. If there are multiple errors, not all may be
logged in this way.

Cc: <stable@vger.kernel.org>
[tony.luck@intel.com: add call to kmsan_unpoison_memory(), per Miaohe Lin]
Link: https://lkml.kernel.org/r/20221031201029.102123-2-tony.luck@intel.com
Link: https://lkml.kernel.org/r/20221021200120.175753-1-tony.luck@intel.com
Link: https://lkml.kernel.org/r/20221021200120.175753-2-tony.luck@intel.com
Signed-off-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Tested-by: Shuai Xue <xueshuai@linux.alibaba.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Due to missing commits
    c89357e27f ("mm: support GUP-triggered unsharing of anonymous pages"),
    662ce1dc9c ("delayacct: track delays from write-protect copy"), and
    b073d7f8ae ("mm: kmsan: maintain KMSAN metadata for page operations"):
  The impact of c89357e27f is a name change from cow_user_page() to
  __wp_page_copy_user(). The impact of 662ce1dc9c is the introduction of
  a new feature of tracking write-protect copy in delayacct. The impact
  of b073d7f8ae is the introduction of the KMSAN feature. None of these
  commits establishes a meaningful dependency, hence resolved by ignoring
  them. - jane ]
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
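
The commit message describes the reproducer only in prose. Below is a
minimal userspace sketch of that scenario (illustrative only; the actual
error-injection step is what einj_mem-uc in ras-tools automates via ACPI
EINJ, and is elided here):

	#include <string.h>
	#include <sys/mman.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		/* Private anonymous page, shared copy-on-write after fork(). */
		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED)
			return 1;
		memset(p, 0x5a, 4096);

		/*
		 * A real test injects an uncorrectable error into the
		 * physical page backing 'p' at this point (ACPI EINJ).
		 */

		if (fork() == 0) {
			p[0] = 1;	/* child's store triggers the COW copy */
			_exit(0);
		}
		wait(NULL);	/* with this patch: child gets SIGBUS, kernel survives */
		return 0;
	}
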
354 lines
9.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HIGHMEM_H
#define _LINUX_HIGHMEM_H

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>

#include <asm/cacheflush.h>

#include "highmem-internal.h"

/**
 * kmap - Map a page for long term usage
 * @page: Pointer to the page to be mapped
 *
 * Returns: The virtual address of the mapping
 *
 * Can only be invoked from preemptible task context because on 32bit
 * systems with CONFIG_HIGHMEM enabled this function might sleep.
 *
 * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area
 * this returns the virtual address of the direct kernel mapping.
 *
 * The returned virtual address is globally visible and valid up to the
 * point where it is unmapped via kunmap(). The pointer can be handed to
 * other contexts.
 *
 * For highmem pages on 32bit systems this can be slow as the mapping space
 * is limited and protected by a global lock. In case that there is no
 * mapping slot available the function blocks until a slot is released via
 * kunmap().
 */
static inline void *kmap(struct page *page);

/**
 * kunmap - Unmap the virtual address mapped by kmap()
 * @page: Pointer to the page which was mapped by kmap()
 *
 * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of
 * pages in the low memory area.
 */
static inline void kunmap(struct page *page);
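
/*
 * Illustrative sketch, not part of the upstream header: a typical
 * kmap()/kunmap() pair. The helper name is invented for the example.
 * The mapping may be long lived and the pointer may be handed to another
 * context, but it must eventually be released with kunmap() on the page.
 */
#if 0	/* example only, compiled out */
static void example_clear_first_byte(struct page *page)
{
	char *addr = kmap(page);	/* may sleep on 32bit HIGHMEM */

	addr[0] = 0;
	kunmap(page);			/* release the global mapping slot */
}
#endif
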
/**
 * kmap_to_page - Get the page for a kmap'ed address
 * @addr: The address to look up
 *
 * Returns: The page which is mapped to @addr.
 */
static inline struct page *kmap_to_page(void *addr);

/**
 * kmap_flush_unused - Flush all unused kmap mappings in order to
 *                     remove stray mappings
 */
static inline void kmap_flush_unused(void);

/**
 * kmap_local_page - Map a page for temporary usage
 * @page: Pointer to the page to be mapped
 *
 * Returns: The virtual address of the mapping
 *
 * Can be invoked from any context.
 *
 * Requires careful handling when nesting multiple mappings because the map
 * management is stack based. The unmap has to be in the reverse order of
 * the map operation:
 *
 * addr1 = kmap_local_page(page1);
 * addr2 = kmap_local_page(page2);
 * ...
 * kunmap_local(addr2);
 * kunmap_local(addr1);
 *
 * Unmapping addr1 before addr2 is invalid and causes malfunction.
 *
 * Contrary to kmap() mappings the mapping is only valid in the context of
 * the caller and cannot be handed to other contexts.
 *
 * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the
 * virtual address of the direct mapping. Only real highmem pages are
 * temporarily mapped.
 *
 * While it is significantly faster than kmap() for the highmem case it
 * comes with restrictions about the pointer validity. Only use when really
 * necessary.
 *
 * On HIGHMEM enabled systems mapping a highmem page has the side effect of
 * disabling migration in order to keep the virtual address stable across
 * preemption. No caller of kmap_local_page() can rely on this side effect.
 */
static inline void *kmap_local_page(struct page *page);
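
/*
 * Illustrative sketch, not upstream code: kmap_local_page() is the
 * preferred primitive for short, CPU-local accesses to page contents.
 * The helper name is invented for the example. The returned pointer must
 * not escape the calling context and mappings are released in reverse
 * order with kunmap_local().
 */
#if 0	/* example only, compiled out */
static u32 example_read_first_word(struct page *page)
{
	u32 *addr = kmap_local_page(page);
	u32 val = *addr;

	kunmap_local(addr);
	return val;
}
#endif
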
/**
 * kmap_atomic - Atomically map a page for temporary usage - Deprecated!
 * @page: Pointer to the page to be mapped
 *
 * Returns: The virtual address of the mapping
 *
 * Effectively a wrapper around kmap_local_page() which disables pagefaults
 * and preemption.
 *
 * Do not use in new code. Use kmap_local_page() instead.
 */
static inline void *kmap_atomic(struct page *page);

/**
 * kunmap_atomic - Unmap the virtual address mapped by kmap_atomic()
 * @addr: Virtual address to be unmapped
 *
 * Counterpart to kmap_atomic().
 *
 * Effectively a wrapper around kunmap_local() which additionally undoes
 * the side effects of kmap_atomic(), i.e. reenabling pagefaults and
 * preemption.
 */
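
/*
 * Conversion sketch, not upstream code: mechanically replacing a
 * deprecated kmap_atomic()/kunmap_atomic() pair with the
 * kmap_local_page() equivalent. If the old code relied on the implicit
 * pagefault/preemption disabling, that now has to be requested
 * explicitly by the caller.
 */
#if 0	/* example only, compiled out */
static void example_conversion(struct page *page)
{
	char *addr;

	/* Deprecated style: */
	addr = kmap_atomic(page);
	addr[0] = 0;
	kunmap_atomic(addr);

	/* Preferred style: */
	addr = kmap_local_page(page);
	addr[0] = 0;
	kunmap_local(addr);
}
#endif
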
/* Highmem related interfaces for management code */
static inline unsigned int nr_free_highpages(void);
static inline unsigned long totalhigh_pages(void);

#ifndef ARCH_HAS_FLUSH_ANON_PAGE
static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
{
}
#endif

#ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
static inline void flush_kernel_vmap_range(void *vaddr, int size)
{
}
static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
{
}
#endif

/* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
#ifndef clear_user_highpage
static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
{
	void *addr = kmap_atomic(page);
	clear_user_page(addr, vaddr, page);
	kunmap_atomic(addr);
}
#endif

#ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
/**
 * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
 * @vma: The VMA the page is to be allocated for
 * @vaddr: The virtual address the page will be inserted into
 *
 * This function will allocate a page for a VMA that the caller knows will
 * be able to migrate in the future using move_pages() or be reclaimed.
 *
 * An architecture may override this function by defining
 * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE and providing their own
 * implementation.
 */
static inline struct page *
alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
				   unsigned long vaddr)
{
	struct page *page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);

	if (page)
		clear_user_highpage(page, vaddr);

	return page;
}
#endif

static inline void clear_highpage(struct page *page)
{
	void *kaddr = kmap_atomic(page);
	clear_page(kaddr);
	kunmap_atomic(kaddr);
}

#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE

static inline void tag_clear_highpage(struct page *page)
{
}

#endif

/*
 * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
 * If we pass in a head page, we can zero up to the size of the compound page.
 */
#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
		unsigned start2, unsigned end2);
#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
static inline void zero_user_segments(struct page *page,
		unsigned start1, unsigned end1,
		unsigned start2, unsigned end2)
{
	void *kaddr = kmap_atomic(page);
	unsigned int i;

	BUG_ON(end1 > page_size(page) || end2 > page_size(page));

	if (end1 > start1)
		memset(kaddr + start1, 0, end1 - start1);

	if (end2 > start2)
		memset(kaddr + start2, 0, end2 - start2);

	kunmap_atomic(kaddr);
	for (i = 0; i < compound_nr(page); i++)
		flush_dcache_page(page + i);
}
#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */

static inline void zero_user_segment(struct page *page,
	unsigned start, unsigned end)
{
	zero_user_segments(page, start, end, 0, 0);
}

static inline void zero_user(struct page *page,
	unsigned start, unsigned size)
{
	zero_user_segments(page, start, start + size, 0, 0);
}

#ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE

static inline void copy_user_highpage(struct page *to, struct page *from,
	unsigned long vaddr, struct vm_area_struct *vma)
{
	char *vfrom, *vto;

	vfrom = kmap_atomic(from);
	vto = kmap_atomic(to);
	copy_user_page(vto, vfrom, vaddr, to);
	kunmap_atomic(vto);
	kunmap_atomic(vfrom);
}

#endif

#ifdef copy_mc_to_kernel
static inline int copy_mc_user_highpage(struct page *to, struct page *from,
					unsigned long vaddr, struct vm_area_struct *vma)
{
	unsigned long ret;
	char *vfrom, *vto;

	vfrom = kmap_local_page(from);
	vto = kmap_local_page(to);
	ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE);
	kunmap_local(vto);
	kunmap_local(vfrom);

	return ret;
}
#else
static inline int copy_mc_user_highpage(struct page *to, struct page *from,
					unsigned long vaddr, struct vm_area_struct *vma)
{
	copy_user_highpage(to, from, vaddr, vma);
	return 0;
}
#endif
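
/*
 * Usage sketch, not upstream code: how a caller such as the COW fault
 * path described in the commit message can consume
 * copy_mc_user_highpage(). A non-zero return means the machine-check
 * safe copy hit an uncorrectable error in the source page, so the copy
 * is not trustworthy; the caller fails gracefully (e.g. with
 * VM_FAULT_HWPOISON, which the arch fault handler turns into a SIGBUS)
 * instead of letting the kernel consume poison. The function below is
 * invented for illustration.
 */
#if 0	/* example only, compiled out */
static vm_fault_t example_cow_copy(struct page *dst, struct page *src,
				   unsigned long addr,
				   struct vm_area_struct *vma)
{
	if (copy_mc_user_highpage(dst, src, addr, vma))
		return VM_FAULT_HWPOISON;	/* propagate up the call stack */

	return 0;
}
#endif
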
#ifndef __HAVE_ARCH_COPY_HIGHPAGE

static inline void copy_highpage(struct page *to, struct page *from)
{
	char *vfrom, *vto;

	vfrom = kmap_atomic(from);
	vto = kmap_atomic(to);
	copy_page(vto, vfrom);
	kunmap_atomic(vto);
	kunmap_atomic(vfrom);
}

#endif

static inline void memcpy_page(struct page *dst_page, size_t dst_off,
			       struct page *src_page, size_t src_off,
			       size_t len)
{
	char *dst = kmap_local_page(dst_page);
	char *src = kmap_local_page(src_page);

	VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
	memcpy(dst + dst_off, src + src_off, len);
	kunmap_local(src);
	kunmap_local(dst);
}

static inline void memmove_page(struct page *dst_page, size_t dst_off,
				struct page *src_page, size_t src_off,
				size_t len)
{
	char *dst = kmap_local_page(dst_page);
	char *src = kmap_local_page(src_page);

	VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
	memmove(dst + dst_off, src + src_off, len);
	kunmap_local(src);
	kunmap_local(dst);
}

static inline void memset_page(struct page *page, size_t offset, int val,
			       size_t len)
{
	char *addr = kmap_local_page(page);

	VM_BUG_ON(offset + len > PAGE_SIZE);
	memset(addr + offset, val, len);
	kunmap_local(addr);
}

static inline void memcpy_from_page(char *to, struct page *page,
				    size_t offset, size_t len)
{
	char *from = kmap_local_page(page);

	VM_BUG_ON(offset + len > PAGE_SIZE);
	memcpy(to, from + offset, len);
	kunmap_local(from);
}

static inline void memcpy_to_page(struct page *page, size_t offset,
				  const char *from, size_t len)
{
	char *to = kmap_local_page(page);

	VM_BUG_ON(offset + len > PAGE_SIZE);
	memcpy(to + offset, from, len);
	flush_dcache_page(page);
	kunmap_local(to);
}
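
/*
 * Usage sketch, not upstream code: memcpy_from_page()/memcpy_to_page()
 * bundle the kmap_local_page()/kunmap_local() pair shown above, so
 * callers never touch the temporary mapping directly. The function name
 * and buffer are invented for the example.
 */
#if 0	/* example only, compiled out */
static void example_roundtrip(struct page *page)
{
	char buf[64];

	memcpy_from_page(buf, page, 0, sizeof(buf));	/* page -> buffer */
	memcpy_to_page(page, PAGE_SIZE - sizeof(buf),
		       buf, sizeof(buf));		/* buffer -> end of page */
}
#endif
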
static inline void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_local_page(page);
	memset(addr + offset, 0, len);
	flush_dcache_page(page);
	kunmap_local(addr);
}

#endif /* _LINUX_HIGHMEM_H */