mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-11-01 17:08:10 +00:00
763802b53a
Commit3f8fd02b1b
("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in the vunmap() code-path. While this change was necessary to maintain correctness on x86-32-pae kernels, it also adds additional cycles for architectures that don't need it. Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported severe performance regressions in micro-benchmarks because it now also calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But the vmalloc_sync_all() implementation on x86-64 is only needed for newly created mappings. To avoid the unnecessary work on x86-64 and to gain the performance back, split up vmalloc_sync_all() into two functions: * vmalloc_sync_mappings(), and * vmalloc_sync_unmappings() Most call-sites to vmalloc_sync_all() only care about new mappings being synchronized. The only exception is the new call-site added in the above mentioned commit. Shile Zhang directed us to a report of an 80% regression in reaim throughput. Fixes:3f8fd02b1b
("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") Reported-by: kernel test robot <oliver.sang@intel.com> Reported-by: Shile Zhang <shile.zhang@linux.alibaba.com> Signed-off-by: Joerg Roedel <jroedel@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Tested-by: Borislav Petkov <bp@suse.de> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> [GHES] Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: <stable@vger.kernel.org> Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/ Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
254 lines
7.7 KiB
C
254 lines
7.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_VMALLOC_H
|
|
#define _LINUX_VMALLOC_H
|
|
|
|
#include <linux/spinlock.h>
|
|
#include <linux/init.h>
|
|
#include <linux/list.h>
|
|
#include <linux/llist.h>
|
|
#include <asm/page.h> /* pgprot_t */
|
|
#include <linux/rbtree.h>
|
|
#include <linux/overflow.h>
|
|
|
|
#include <asm/vmalloc.h>
|
|
|
|
struct vm_area_struct; /* vma defining user mapping in mm_types.h */
|
|
struct notifier_block; /* in notifier.h */
|
|
|
|
/* bits in flags of vmalloc's vm_struct below */
|
|
#define VM_IOREMAP 0x00000001 /* ioremap() and friends */
|
|
#define VM_ALLOC 0x00000002 /* vmalloc() */
|
|
#define VM_MAP 0x00000004 /* vmap()ed pages */
|
|
#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */
|
|
#define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */
|
|
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
|
|
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
|
|
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
|
|
|
|
/*
|
|
* VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
|
|
*
|
|
* If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after
|
|
* shadow memory has been mapped. It's used to handle allocation errors so that
|
|
* we don't try to poision shadow on free if it was never allocated.
|
|
*
|
|
* Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to
|
|
* determine which allocations need the module shadow freed.
|
|
*/
|
|
|
|
/*
|
|
* Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with
|
|
* vfree_atomic().
|
|
*/
|
|
#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */
|
|
|
|
/* bits [20..32] reserved for arch specific ioremap internals */
|
|
|
|
/*
|
|
* Maximum alignment for ioremap() regions.
|
|
* Can be overriden by arch-specific value.
|
|
*/
|
|
#ifndef IOREMAP_MAX_ORDER
|
|
#define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */
|
|
#endif
|
|
|
|
struct vm_struct {
|
|
struct vm_struct *next;
|
|
void *addr;
|
|
unsigned long size;
|
|
unsigned long flags;
|
|
struct page **pages;
|
|
unsigned int nr_pages;
|
|
phys_addr_t phys_addr;
|
|
const void *caller;
|
|
};
|
|
|
|
struct vmap_area {
|
|
unsigned long va_start;
|
|
unsigned long va_end;
|
|
|
|
struct rb_node rb_node; /* address sorted rbtree */
|
|
struct list_head list; /* address sorted list */
|
|
|
|
/*
|
|
* The following three variables can be packed, because
|
|
* a vmap_area object is always one of the three states:
|
|
* 1) in "free" tree (root is vmap_area_root)
|
|
* 2) in "busy" tree (root is free_vmap_area_root)
|
|
* 3) in purge list (head is vmap_purge_list)
|
|
*/
|
|
union {
|
|
unsigned long subtree_max_size; /* in "free" tree */
|
|
struct vm_struct *vm; /* in "busy" tree */
|
|
struct llist_node purge_list; /* in purge list */
|
|
};
|
|
};
|
|
|
|
/*
|
|
* Highlevel APIs for driver use
|
|
*/
|
|
extern void vm_unmap_ram(const void *mem, unsigned int count);
|
|
extern void *vm_map_ram(struct page **pages, unsigned int count,
|
|
int node, pgprot_t prot);
|
|
extern void vm_unmap_aliases(void);
|
|
|
|
#ifdef CONFIG_MMU
|
|
extern void __init vmalloc_init(void);
|
|
extern unsigned long vmalloc_nr_pages(void);
|
|
#else
|
|
static inline void vmalloc_init(void)
|
|
{
|
|
}
|
|
static inline unsigned long vmalloc_nr_pages(void) { return 0; }
|
|
#endif
|
|
|
|
extern void *vmalloc(unsigned long size);
|
|
extern void *vzalloc(unsigned long size);
|
|
extern void *vmalloc_user(unsigned long size);
|
|
extern void *vmalloc_node(unsigned long size, int node);
|
|
extern void *vzalloc_node(unsigned long size, int node);
|
|
extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags);
|
|
extern void *vmalloc_exec(unsigned long size);
|
|
extern void *vmalloc_32(unsigned long size);
|
|
extern void *vmalloc_32_user(unsigned long size);
|
|
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
|
|
extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
|
|
unsigned long start, unsigned long end, gfp_t gfp_mask,
|
|
pgprot_t prot, unsigned long vm_flags, int node,
|
|
const void *caller);
|
|
#ifndef CONFIG_MMU
|
|
extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
|
|
static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
|
|
gfp_t flags, void *caller)
|
|
{
|
|
return __vmalloc_node_flags(size, node, flags);
|
|
}
|
|
#else
|
|
extern void *__vmalloc_node_flags_caller(unsigned long size,
|
|
int node, gfp_t flags, void *caller);
|
|
#endif
|
|
|
|
extern void vfree(const void *addr);
|
|
extern void vfree_atomic(const void *addr);
|
|
|
|
extern void *vmap(struct page **pages, unsigned int count,
|
|
unsigned long flags, pgprot_t prot);
|
|
extern void vunmap(const void *addr);
|
|
|
|
extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
|
|
unsigned long uaddr, void *kaddr,
|
|
unsigned long size);
|
|
|
|
extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
|
|
unsigned long pgoff);
|
|
void vmalloc_sync_mappings(void);
|
|
void vmalloc_sync_unmappings(void);
|
|
|
|
/*
|
|
* Lowlevel-APIs (not for driver use!)
|
|
*/
|
|
|
|
static inline size_t get_vm_area_size(const struct vm_struct *area)
|
|
{
|
|
if (!(area->flags & VM_NO_GUARD))
|
|
/* return actual size without guard page */
|
|
return area->size - PAGE_SIZE;
|
|
else
|
|
return area->size;
|
|
|
|
}
|
|
|
|
extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
|
|
extern struct vm_struct *get_vm_area_caller(unsigned long size,
|
|
unsigned long flags, const void *caller);
|
|
extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
|
|
unsigned long start, unsigned long end);
|
|
extern struct vm_struct *__get_vm_area_caller(unsigned long size,
|
|
unsigned long flags,
|
|
unsigned long start, unsigned long end,
|
|
const void *caller);
|
|
extern struct vm_struct *remove_vm_area(const void *addr);
|
|
extern struct vm_struct *find_vm_area(const void *addr);
|
|
|
|
extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
|
|
struct page **pages);
|
|
#ifdef CONFIG_MMU
|
|
extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
|
|
pgprot_t prot, struct page **pages);
|
|
extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size);
|
|
extern void unmap_kernel_range(unsigned long addr, unsigned long size);
|
|
static inline void set_vm_flush_reset_perms(void *addr)
|
|
{
|
|
struct vm_struct *vm = find_vm_area(addr);
|
|
|
|
if (vm)
|
|
vm->flags |= VM_FLUSH_RESET_PERMS;
|
|
}
|
|
#else
|
|
static inline int
|
|
map_kernel_range_noflush(unsigned long start, unsigned long size,
|
|
pgprot_t prot, struct page **pages)
|
|
{
|
|
return size >> PAGE_SHIFT;
|
|
}
|
|
static inline void
|
|
unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
|
|
{
|
|
}
|
|
static inline void
|
|
unmap_kernel_range(unsigned long addr, unsigned long size)
|
|
{
|
|
}
|
|
static inline void set_vm_flush_reset_perms(void *addr)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
/* Allocate/destroy a 'vmalloc' VM area. */
|
|
extern struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes);
|
|
extern void free_vm_area(struct vm_struct *area);
|
|
|
|
/* for /dev/kmem */
|
|
extern long vread(char *buf, char *addr, unsigned long count);
|
|
extern long vwrite(char *buf, char *addr, unsigned long count);
|
|
|
|
/*
|
|
* Internals. Dont't use..
|
|
*/
|
|
extern struct list_head vmap_area_list;
|
|
extern __init void vm_area_add_early(struct vm_struct *vm);
|
|
extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
|
|
|
|
#ifdef CONFIG_SMP
|
|
# ifdef CONFIG_MMU
|
|
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
|
|
const size_t *sizes, int nr_vms,
|
|
size_t align);
|
|
|
|
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
|
|
# else
|
|
static inline struct vm_struct **
|
|
pcpu_get_vm_areas(const unsigned long *offsets,
|
|
const size_t *sizes, int nr_vms,
|
|
size_t align)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
static inline void
|
|
pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
|
|
{
|
|
}
|
|
# endif
|
|
#endif
|
|
|
|
#ifdef CONFIG_MMU
|
|
#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
|
|
#else
|
|
#define VMALLOC_TOTAL 0UL
|
|
#endif
|
|
|
|
int register_vmap_purge_notifier(struct notifier_block *nb);
|
|
int unregister_vmap_purge_notifier(struct notifier_block *nb);
|
|
|
|
#endif /* _LINUX_VMALLOC_H */
|