mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-11-01 00:48:50 +00:00
ksm: the mm interface to ksm
This patch presents the mm interface to a dummy version of ksm.c, for better scrutiny of that interface: the real ksm.c follows later. When CONFIG_KSM is not set, madvise(2) rejects MADV_MERGEABLE and MADV_UNMERGEABLE with EINVAL, since that seems more helpful than pretending that they can be serviced. But when CONFIG_KSM=y, accept them even if KSM is not currently running, and even on areas which KSM will not touch (e.g. hugetlb or shared file or special driver mappings). Like other madvices, report ENOMEM despite success if any area in the range is unmapped, and use EAGAIN to report out of memory. Define vma flag VM_MERGEABLE to identify an area on which KSM may try merging pages: leave it to ksm_madvise() to decide whether to set it. Define mm flag MMF_VM_MERGEABLE to identify an mm which might contain VM_MERGEABLE areas, to minimize callouts when forking or exiting. Based upon earlier patches by Chris Wright and Izik Eidus. Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk> Signed-off-by: Chris Wright <chrisw@redhat.com> Signed-off-by: Izik Eidus <ieidus@redhat.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Avi Kivity <avi@redhat.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
d19f352484
commit
f8af4da3b4
8 changed files with 147 additions and 1 deletions
50
include/linux/ksm.h
Normal file
50
include/linux/ksm.h
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
#ifndef __LINUX_KSM_H
|
||||||
|
#define __LINUX_KSM_H
|
||||||
|
/*
|
||||||
|
* Memory merging support.
|
||||||
|
*
|
||||||
|
* This code enables dynamic sharing of identical pages found in different
|
||||||
|
* memory areas, even if they are not shared by fork().
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/bitops.h>
|
||||||
|
#include <linux/mm.h>
|
||||||
|
#include <linux/sched.h>
|
||||||
|
|
||||||
|
#ifdef CONFIG_KSM
|
||||||
|
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
|
||||||
|
unsigned long end, int advice, unsigned long *vm_flags);
|
||||||
|
int __ksm_enter(struct mm_struct *mm);
|
||||||
|
void __ksm_exit(struct mm_struct *mm);
|
||||||
|
|
||||||
|
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
|
||||||
|
{
|
||||||
|
if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
|
||||||
|
return __ksm_enter(mm);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ksm_exit(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
|
||||||
|
__ksm_exit(mm);
|
||||||
|
}
|
||||||
|
#else /* !CONFIG_KSM */
|
||||||
|
|
||||||
|
/*
 * !CONFIG_KSM stub: takes the same arguments as the real ksm_madvise(),
 * touches none of them and reports success.
 */
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
			      unsigned long end, int advice,
			      unsigned long *vm_flags)
{
	return 0;
}
|
||||||
|
|
||||||
|
/* !CONFIG_KSM stub: nothing to set up at fork; always returns 0. */
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
	return 0;
}
|
||||||
|
|
||||||
|
/* !CONFIG_KSM stub: nothing to tear down when the mm goes away. */
static inline void ksm_exit(struct mm_struct *mm)
{
}
|
||||||
|
#endif /* !CONFIG_KSM */
|
||||||
|
|
||||||
|
#endif
|
|
@ -103,6 +103,7 @@ extern unsigned int kobjsize(const void *objp);
|
||||||
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
|
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
|
||||||
#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
|
#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
|
||||||
#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */
|
#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */
|
||||||
|
/*
 * Bit 31 needs the UL suffix: a bare 0x80000000 has type unsigned int, so
 * ~VM_MERGEABLE would be zero-extended when combined with an unsigned long
 * flags word and would clear bits 32..63 on 64-bit builds.
 */
#define VM_MERGEABLE	0x80000000UL	/* KSM may merge identical pages */
|
||||||
|
|
||||||
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
|
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
|
||||||
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
|
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
|
||||||
|
|
|
@ -434,7 +434,9 @@ extern int get_dumpable(struct mm_struct *mm);
|
||||||
/* dumpable bits */
|
/* dumpable bits */
|
||||||
#define MMF_DUMPABLE 0 /* core dump is permitted */
|
#define MMF_DUMPABLE 0 /* core dump is permitted */
|
||||||
#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */
|
#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */
|
||||||
|
|
||||||
#define MMF_DUMPABLE_BITS 2
|
#define MMF_DUMPABLE_BITS 2
|
||||||
|
#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
|
||||||
|
|
||||||
/* coredump filter bits */
|
/* coredump filter bits */
|
||||||
#define MMF_DUMP_ANON_PRIVATE 2
|
#define MMF_DUMP_ANON_PRIVATE 2
|
||||||
|
@ -444,6 +446,7 @@ extern int get_dumpable(struct mm_struct *mm);
|
||||||
#define MMF_DUMP_ELF_HEADERS 6
|
#define MMF_DUMP_ELF_HEADERS 6
|
||||||
#define MMF_DUMP_HUGETLB_PRIVATE 7
|
#define MMF_DUMP_HUGETLB_PRIVATE 7
|
||||||
#define MMF_DUMP_HUGETLB_SHARED 8
|
#define MMF_DUMP_HUGETLB_SHARED 8
|
||||||
|
|
||||||
#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
|
#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
|
||||||
#define MMF_DUMP_FILTER_BITS 7
|
#define MMF_DUMP_FILTER_BITS 7
|
||||||
#define MMF_DUMP_FILTER_MASK \
|
#define MMF_DUMP_FILTER_MASK \
|
||||||
|
@ -457,6 +460,10 @@ extern int get_dumpable(struct mm_struct *mm);
|
||||||
#else
|
#else
|
||||||
# define MMF_DUMP_MASK_DEFAULT_ELF 0
|
# define MMF_DUMP_MASK_DEFAULT_ELF 0
|
||||||
#endif
|
#endif
|
||||||
|
/* leave room for more dump flags */
|
||||||
|
#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
|
||||||
|
|
||||||
|
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
|
||||||
|
|
||||||
struct sighand_struct {
|
struct sighand_struct {
|
||||||
atomic_t count;
|
atomic_t count;
|
||||||
|
|
|
@ -49,6 +49,7 @@
|
||||||
#include <linux/ftrace.h>
|
#include <linux/ftrace.h>
|
||||||
#include <linux/profile.h>
|
#include <linux/profile.h>
|
||||||
#include <linux/rmap.h>
|
#include <linux/rmap.h>
|
||||||
|
#include <linux/ksm.h>
|
||||||
#include <linux/acct.h>
|
#include <linux/acct.h>
|
||||||
#include <linux/tsacct_kern.h>
|
#include <linux/tsacct_kern.h>
|
||||||
#include <linux/cn_proc.h>
|
#include <linux/cn_proc.h>
|
||||||
|
@ -299,6 +300,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
|
||||||
rb_link = &mm->mm_rb.rb_node;
|
rb_link = &mm->mm_rb.rb_node;
|
||||||
rb_parent = NULL;
|
rb_parent = NULL;
|
||||||
pprev = &mm->mmap;
|
pprev = &mm->mmap;
|
||||||
|
retval = ksm_fork(mm, oldmm);
|
||||||
|
if (retval)
|
||||||
|
goto out;
|
||||||
|
|
||||||
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
|
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
|
||||||
struct file *file;
|
struct file *file;
|
||||||
|
@ -435,7 +439,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
|
||||||
atomic_set(&mm->mm_count, 1);
|
atomic_set(&mm->mm_count, 1);
|
||||||
init_rwsem(&mm->mmap_sem);
|
init_rwsem(&mm->mmap_sem);
|
||||||
INIT_LIST_HEAD(&mm->mmlist);
|
INIT_LIST_HEAD(&mm->mmlist);
|
||||||
mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
|
mm->flags = (current->mm) ?
|
||||||
|
(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
|
||||||
mm->core_state = NULL;
|
mm->core_state = NULL;
|
||||||
mm->nr_ptes = 0;
|
mm->nr_ptes = 0;
|
||||||
set_mm_counter(mm, file_rss, 0);
|
set_mm_counter(mm, file_rss, 0);
|
||||||
|
@ -496,6 +501,7 @@ void mmput(struct mm_struct *mm)
|
||||||
|
|
||||||
if (atomic_dec_and_test(&mm->mm_users)) {
|
if (atomic_dec_and_test(&mm->mm_users)) {
|
||||||
exit_aio(mm);
|
exit_aio(mm);
|
||||||
|
ksm_exit(mm);
|
||||||
exit_mmap(mm);
|
exit_mmap(mm);
|
||||||
set_mm_exe_file(mm, NULL);
|
set_mm_exe_file(mm, NULL);
|
||||||
if (!list_empty(&mm->mmlist)) {
|
if (!list_empty(&mm->mmlist)) {
|
||||||
|
|
11
mm/Kconfig
11
mm/Kconfig
|
@ -214,6 +214,17 @@ config HAVE_MLOCKED_PAGE_BIT
|
||||||
config MMU_NOTIFIER
|
config MMU_NOTIFIER
|
||||||
bool
|
bool
|
||||||
|
|
||||||
|
config KSM
|
||||||
|
bool "Enable KSM for page merging"
|
||||||
|
depends on MMU
|
||||||
|
help
|
||||||
|
Enable Kernel Samepage Merging: KSM periodically scans those areas
|
||||||
|
of an application's address space that an app has advised may be
|
||||||
|
mergeable. When it finds pages of identical content, it replaces
|
||||||
|
the many instances by a single resident page with that content, so
|
||||||
|
saving memory until one or another app needs to modify the content.
|
||||||
|
Recommended for use with KVM, or with other duplicative applications.
|
||||||
|
|
||||||
config DEFAULT_MMAP_MIN_ADDR
|
config DEFAULT_MMAP_MIN_ADDR
|
||||||
int "Low address space to protect from user allocation"
|
int "Low address space to protect from user allocation"
|
||||||
default 4096
|
default 4096
|
||||||
|
|
|
@ -25,6 +25,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
|
||||||
obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
|
obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
|
||||||
obj-$(CONFIG_SLOB) += slob.o
|
obj-$(CONFIG_SLOB) += slob.o
|
||||||
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
|
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
|
||||||
|
obj-$(CONFIG_KSM) += ksm.o
|
||||||
obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
|
obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
|
||||||
obj-$(CONFIG_SLAB) += slab.o
|
obj-$(CONFIG_SLAB) += slab.o
|
||||||
obj-$(CONFIG_SLUB) += slub.o
|
obj-$(CONFIG_SLUB) += slub.o
|
||||||
|
|
56
mm/ksm.c
Normal file
56
mm/ksm.c
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
/*
|
||||||
|
* Initial dummy version just to illustrate KSM's interface to other files.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/errno.h>
|
||||||
|
#include <linux/mman.h>
|
||||||
|
#include <linux/ksm.h>
|
||||||
|
|
||||||
|
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
|
||||||
|
unsigned long end, int advice, unsigned long *vm_flags)
|
||||||
|
{
|
||||||
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
|
|
||||||
|
switch (advice) {
|
||||||
|
case MADV_MERGEABLE:
|
||||||
|
/*
|
||||||
|
* Be somewhat over-protective for now!
|
||||||
|
*/
|
||||||
|
if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
|
||||||
|
VM_PFNMAP | VM_IO | VM_DONTEXPAND |
|
||||||
|
VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
|
||||||
|
VM_MIXEDMAP | VM_SAO))
|
||||||
|
return 0; /* just ignore the advice */
|
||||||
|
|
||||||
|
if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
|
||||||
|
if (__ksm_enter(mm) < 0)
|
||||||
|
return -EAGAIN;
|
||||||
|
|
||||||
|
*vm_flags |= VM_MERGEABLE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MADV_UNMERGEABLE:
|
||||||
|
if (!(*vm_flags & VM_MERGEABLE))
|
||||||
|
return 0; /* just ignore the advice */
|
||||||
|
|
||||||
|
/* Unmerge any merged pages here */
|
||||||
|
|
||||||
|
*vm_flags &= ~VM_MERGEABLE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int __ksm_enter(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
/* Allocate a structure to track mm and link it into KSM's list */
|
||||||
|
set_bit(MMF_VM_MERGEABLE, &mm->flags);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void __ksm_exit(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
/* Unlink and free all KSM's structures which track this mm */
|
||||||
|
clear_bit(MMF_VM_MERGEABLE, &mm->flags);
|
||||||
|
}
|
14
mm/madvise.c
14
mm/madvise.c
|
@ -11,6 +11,7 @@
|
||||||
#include <linux/mempolicy.h>
|
#include <linux/mempolicy.h>
|
||||||
#include <linux/hugetlb.h>
|
#include <linux/hugetlb.h>
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
|
#include <linux/ksm.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Any behaviour which results in changes to the vma->vm_flags needs to
|
* Any behaviour which results in changes to the vma->vm_flags needs to
|
||||||
|
@ -63,6 +64,12 @@ static long madvise_behavior(struct vm_area_struct * vma,
|
||||||
}
|
}
|
||||||
new_flags &= ~VM_DONTCOPY;
|
new_flags &= ~VM_DONTCOPY;
|
||||||
break;
|
break;
|
||||||
|
case MADV_MERGEABLE:
|
||||||
|
case MADV_UNMERGEABLE:
|
||||||
|
error = ksm_madvise(vma, start, end, behavior, &new_flags);
|
||||||
|
if (error)
|
||||||
|
goto out;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_flags == vma->vm_flags) {
|
if (new_flags == vma->vm_flags) {
|
||||||
|
@ -239,6 +246,10 @@ madvise_behavior_valid(int behavior)
|
||||||
case MADV_REMOVE:
|
case MADV_REMOVE:
|
||||||
case MADV_WILLNEED:
|
case MADV_WILLNEED:
|
||||||
case MADV_DONTNEED:
|
case MADV_DONTNEED:
|
||||||
|
#ifdef CONFIG_KSM
|
||||||
|
case MADV_MERGEABLE:
|
||||||
|
case MADV_UNMERGEABLE:
|
||||||
|
#endif
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -273,6 +284,9 @@ madvise_behavior_valid(int behavior)
|
||||||
* MADV_DONTFORK - omit this area from child's address space when forking:
|
* MADV_DONTFORK - omit this area from child's address space when forking:
|
||||||
* typically, to avoid COWing pages pinned by get_user_pages().
|
* typically, to avoid COWing pages pinned by get_user_pages().
|
||||||
* MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
|
* MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
|
||||||
|
* MADV_MERGEABLE - the application recommends that KSM try to merge pages in
|
||||||
|
* this area with pages of identical content from other such areas.
|
||||||
|
* MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others.
|
||||||
*
|
*
|
||||||
* return values:
|
* return values:
|
||||||
* zero - success
|
* zero - success
|
||||||
|
|
Loading…
Reference in a new issue