Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

Pull SLAB changes from Pekka Enberg:
 "Most of the changes included are from Christoph Lameter's "common
  slab" patch series that unifies common parts of SLUB, SLAB, and SLOB
  allocators.  The unification is needed for Glauber Costa's "kmem
  memcg" work that will hopefully appear for v3.7.

  The rest of the changes are fixes and speedups by various people."

* 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux: (32 commits)
  mm: Fix build warning in kmem_cache_create()
  slob: Fix early boot kernel crash
  mm, slub: ensure irqs are enabled for kmemcheck
  mm, sl[aou]b: Move kmem_cache_create mutex handling to common code
  mm, sl[aou]b: Use a common mutex definition
  mm, sl[aou]b: Common definition for boot state of the slab allocators
  mm, sl[aou]b: Extract common code for kmem_cache_create()
  slub: remove invalid reference to list iterator variable
  mm: Fix signal SIGFPE in slabinfo.c.
  slab: move FULL state transition to an initcall
  slab: Fix a typo in commit 8c138b "slab: Get rid of obj_size macro"
  mm, slab: Build fix for recent kmem_cache changes
  slab: rename gfpflags to allocflags
  slub: refactoring unfreeze_partials()
  slub: use __cmpxchg_double_slab() at interrupt disabled place
  slab/mempolicy: always use local policy from interrupt context
  slab: Get rid of obj_size macro
  mm, sl[aou]b: Extract common fields from struct kmem_cache
  slab: Remove some accessors
  slab: Use page struct fields instead of casting
  ...
This commit is contained in:
Linus Torvalds 2012-07-30 11:32:24 -07:00
commit 720d85075b
13 changed files with 608 additions and 616 deletions

View file

@ -215,7 +215,7 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
const nodemask_t *mask);
extern unsigned slab_node(struct mempolicy *policy);
extern unsigned slab_node(void);
extern enum zone_type policy_zone;

View file

@ -53,7 +53,7 @@ struct page {
struct {
union {
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* slub first free object */
void *freelist; /* slub/slob first free object */
};
union {
@ -91,11 +91,12 @@ struct page {
*/
atomic_t _mapcount;
struct {
struct { /* SLUB */
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
int units; /* SLOB */
};
atomic_t _count; /* Usage count, see below. */
};
@ -117,6 +118,12 @@ struct page {
short int pobjects;
#endif
};
struct list_head list; /* slobs list of pages */
struct { /* slab fields */
struct kmem_cache *slab_cache;
struct slab *slab_page;
};
};
/* Remainder is not double word aligned */

View file

@ -92,6 +92,30 @@
#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
(unsigned long)ZERO_SIZE_PTR)
/*
* Common fields provided in kmem_cache by all slab allocators
* This struct is either used directly by the allocator (SLOB)
* or the allocator must include definitions for all fields
* provided in kmem_cache_common in their definition of kmem_cache.
*
* Once we can do anonymous structs (C11 standard) we could put an
* anonymous struct definition in these allocators so that the
* separate allocations in the kmem_cache structure of SLAB and
* SLUB are no longer needed.
*/
#ifdef CONFIG_SLOB
/*
* Cache descriptor as used by SLOB, which takes the common field set
* directly as its struct kmem_cache (this definition is only compiled
* when CONFIG_SLOB is set).
*/
struct kmem_cache {
unsigned int object_size;/* The original (requested) size of the object */
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
unsigned long flags; /* Active flags on the slab */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
struct list_head list; /* List of all slab caches on the system */
};
#endif
/*
* struct kmem_cache related prototypes
*/

View file

@ -27,7 +27,7 @@ struct kmem_cache {
unsigned int limit;
unsigned int shared;
unsigned int buffer_size;
unsigned int size;
u32 reciprocal_buffer_size;
/* 2) touched by every alloc & free from the backend */
@ -39,7 +39,7 @@ struct kmem_cache {
unsigned int gfporder;
/* force GFP flags, e.g. GFP_DMA */
gfp_t gfpflags;
gfp_t allocflags;
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
@ -52,7 +52,10 @@ struct kmem_cache {
/* 4) cache creation/removal */
const char *name;
struct list_head next;
struct list_head list;
int refcount;
int object_size;
int align;
/* 5) statistics */
#ifdef CONFIG_DEBUG_SLAB
@ -73,12 +76,11 @@ struct kmem_cache {
/*
* If debugging is enabled, then the allocator can add additional
* fields and/or padding to every object. buffer_size contains the total
* fields and/or padding to every object. size contains the total
* object size including these internal fields, the following two
* variables contain the offset to the user object and its size.
*/
int obj_offset;
int obj_size;
#endif /* CONFIG_DEBUG_SLAB */
/* 6) per-cpu/per-node data, touched during every alloc/free */

View file

@ -48,7 +48,6 @@ struct kmem_cache_cpu {
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
struct page *partial; /* Partially allocated frozen slabs */
int node; /* The node of the page (or -1 for debug) */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
@ -83,7 +82,7 @@ struct kmem_cache {
unsigned long flags;
unsigned long min_partial;
int size; /* The size of an object including meta data */
int objsize; /* The size of an object without meta data */
int object_size; /* The size of an object without meta data */
int offset; /* Free pointer offset. */
int cpu_partial; /* Number of per cpu partial objects to keep around */
struct kmem_cache_order_objects oo;

View file

@ -16,7 +16,8 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
page_isolation.o mm_init.o mmu_context.o percpu.o \
compaction.o $(mmu-y)
compaction.o slab_common.o $(mmu-y)
obj-y += init-mm.o
ifdef CONFIG_NO_BOOTMEM

View file

@ -1602,8 +1602,14 @@ static unsigned interleave_nodes(struct mempolicy *policy)
* task can change its policy. The system default policy requires no
* such protection.
*/
unsigned slab_node(struct mempolicy *policy)
unsigned slab_node(void)
{
struct mempolicy *policy;
if (in_interrupt())
return numa_node_id();
policy = current->mempolicy;
if (!policy || policy->flags & MPOL_F_LOCAL)
return numa_node_id();

406
mm/slab.c

File diff suppressed because it is too large Load diff

33
mm/slab.h Normal file
View file

@ -0,0 +1,33 @@
#ifndef MM_SLAB_H
#define MM_SLAB_H
/*
* Internal slab definitions
*/
/*
* State of the slab allocator.
*
* This is used to describe the states of the allocator during bootup.
* Allocators use this to gradually bootstrap themselves. Most allocators
* have the problem that the structures used for managing slab caches are
* allocated from slab caches themselves.
*
* The enumerators are listed in bootstrap order and are compared
* numerically: slab_is_available() tests "slab_state >= UP", so any new
* state must be inserted at the position matching its place in the
* boot sequence.
*/
enum slab_state {
DOWN, /* No slab functionality yet */
PARTIAL, /* SLUB: kmem_cache_node available */
PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */
PARTIAL_L3, /* SLAB: kmalloc size for l3 struct available */
UP, /* Slab caches usable but not all extras yet */
FULL /* Everything is working */
};
extern enum slab_state slab_state;
/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex;
extern struct list_head slab_caches;
/*
* Allocator-specific part of cache creation. The common
* kmem_cache_create() calls this with slab_mutex held and between
* get_online_cpus()/put_online_cpus(), forwarding its arguments
* unchanged; a NULL return is treated as failure by the caller.
*/
struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *));
#endif

120
mm/slab_common.c Normal file
View file

@ -0,0 +1,120 @@
/*
* Slab allocator functions that are independent of the allocator strategy
*
* (C) 2012 Christoph Lameter <cl@linux.com>
*/
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include "slab.h"
/* Current bootstrap state of the allocator; tested by slab_is_available(). */
enum slab_state slab_state;
/* List of slab caches, linked through kmem_cache.list. */
LIST_HEAD(slab_caches);
/* Held by kmem_cache_create() while it walks slab_caches and creates a cache. */
DEFINE_MUTEX(slab_mutex);
/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a pointer to the new cache, or NULL if it could not be created.
 * When @flags contains SLAB_PANIC a failure does not return: the kernel
 * panics instead.
 *
 * Must not be called from interrupt context, but may itself be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The allocator-specific work is delegated to __kmem_cache_create(), which
 * is invoked with slab_mutex held and CPU hotplug blocked.
 *
 * With CONFIG_DEBUG_VM the arguments are sanity checked first, creation is
 * refused if a cache with the same @name is already on slab_caches, and a
 * warning is raised for names that contain a space.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
		unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *cachep = NULL;

#ifdef CONFIG_DEBUG_VM
	/* Nothing is locked yet, so a bad argument can skip the unlock path. */
	if (!name || in_interrupt() || size < sizeof(void *) ||
	    size > KMALLOC_MAX_SIZE) {
		printk(KERN_ERR "kmem_cache_create(%s) integrity check"
			" failed\n", name);
		goto out;
	}
#endif

	get_online_cpus();
	mutex_lock(&slab_mutex);

#ifdef CONFIG_DEBUG_VM
	list_for_each_entry(cachep, &slab_caches, list) {
		char probe;

		/*
		 * A name that can no longer be read belongs to a module that
		 * was unloaded without destroying its cache, and whose
		 * vmalloc area nobody has reused.  Complain and move on.
		 */
		if (probe_kernel_address(cachep->name, probe)) {
			printk(KERN_ERR
			       "Slab cache with size %d has lost its name\n",
			       cachep->object_size);
			continue;
		}

		if (strcmp(cachep->name, name) != 0)
			continue;

		/* Duplicate name: report it and fail the creation. */
		printk(KERN_ERR "kmem_cache_create(%s): Cache name"
			" already exists.\n",
			name);
		dump_stack();
		cachep = NULL;
		goto oops;
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
#endif

	cachep = __kmem_cache_create(name, size, align, flags, ctor);

#ifdef CONFIG_DEBUG_VM
oops:
#endif
	mutex_unlock(&slab_mutex);
	put_online_cpus();

#ifdef CONFIG_DEBUG_VM
out:
#endif
	if (!cachep && (flags & SLAB_PANIC))
		panic("kmem_cache_create: Failed to create slab '%s'\n", name);

	return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);
/*
 * slab_is_available - tell whether slab caches can be used yet.
 *
 * Returns 1 once slab_state has reached at least UP, 0 before that.
 */
int slab_is_available(void)
{
	if (slab_state < UP)
		return 0;

	return 1;
}

152
mm/slob.c
View file

@ -59,6 +59,8 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/mm.h>
#include <linux/swap.h> /* struct reclaim_state */
#include <linux/cache.h>
@ -91,36 +93,6 @@ struct slob_block {
};
typedef struct slob_block slob_t;
/*
* We use struct page fields to manage some slob allocation aspects,
* however to avoid the horrible mess in include/linux/mm_types.h, we'll
* just define our own struct page type variant here.
*/
struct slob_page {
union {
struct {
unsigned long flags; /* mandatory */
atomic_t _count; /* mandatory */
slobidx_t units; /* free units left in page */
unsigned long pad[2];
slob_t *free; /* first free slob_t in page */
struct list_head list; /* linked list of free pages */
};
struct page page;
};
};
static inline void struct_slob_page_wrong_size(void)
{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }
/*
* free_slob_page: call before a slob_page is returned to the page allocator.
*/
static inline void free_slob_page(struct slob_page *sp)
{
reset_page_mapcount(&sp->page);
sp->page.mapping = NULL;
}
/*
* All partially free slob pages go on these lists.
*/
@ -130,47 +102,24 @@ static LIST_HEAD(free_slob_small);
static LIST_HEAD(free_slob_medium);
static LIST_HEAD(free_slob_large);
/*
* is_slob_page: True for all slob pages (false for bigblock pages)
*/
static inline int is_slob_page(struct slob_page *sp)
{
return PageSlab((struct page *)sp);
}
static inline void set_slob_page(struct slob_page *sp)
{
__SetPageSlab((struct page *)sp);
}
static inline void clear_slob_page(struct slob_page *sp)
{
__ClearPageSlab((struct page *)sp);
}
static inline struct slob_page *slob_page(const void *addr)
{
return (struct slob_page *)virt_to_page(addr);
}
/*
* slob_page_free: true for pages on free_slob_pages list.
*/
static inline int slob_page_free(struct slob_page *sp)
static inline int slob_page_free(struct page *sp)
{
return PageSlobFree((struct page *)sp);
return PageSlobFree(sp);
}
static void set_slob_page_free(struct slob_page *sp, struct list_head *list)
static void set_slob_page_free(struct page *sp, struct list_head *list)
{
list_add(&sp->list, list);
__SetPageSlobFree((struct page *)sp);
__SetPageSlobFree(sp);
}
static inline void clear_slob_page_free(struct slob_page *sp)
static inline void clear_slob_page_free(struct page *sp)
{
list_del(&sp->list);
__ClearPageSlobFree((struct page *)sp);
__ClearPageSlobFree(sp);
}
#define SLOB_UNIT sizeof(slob_t)
@ -267,12 +216,12 @@ static void slob_free_pages(void *b, int order)
/*
* Allocate a slob block within a given slob_page sp.
*/
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
static void *slob_page_alloc(struct page *sp, size_t size, int align)
{
slob_t *prev, *cur, *aligned = NULL;
int delta = 0, units = SLOB_UNITS(size);
for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
slobidx_t avail = slob_units(cur);
if (align) {
@ -296,12 +245,12 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
if (prev)
set_slob(prev, slob_units(prev), next);
else
sp->free = next;
sp->freelist = next;
} else { /* fragment */
if (prev)
set_slob(prev, slob_units(prev), cur + units);
else
sp->free = cur + units;
sp->freelist = cur + units;
set_slob(cur + units, avail - units, next);
}
@ -320,7 +269,7 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
*/
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
{
struct slob_page *sp;
struct page *sp;
struct list_head *prev;
struct list_head *slob_list;
slob_t *b = NULL;
@ -341,7 +290,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
* If there's a node specification, search for a partial
* page with a matching node id in the freelist.
*/
if (node != -1 && page_to_nid(&sp->page) != node)
if (node != -1 && page_to_nid(sp) != node)
continue;
#endif
/* Enough room on this page? */
@ -369,12 +318,12 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
if (!b)
return NULL;
sp = slob_page(b);
set_slob_page(sp);
sp = virt_to_page(b);
__SetPageSlab(sp);
spin_lock_irqsave(&slob_lock, flags);
sp->units = SLOB_UNITS(PAGE_SIZE);
sp->free = b;
sp->freelist = b;
INIT_LIST_HEAD(&sp->list);
set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
set_slob_page_free(sp, slob_list);
@ -392,7 +341,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
*/
static void slob_free(void *block, int size)
{
struct slob_page *sp;
struct page *sp;
slob_t *prev, *next, *b = (slob_t *)block;
slobidx_t units;
unsigned long flags;
@ -402,7 +351,7 @@ static void slob_free(void *block, int size)
return;
BUG_ON(!size);
sp = slob_page(block);
sp = virt_to_page(block);
units = SLOB_UNITS(size);
spin_lock_irqsave(&slob_lock, flags);
@ -412,8 +361,8 @@ static void slob_free(void *block, int size)
if (slob_page_free(sp))
clear_slob_page_free(sp);
spin_unlock_irqrestore(&slob_lock, flags);
clear_slob_page(sp);
free_slob_page(sp);
__ClearPageSlab(sp);
reset_page_mapcount(sp);
slob_free_pages(b, 0);
return;
}
@ -421,7 +370,7 @@ static void slob_free(void *block, int size)
if (!slob_page_free(sp)) {
/* This slob page is about to become partially free. Easy! */
sp->units = units;
sp->free = b;
sp->freelist = b;
set_slob(b, units,
(void *)((unsigned long)(b +
SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
@ -441,15 +390,15 @@ static void slob_free(void *block, int size)
*/
sp->units += units;
if (b < sp->free) {
if (b + units == sp->free) {
units += slob_units(sp->free);
sp->free = slob_next(sp->free);
if (b < (slob_t *)sp->freelist) {
if (b + units == sp->freelist) {
units += slob_units(sp->freelist);
sp->freelist = slob_next(sp->freelist);
}
set_slob(b, units, sp->free);
sp->free = b;
set_slob(b, units, sp->freelist);
sp->freelist = b;
} else {
prev = sp->free;
prev = sp->freelist;
next = slob_next(prev);
while (b > next) {
prev = next;
@ -522,7 +471,7 @@ EXPORT_SYMBOL(__kmalloc_node);
void kfree(const void *block)
{
struct slob_page *sp;
struct page *sp;
trace_kfree(_RET_IP_, block);
@ -530,43 +479,36 @@ void kfree(const void *block)
return;
kmemleak_free(block);
sp = slob_page(block);
if (is_slob_page(sp)) {
sp = virt_to_page(block);
if (PageSlab(sp)) {
int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
} else
put_page(&sp->page);
put_page(sp);
}
EXPORT_SYMBOL(kfree);
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t ksize(const void *block)
{
struct slob_page *sp;
struct page *sp;
BUG_ON(!block);
if (unlikely(block == ZERO_SIZE_PTR))
return 0;
sp = slob_page(block);
if (is_slob_page(sp)) {
sp = virt_to_page(block);
if (PageSlab(sp)) {
int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
return SLOB_UNITS(*m) * SLOB_UNIT;
} else
return sp->page.private;
return sp->private;
}
EXPORT_SYMBOL(ksize);
struct kmem_cache {
unsigned int size, align;
unsigned long flags;
const char *name;
void (*ctor)(void *);
};
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *c;
@ -589,13 +531,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
c->align = ARCH_SLAB_MINALIGN;
if (c->align < align)
c->align = align;
} else if (flags & SLAB_PANIC)
panic("Cannot create slab cache %s\n", name);
kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL);
kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL);
c->refcount = 1;
}
return c;
}
EXPORT_SYMBOL(kmem_cache_create);
void kmem_cache_destroy(struct kmem_cache *c)
{
@ -678,19 +619,12 @@ int kmem_cache_shrink(struct kmem_cache *d)
}
EXPORT_SYMBOL(kmem_cache_shrink);
static unsigned int slob_ready __read_mostly;
int slab_is_available(void)
{
return slob_ready;
}
void __init kmem_cache_init(void)
{
slob_ready = 1;
slab_state = UP;
}
void __init kmem_cache_init_late(void)
{
/* Nothing to do */
slab_state = FULL;
}

436
mm/slub.c

File diff suppressed because it is too large Load diff

View file

@ -437,34 +437,34 @@ static void slab_stats(struct slabinfo *s)
printf("Fastpath %8lu %8lu %3lu %3lu\n",
s->alloc_fastpath, s->free_fastpath,
s->alloc_fastpath * 100 / total_alloc,
s->free_fastpath * 100 / total_free);
total_free ? s->free_fastpath * 100 / total_free : 0);
printf("Slowpath %8lu %8lu %3lu %3lu\n",
total_alloc - s->alloc_fastpath, s->free_slowpath,
(total_alloc - s->alloc_fastpath) * 100 / total_alloc,
s->free_slowpath * 100 / total_free);
total_free ? s->free_slowpath * 100 / total_free : 0);
printf("Page Alloc %8lu %8lu %3lu %3lu\n",
s->alloc_slab, s->free_slab,
s->alloc_slab * 100 / total_alloc,
s->free_slab * 100 / total_free);
total_free ? s->free_slab * 100 / total_free : 0);
printf("Add partial %8lu %8lu %3lu %3lu\n",
s->deactivate_to_head + s->deactivate_to_tail,
s->free_add_partial,
(s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
s->free_add_partial * 100 / total_free);
total_free ? s->free_add_partial * 100 / total_free : 0);
printf("Remove partial %8lu %8lu %3lu %3lu\n",
s->alloc_from_partial, s->free_remove_partial,
s->alloc_from_partial * 100 / total_alloc,
s->free_remove_partial * 100 / total_free);
total_free ? s->free_remove_partial * 100 / total_free : 0);
printf("Cpu partial list %8lu %8lu %3lu %3lu\n",
s->cpu_partial_alloc, s->cpu_partial_free,
s->cpu_partial_alloc * 100 / total_alloc,
s->cpu_partial_free * 100 / total_free);
total_free ? s->cpu_partial_free * 100 / total_free : 0);
printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
s->deactivate_remote_frees, s->free_frozen,
s->deactivate_remote_frees * 100 / total_alloc,
s->free_frozen * 100 / total_free);
total_free ? s->free_frozen * 100 / total_free : 0);
printf("Total %8lu %8lu\n\n", total_alloc, total_free);