slab changes for 6.4

-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEe7vIQRWZI0iWSE3xu+CwddJFiJoFAmRCSGEACgkQu+CwddJF
 iJpA2wgAkwMP++Znd8JU3iQ4N53lv18euNuEMLTOY+jk7zXHvsRX8KyzLmsohUKO
 SSGVi1Om785AidOsJhARJawW7AWYuJ5l7ri+FyskTwrTUcMC4UZ/IT2tB22lRsXi
 0f3lgbdArZbj7aq7AVO9N7bh9rgVUHa/RHIwXzMp0sc9nekne9t+FFv7tyRnr7cc
 SMp/FdMZqbt9pVf0Uwud1BpdgER7QqQaSfaxITL7D2oJTePRZVWiXerrr4hMcQl1
 s6kgUgKdlaYmIx2N8eP1Nmp7undtwHo1C8dLLWKGCEuEAaXIxtXUtaUWFFmBDzH9
 Fv6qswNFcfwiLNPsY+xi9iA+vlGKAg==
 =T0EM
 -----END PGP SIGNATURE-----

Merge tag 'slab-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:
 "The main change is naturally the SLOB removal. Since its deprecation
  in 6.2 I've seen no complaints so hopefully SLUB_(TINY) works well for
  everyone and we can proceed.

  Besides the code cleanup, the main immediate benefit will be allowing
  kfree() family of function to work on kmem_cache_alloc() objects,
  which was incompatible with SLOB. This includes kfree_rcu() which had
  no kmem_cache_free_rcu() counterpart yet and now it shouldn't be
  necessary anymore.

  Besides that, there are several small code and comment improvements
  from Thomas, Thorsten and Vernon"

* tag 'slab-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slab: document kfree() as allowed for kmem_cache_alloc() objects
  mm/slob: remove slob.c
  mm/slab: remove CONFIG_SLOB code from slab common code
  mm, pagemap: remove SLOB and SLQB from comments and documentation
  mm, page_flags: remove PG_slob_free
  mm/slob: remove CONFIG_SLOB
  mm/slub: fix help comment of SLUB_DEBUG
  mm: slub: make kobj_type structure constant
  slab: Adjust comment after refactoring of gfp.h
This commit is contained in:
Linus Torvalds 2023-04-25 13:00:41 -07:00
commit 736b378b29
16 changed files with 32 additions and 917 deletions

View File

@ -91,9 +91,9 @@ Short descriptions to the page flags
The page is being locked for exclusive access, e.g. by undergoing read/write
IO.
7 - SLAB
The page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator.
When compound page is used, SLUB/SLQB will only set this flag on the head
page; SLOB will not flag it at all.
The page is managed by the SLAB/SLUB kernel memory allocator.
When compound page is used, either will only set this flag on the head
page.
10 - BUDDY
A free memory block managed by the buddy system allocator.
The buddy system organizes free memory in blocks of various orders.

View File

@ -170,7 +170,16 @@ should be used if a part of the cache might be copied to the userspace.
After the cache is created kmem_cache_alloc() and its convenience
wrappers can allocate memory from that cache.
When the allocated memory is no longer needed it must be freed. You can
use kvfree() for the memory allocated with `kmalloc`, `vmalloc` and
`kvmalloc`. The slab caches should be freed with kmem_cache_free(). And
don't forget to destroy the cache with kmem_cache_destroy().
When the allocated memory is no longer needed it must be freed.
Objects allocated by `kmalloc` can be freed by `kfree` or `kvfree`. Objects
allocated by `kmem_cache_alloc` can be freed with `kmem_cache_free`, `kfree`
or `kvfree`, where the latter two might be more convenient thanks to not
needing the kmem_cache pointer.
The same rules apply to _bulk and _rcu flavors of freeing functions.
Memory allocated by `vmalloc` can be freed with `vfree` or `kvfree`.
Memory allocated by `kvmalloc` can be freed with `kvfree`.
Caches created by `kmem_cache_create` should be freed with
`kmem_cache_destroy` only after freeing all the allocated objects first.

View File

@ -125,7 +125,7 @@ u64 stable_page_flags(struct page *page)
/*
* pseudo flags for the well known (anonymous) memory mapped pages
*
* Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
* Note that page->_mapcount is overloaded in SLAB, so the
* simple test in page_mapped() is not enough.
*/
if (!PageSlab(page) && page_mapped(page))
@ -165,9 +165,8 @@ u64 stable_page_flags(struct page *page)
/*
* Caveats on high order pages: page->_refcount will only be set
* -1 on the head page; SLUB/SLQB do the same for PG_slab;
* SLOB won't set PG_slab at all on compound pages.
* Caveats on high order pages: PG_buddy and PG_slab will only be set
* on the head page.
*/
if (PageBuddy(page))
u |= 1 << KPF_BUDDY;
@ -185,7 +184,7 @@ u64 stable_page_flags(struct page *page)
u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
if (PageTail(page) && PageSlab(compound_head(page)))
if (PageTail(page) && PageSlab(page))
u |= 1 << KPF_SLAB;
u |= kpf_copy_bit(k, KPF_ERROR, PG_error);

View File

@ -174,9 +174,6 @@ enum pageflags {
/* Remapped by swiotlb-xen. */
PG_xen_remapped = PG_owner_priv_1,
/* SLOB */
PG_slob_free = PG_private,
#ifdef CONFIG_MEMORY_FAILURE
/*
* Compound pages. Stored in first tail page's flags.
@ -483,7 +480,6 @@ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
PAGEFLAG(Workingset, workingset, PF_HEAD)
TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
__PAGEFLAG(Slab, slab, PF_NO_TAIL)
__PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL)
PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */
/* Xen */

View File

@ -976,8 +976,10 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* either fall back to use of call_rcu() or rearrange the structure to
* position the rcu_head structure into the first 4096 bytes.
*
* Note that the allowable offset might decrease in the future, for example,
* to allow something like kmem_cache_free_rcu().
* The object to be freed can be allocated either by kmalloc() or
* kmem_cache_alloc().
*
* Note that the allowable offset might decrease in the future.
*
* The BUILD_BUG_ON check must not involve any function calls, hence the
* checks are done in macros here.

View File

@ -298,19 +298,6 @@ static inline unsigned int arch_slab_minalign(void)
#endif
#endif
#ifdef CONFIG_SLOB
/*
* SLOB passes all requests larger than one page to the page allocator.
* No kmalloc array is necessary since objects of different sizes can
* be allocated from the same page.
*/
#define KMALLOC_SHIFT_HIGH PAGE_SHIFT
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 3
#endif
#endif
/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
/* Maximum size for which we actually use a slab cache */
@ -366,7 +353,6 @@ enum kmalloc_cache_type {
NR_KMALLOC_TYPES
};
#ifndef CONFIG_SLOB
extern struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
@ -458,7 +444,6 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
}
static_assert(PAGE_SHIFT <= 20);
#define kmalloc_index(s) __kmalloc_index(s, true)
#endif /* !CONFIG_SLOB */
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
@ -487,10 +472,6 @@ void kmem_cache_free(struct kmem_cache *s, void *objp);
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
/*
* Caller must not use kfree_bulk() on memory not originally allocated
* by kmalloc(), because the SLOB allocator cannot handle this.
*/
static __always_inline void kfree_bulk(size_t size, void **p)
{
kmem_cache_free_bulk(NULL, size, p);
@ -526,7 +507,7 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* to be at least to the size.
*
* The @flags argument may be one of the GFP flags defined at
* include/linux/gfp.h and described at
* include/linux/gfp_types.h and described at
* :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
*
* The recommended usage of the @flags is described at
@ -567,7 +548,6 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* Try really hard to succeed the allocation but fail
* eventually.
*/
#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size) && size) {
@ -583,17 +563,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
}
return __kmalloc(size, flags);
}
#else
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
return __kmalloc(size, flags);
}
#endif
#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size) && size) {
@ -609,15 +579,6 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla
}
return __kmalloc_node(size, flags, node);
}
#else
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large_node(size, flags, node);
return __kmalloc_node(size, flags, node);
}
#endif
/**
* kmalloc_array - allocate memory for an array.

View File

@ -945,7 +945,7 @@ config MEMCG
config MEMCG_KMEM
bool
depends on MEMCG && !SLOB
depends on MEMCG
default y
config BLK_CGROUP

View File

@ -7,6 +7,5 @@ CONFIG_KERNEL_XZ=y
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
# CONFIG_SLAB is not set
# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y

View File

@ -238,30 +238,8 @@ config SLUB
and has enhanced diagnostics. SLUB is the default choice for
a slab allocator.
config SLOB_DEPRECATED
depends on EXPERT
bool "SLOB (Simple Allocator - DEPRECATED)"
depends on !PREEMPT_RT
help
Deprecated and scheduled for removal in a few cycles. SLUB
recommended as replacement. CONFIG_SLUB_TINY can be considered
on systems with 16MB or less RAM.
If you need SLOB to stay, please contact linux-mm@kvack.org and
people listed in the SLAB ALLOCATOR section of MAINTAINERS file,
with your use case.
SLOB replaces the stock allocator with a drastically simpler
allocator. SLOB is generally more space efficient but
does not perform as well on large systems.
endchoice
config SLOB
bool
default y
depends on SLOB_DEPRECATED
config SLUB_TINY
bool "Configure SLUB for minimal memory footprint"
depends on SLUB && EXPERT

View File

@ -60,9 +60,9 @@ config SLUB_DEBUG
select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
result in significant savings in code size. This also disables
SLUB sysfs support. /sys/slab will not exist and there will be
no support for cache validation etc.
result in significant savings in code size. While /sys/kernel/slab
will still exist (with SYSFS enabled), it will not provide e.g. cache
validation.
config SLUB_DEBUG_ON
bool "SLUB debugging on by default"

View File

@ -22,7 +22,6 @@ KCSAN_INSTRUMENT_BARRIERS := y
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
# free pages, or a task is migrated between nodes.
KCOV_INSTRUMENT_slab_common.o := n
KCOV_INSTRUMENT_slob.o := n
KCOV_INSTRUMENT_slab.o := n
KCOV_INSTRUMENT_slub.o := n
KCOV_INSTRUMENT_page_alloc.o := n
@ -81,7 +80,6 @@ obj-$(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) += hugetlb_vmemmap.o
obj-$(CONFIG_NUMA) += mempolicy.o
obj-$(CONFIG_SPARSEMEM) += sparse.o
obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
obj-$(CONFIG_SLOB) += slob.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_KSM) += ksm.o
obj-$(CONFIG_PAGE_POISONING) += page_poison.o

View File

@ -51,14 +51,6 @@ struct slab {
};
unsigned int __unused;
#elif defined(CONFIG_SLOB)
struct list_head slab_list;
void *__unused_1;
void *freelist; /* first free block */
long units;
unsigned int __unused_2;
#else
#error "Unexpected slab allocator configured"
#endif
@ -72,11 +64,7 @@ struct slab {
#define SLAB_MATCH(pg, sl) \
static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
#ifndef CONFIG_SLOB
SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */
#else
SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
#endif
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, memcg_data);
@ -200,31 +188,6 @@ static inline size_t slab_size(const struct slab *slab)
return PAGE_SIZE << slab_order(slab);
}
#ifdef CONFIG_SLOB
/*
* Common fields provided in kmem_cache by all slab allocators
* This struct is either used directly by the allocator (SLOB)
* or the allocator must include definitions for all fields
* provided in kmem_cache_common in their definition of kmem_cache.
*
* Once we can do anonymous structs (C11 standard) we could put a
* anonymous struct definition in these allocators so that the
* separate allocations in the kmem_cache structure of SLAB and
* SLUB is no longer needed.
*/
struct kmem_cache {
unsigned int object_size;/* The original size of the object */
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
slab_flags_t flags; /* Active flags on the slab */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
struct list_head list; /* List of all slab caches on the system */
};
#endif /* CONFIG_SLOB */
#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
#endif
@ -274,7 +237,6 @@ extern const struct kmalloc_info_struct {
unsigned int size;
} kmalloc_info[];
#ifndef CONFIG_SLOB
/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(slab_flags_t);
@ -286,7 +248,6 @@ void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t orig_size,
unsigned long caller);
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller);
#endif
gfp_t kmalloc_fix_flags(gfp_t flags);
@ -303,33 +264,16 @@ extern void create_boot_cache(struct kmem_cache *, const char *name,
int slab_unmergeable(struct kmem_cache *s);
struct kmem_cache *find_mergeable(unsigned size, unsigned align,
slab_flags_t flags, const char *name, void (*ctor)(void *));
#ifndef CONFIG_SLOB
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *));
slab_flags_t kmem_cache_flags(unsigned int object_size,
slab_flags_t flags, const char *name);
#else
static inline struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *))
{ return NULL; }
static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
slab_flags_t flags, const char *name)
{
return flags;
}
#endif
static inline bool is_kmalloc_cache(struct kmem_cache *s)
{
#ifndef CONFIG_SLOB
return (s->flags & SLAB_KMALLOC);
#else
return false;
#endif
}
/* Legal flag mask for kmem_cache_create(), for various configurations */
@ -634,7 +578,6 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
}
#endif /* CONFIG_MEMCG_KMEM */
#ifndef CONFIG_SLOB
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
struct slab *slab;
@ -684,8 +627,6 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
void free_large_kmalloc(struct folio *folio, void *object);
#endif /* CONFIG_SLOB */
size_t __ksize(const void *objp);
static inline size_t slab_ksize(const struct kmem_cache *s)
@ -777,7 +718,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
}
#ifndef CONFIG_SLOB
/*
* The slab lists for all objects.
*/
@ -824,7 +764,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
for (__node = 0; __node < nr_node_ids; __node++) \
if ((__n = get_node(__s, __node)))
#endif
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
void dump_unreclaimable_slab(void);

View File

@ -625,7 +625,6 @@ void kmem_dump_obj(void *object)
EXPORT_SYMBOL_GPL(kmem_dump_obj);
#endif
#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name,
unsigned int size, slab_flags_t flags,
@ -990,12 +989,9 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
/**
* kfree - free previously allocated memory
* @object: pointer returned by kmalloc.
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
*
* If @object is NULL, no operation is performed.
*
* Don't free memory not originally allocated by kmalloc()
* or you will run into trouble.
*/
void kfree(const void *object)
{
@ -1079,7 +1075,6 @@ void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
return ret;
}
EXPORT_SYMBOL(kmalloc_node_trace);
#endif /* !CONFIG_SLOB */
gfp_t kmalloc_fix_flags(gfp_t flags)
{

757
mm/slob.c
View File

@ -1,757 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* SLOB Allocator: Simple List Of Blocks
*
* Matt Mackall <mpm@selenic.com> 12/30/03
*
* NUMA support by Paul Mundt, 2007.
*
* How SLOB works:
*
* The core of SLOB is a traditional K&R style heap allocator, with
* support for returning aligned objects. The granularity of this
* allocator is as little as 2 bytes, however typically most architectures
* will require 4 bytes on 32-bit and 8 bytes on 64-bit.
*
* The slob heap is a set of linked list of pages from alloc_pages(),
* and within each page, there is a singly-linked list of free blocks
* (slob_t). The heap is grown on demand. To reduce fragmentation,
* heap pages are segregated into three lists, with objects less than
* 256 bytes, objects less than 1024 bytes, and all other objects.
*
* Allocation from heap involves first searching for a page with
* sufficient free blocks (using a next-fit-like approach) followed by
* a first-fit scan of the page. Deallocation inserts objects back
* into the free list in address order, so this is effectively an
* address-ordered first fit.
*
* Above this is an implementation of kmalloc/kfree. Blocks returned
* from kmalloc are prepended with a 4-byte header with the kmalloc size.
* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
* alloc_pages() directly, allocating compound pages so the page order
* does not have to be separately tracked.
* These objects are detected in kfree() because folio_test_slab()
* is false for them.
*
* SLAB is emulated on top of SLOB by simply calling constructors and
* destructors for every SLAB allocation. Objects are returned with the
* 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
* case the low-level allocator will fragment blocks to create the proper
* alignment. Again, objects of page-size or greater are allocated by
* calling alloc_pages(). As SLAB objects know their size, no separate
* size bookkeeping is necessary and there is essentially no allocation
* space overhead, and compound pages aren't needed for multi-page
* allocations.
*
* NUMA support in SLOB is fairly simplistic, pushing most of the real
* logic down to the page allocator, and simply doing the node accounting
* on the upper levels. In the event that a node id is explicitly
* provided, __alloc_pages_node() with the specified node id is used
* instead. The common case (or when the node id isn't explicitly provided)
* will default to the current node, as per numa_node_id().
*
* Node aware pages are still inserted in to the global freelist, and
* these are scanned for by matching against the node id encoded in the
* page flags. As a result, block allocations that can be satisfied from
* the freelist will only be done so on pages residing on the same node,
* in order to prevent random node placement.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/swap.h> /* struct reclaim_state */
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/kmemleak.h>
#include <trace/events/kmem.h>
#include <linux/atomic.h>
#include "slab.h"
/*
* slob_block has a field 'units', which indicates size of block if +ve,
* or offset of next block if -ve (in SLOB_UNITs).
*
* Free blocks of size 1 unit simply contain the offset of the next block.
* Those with larger size contain their size in the first SLOB_UNIT of
* memory, and the offset of the next free block in the second SLOB_UNIT.
*/
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif
struct slob_block {
slobidx_t units;
};
typedef struct slob_block slob_t;
/*
* All partially free slob pages go on these lists.
*/
#define SLOB_BREAK1 256
#define SLOB_BREAK2 1024
static LIST_HEAD(free_slob_small);
static LIST_HEAD(free_slob_medium);
static LIST_HEAD(free_slob_large);
/*
* slob_page_free: true for pages on free_slob_pages list.
*/
static inline int slob_page_free(struct slab *slab)
{
return PageSlobFree(slab_page(slab));
}
static void set_slob_page_free(struct slab *slab, struct list_head *list)
{
list_add(&slab->slab_list, list);
__SetPageSlobFree(slab_page(slab));
}
static inline void clear_slob_page_free(struct slab *slab)
{
list_del(&slab->slab_list);
__ClearPageSlobFree(slab_page(slab));
}
#define SLOB_UNIT sizeof(slob_t)
#define SLOB_UNITS(size) DIV_ROUND_UP(size, SLOB_UNIT)
/*
* struct slob_rcu is inserted at the tail of allocated slob blocks, which
* were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free
* the block using call_rcu.
*/
struct slob_rcu {
struct rcu_head head;
int size;
};
/*
* slob_lock protects all slob allocator structures.
*/
static DEFINE_SPINLOCK(slob_lock);
/*
* Encode the given size and next info into a free slob block s.
*/
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
slobidx_t offset = next - base;
if (size > 1) {
s[0].units = size;
s[1].units = offset;
} else
s[0].units = -offset;
}
/*
* Return the size of a slob block.
*/
static slobidx_t slob_units(slob_t *s)
{
if (s->units > 0)
return s->units;
return 1;
}
/*
* Return the next free slob block pointer after this one.
*/
static slob_t *slob_next(slob_t *s)
{
slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
slobidx_t next;
if (s[0].units < 0)
next = -s[0].units;
else
next = s[1].units;
return base+next;
}
/*
* Returns true if s is the last free block in its page.
*/
static int slob_last(slob_t *s)
{
return !((unsigned long)slob_next(s) & ~PAGE_MASK);
}
static void *slob_new_pages(gfp_t gfp, int order, int node)
{
struct page *page;
#ifdef CONFIG_NUMA
if (node != NUMA_NO_NODE)
page = __alloc_pages_node(node, gfp, order);
else
#endif
page = alloc_pages(gfp, order);
if (!page)
return NULL;
mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
return page_address(page);
}
static void slob_free_pages(void *b, int order)
{
struct page *sp = virt_to_page(b);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += 1 << order;
mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(sp, order);
}
/*
* slob_page_alloc() - Allocate a slob block within a given slob_page sp.
* @sp: Page to look in.
* @size: Size of the allocation.
* @align: Allocation alignment.
* @align_offset: Offset in the allocated block that will be aligned.
* @page_removed_from_list: Return parameter.
*
* Tries to find a chunk of memory at least @size bytes big within @page.
*
* Return: Pointer to memory if allocated, %NULL otherwise. If the
* allocation fills up @page then the page is removed from the
* freelist, in this case @page_removed_from_list will be set to
* true (set to false otherwise).
*/
static void *slob_page_alloc(struct slab *sp, size_t size, int align,
int align_offset, bool *page_removed_from_list)
{
slob_t *prev, *cur, *aligned = NULL;
int delta = 0, units = SLOB_UNITS(size);
*page_removed_from_list = false;
for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
slobidx_t avail = slob_units(cur);
/*
* 'aligned' will hold the address of the slob block so that the
* address 'aligned'+'align_offset' is aligned according to the
* 'align' parameter. This is for kmalloc() which prepends the
* allocated block with its size, so that the block itself is
* aligned when needed.
*/
if (align) {
aligned = (slob_t *)
(ALIGN((unsigned long)cur + align_offset, align)
- align_offset);
delta = aligned - cur;
}
if (avail >= units + delta) { /* room enough? */
slob_t *next;
if (delta) { /* need to fragment head to align? */
next = slob_next(cur);
set_slob(aligned, avail - delta, next);
set_slob(cur, delta, aligned);
prev = cur;
cur = aligned;
avail = slob_units(cur);
}
next = slob_next(cur);
if (avail == units) { /* exact fit? unlink. */
if (prev)
set_slob(prev, slob_units(prev), next);
else
sp->freelist = next;
} else { /* fragment */
if (prev)
set_slob(prev, slob_units(prev), cur + units);
else
sp->freelist = cur + units;
set_slob(cur + units, avail - units, next);
}
sp->units -= units;
if (!sp->units) {
clear_slob_page_free(sp);
*page_removed_from_list = true;
}
return cur;
}
if (slob_last(cur))
return NULL;
}
}
/*
* slob_alloc: entry point into the slob allocator.
*/
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
int align_offset)
{
struct folio *folio;
struct slab *sp;
struct list_head *slob_list;
slob_t *b = NULL;
unsigned long flags;
bool _unused;
if (size < SLOB_BREAK1)
slob_list = &free_slob_small;
else if (size < SLOB_BREAK2)
slob_list = &free_slob_medium;
else
slob_list = &free_slob_large;
spin_lock_irqsave(&slob_lock, flags);
/* Iterate through each partially free page, try to find room */
list_for_each_entry(sp, slob_list, slab_list) {
bool page_removed_from_list = false;
#ifdef CONFIG_NUMA
/*
* If there's a node specification, search for a partial
* page with a matching node id in the freelist.
*/
if (node != NUMA_NO_NODE && slab_nid(sp) != node)
continue;
#endif
/* Enough room on this page? */
if (sp->units < SLOB_UNITS(size))
continue;
b = slob_page_alloc(sp, size, align, align_offset, &page_removed_from_list);
if (!b)
continue;
/*
* If slob_page_alloc() removed sp from the list then we
* cannot call list functions on sp. If so allocation
* did not fragment the page anyway so optimisation is
* unnecessary.
*/
if (!page_removed_from_list) {
/*
* Improve fragment distribution and reduce our average
* search time by starting our next search here. (see
* Knuth vol 1, sec 2.5, pg 449)
*/
if (!list_is_first(&sp->slab_list, slob_list))
list_rotate_to_front(&sp->slab_list, slob_list);
}
break;
}
spin_unlock_irqrestore(&slob_lock, flags);
/* Not enough space: must allocate a new page */
if (!b) {
b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
if (!b)
return NULL;
folio = virt_to_folio(b);
__folio_set_slab(folio);
sp = folio_slab(folio);
spin_lock_irqsave(&slob_lock, flags);
sp->units = SLOB_UNITS(PAGE_SIZE);
sp->freelist = b;
INIT_LIST_HEAD(&sp->slab_list);
set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
set_slob_page_free(sp, slob_list);
b = slob_page_alloc(sp, size, align, align_offset, &_unused);
BUG_ON(!b);
spin_unlock_irqrestore(&slob_lock, flags);
}
if (unlikely(gfp & __GFP_ZERO))
memset(b, 0, size);
return b;
}
/*
* slob_free: entry point into the slob allocator.
*/
static void slob_free(void *block, int size)
{
struct slab *sp;
slob_t *prev, *next, *b = (slob_t *)block;
slobidx_t units;
unsigned long flags;
struct list_head *slob_list;
if (unlikely(ZERO_OR_NULL_PTR(block)))
return;
BUG_ON(!size);
sp = virt_to_slab(block);
units = SLOB_UNITS(size);
spin_lock_irqsave(&slob_lock, flags);
if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
/* Go directly to page allocator. Do not pass slob allocator */
if (slob_page_free(sp))
clear_slob_page_free(sp);
spin_unlock_irqrestore(&slob_lock, flags);
__folio_clear_slab(slab_folio(sp));
slob_free_pages(b, 0);
return;
}
if (!slob_page_free(sp)) {
/* This slob page is about to become partially free. Easy! */
sp->units = units;
sp->freelist = b;
set_slob(b, units,
(void *)((unsigned long)(b +
SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
if (size < SLOB_BREAK1)
slob_list = &free_slob_small;
else if (size < SLOB_BREAK2)
slob_list = &free_slob_medium;
else
slob_list = &free_slob_large;
set_slob_page_free(sp, slob_list);
goto out;
}
/*
* Otherwise the page is already partially free, so find reinsertion
* point.
*/
sp->units += units;
if (b < (slob_t *)sp->freelist) {
if (b + units == sp->freelist) {
units += slob_units(sp->freelist);
sp->freelist = slob_next(sp->freelist);
}
set_slob(b, units, sp->freelist);
sp->freelist = b;
} else {
prev = sp->freelist;
next = slob_next(prev);
while (b > next) {
prev = next;
next = slob_next(prev);
}
if (!slob_last(prev) && b + units == next) {
units += slob_units(next);
set_slob(b, units, slob_next(next));
} else
set_slob(b, units, next);
if (prev + slob_units(prev) == b) {
units = slob_units(b) + slob_units(prev);
set_slob(prev, units, slob_next(b));
} else
set_slob(prev, slob_units(prev), b);
}
out:
spin_unlock_irqrestore(&slob_lock, flags);
}
#ifdef CONFIG_PRINTK
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
kpp->kp_ptr = object;
kpp->kp_slab = slab;
}
#endif
/*
* End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
*/
static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
{
unsigned int *m;
unsigned int minalign;
void *ret;
minalign = max_t(unsigned int, ARCH_KMALLOC_MINALIGN,
arch_slab_minalign());
gfp &= gfp_allowed_mask;
might_alloc(gfp);
if (size < PAGE_SIZE - minalign) {
int align = minalign;
/*
* For power of two sizes, guarantee natural alignment for
* kmalloc()'d objects.
*/
if (is_power_of_2(size))
align = max_t(unsigned int, minalign, size);
if (!size)
return ZERO_SIZE_PTR;
m = slob_alloc(size + minalign, gfp, align, node, minalign);
if (!m)
return NULL;
*m = size;
ret = (void *)m + minalign;
trace_kmalloc(caller, ret, size, size + minalign, gfp, node);
} else {
unsigned int order = get_order(size);
if (likely(order))
gfp |= __GFP_COMP;
ret = slob_new_pages(gfp, order, node);
trace_kmalloc(caller, ret, size, PAGE_SIZE << order, gfp, node);
}
kmemleak_alloc(ret, size, 1, gfp);
return ret;
}
void *__kmalloc(size_t size, gfp_t gfp)
{
return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);
void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
int node, unsigned long caller)
{
return __do_kmalloc_node(size, gfp, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
void kfree(const void *block)
{
struct folio *sp;
trace_kfree(_RET_IP_, block);
if (unlikely(ZERO_OR_NULL_PTR(block)))
return;
kmemleak_free(block);
sp = virt_to_folio(block);
if (folio_test_slab(sp)) {
unsigned int align = max_t(unsigned int,
ARCH_KMALLOC_MINALIGN,
arch_slab_minalign());
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
} else {
unsigned int order = folio_order(sp);
mod_node_page_state(folio_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(folio_page(sp, 0), order);
}
}
EXPORT_SYMBOL(kfree);
size_t kmalloc_size_roundup(size_t size)
{
/* Short-circuit the 0 size case. */
if (unlikely(size == 0))
return 0;
/* Short-circuit saturated "too-large" case. */
if (unlikely(size == SIZE_MAX))
return SIZE_MAX;
return ALIGN(size, ARCH_KMALLOC_MINALIGN);
}
EXPORT_SYMBOL(kmalloc_size_roundup);
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t __ksize(const void *block)
{
struct folio *folio;
unsigned int align;
unsigned int *m;
BUG_ON(!block);
if (unlikely(block == ZERO_SIZE_PTR))
return 0;
folio = virt_to_folio(block);
if (unlikely(!folio_test_slab(folio)))
return folio_size(folio);
align = max_t(unsigned int, ARCH_KMALLOC_MINALIGN,
arch_slab_minalign());
m = (unsigned int *)(block - align);
return SLOB_UNITS(*m) * SLOB_UNIT;
}
int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags)
{
if (flags & SLAB_TYPESAFE_BY_RCU) {
/* leave room for rcu footer at the end of object */
c->size += sizeof(struct slob_rcu);
}
/* Actual size allocated */
c->size = SLOB_UNITS(c->size) * SLOB_UNIT;
c->flags = flags;
return 0;
}
static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
void *b;
flags &= gfp_allowed_mask;
might_alloc(flags);
if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node, 0);
trace_kmem_cache_alloc(_RET_IP_, b, c, flags, node);
} else {
b = slob_new_pages(flags, get_order(c->size), node);
trace_kmem_cache_alloc(_RET_IP_, b, c, flags, node);
}
if (b && c->ctor) {
WARN_ON_ONCE(flags & __GFP_ZERO);
c->ctor(b);
}
kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
return b;
}
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
}
EXPORT_SYMBOL(kmem_cache_alloc);
void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags)
{
return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
}
EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
return __do_kmalloc_node(size, gfp, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t gfp, int node)
{
return slob_alloc_node(cachep, gfp, node);
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
static void __kmem_cache_free(void *b, int size)
{
if (size < PAGE_SIZE)
slob_free(b, size);
else
slob_free_pages(b, get_order(size));
}
static void kmem_rcu_free(struct rcu_head *head)
{
struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));
__kmem_cache_free(b, slob_rcu->size);
}
void kmem_cache_free(struct kmem_cache *c, void *b)
{
kmemleak_free_recursive(b, c->flags);
trace_kmem_cache_free(_RET_IP_, b, c);
if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
struct slob_rcu *slob_rcu;
slob_rcu = b + (c->size - sizeof(struct slob_rcu));
slob_rcu->size = c->size;
call_rcu(&slob_rcu->head, kmem_rcu_free);
} else {
__kmem_cache_free(b, c->size);
}
}
EXPORT_SYMBOL(kmem_cache_free);
void kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
size_t i;
for (i = 0; i < nr; i++) {
if (s)
kmem_cache_free(s, p[i]);
else
kfree(p[i]);
}
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
void **p)
{
size_t i;
for (i = 0; i < nr; i++) {
void *x = p[i] = kmem_cache_alloc(s, flags);
if (!x) {
kmem_cache_free_bulk(s, i, p);
return 0;
}
}
return i;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
int __kmem_cache_shutdown(struct kmem_cache *c)
{
/* No way to check for remaining objects */
return 0;
}
void __kmem_cache_release(struct kmem_cache *c)
{
}
int __kmem_cache_shrink(struct kmem_cache *d)
{
return 0;
}
static struct kmem_cache kmem_cache_boot = {
.name = "kmem_cache",
.size = sizeof(struct kmem_cache),
.flags = SLAB_PANIC,
.align = ARCH_KMALLOC_MINALIGN,
};
void __init kmem_cache_init(void)
{
kmem_cache = &kmem_cache_boot;
slab_state = UP;
}
void __init kmem_cache_init_late(void)
{
slab_state = FULL;
}

View File

@ -6059,7 +6059,7 @@ static const struct sysfs_ops slab_sysfs_ops = {
.store = slab_attr_store,
};
static struct kobj_type slab_ktype = {
static const struct kobj_type slab_ktype = {
.sysfs_ops = &slab_sysfs_ops,
.release = kmem_cache_release,
};

View File

@ -85,7 +85,6 @@
*/
#define KPF_ANON_EXCLUSIVE 47
#define KPF_READAHEAD 48
#define KPF_SLOB_FREE 49
#define KPF_SLUB_FROZEN 50
#define KPF_SLUB_DEBUG 51
#define KPF_FILE 61
@ -141,7 +140,6 @@ static const char * const page_flag_names[] = {
[KPF_ANON_EXCLUSIVE] = "d:anon_exclusive",
[KPF_READAHEAD] = "I:readahead",
[KPF_SLOB_FREE] = "P:slob_free",
[KPF_SLUB_FROZEN] = "A:slub_frozen",
[KPF_SLUB_DEBUG] = "E:slub_debug",
@ -478,10 +476,8 @@ static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK)))
flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE);
/* SLOB/SLUB overload several page flags */
/* SLUB overloads several page flags */
if (flags & BIT(SLAB)) {
if (flags & BIT(PRIVATE))
flags ^= BIT(PRIVATE) | BIT(SLOB_FREE);
if (flags & BIT(ACTIVE))
flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN);
if (flags & BIT(ERROR))