slab updates for 6.6

-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEe7vIQRWZI0iWSE3xu+CwddJFiJoFAmTtvVUACgkQu+CwddJF
 iJou7Qf/ZY1TB8AFejTkArNa24Nvtp6yzgfdKpCdt4JkUDBJ5OFgKdE7wHYFqsOK
 Ml3s2L6/k97G0jkHZi/Wx0akv4GsMqWjJm2l+Oqjbf5GjwcTkuq6VEzlUrF2Febx
 MlzC8teLYtqkL/qDajUH80NdizlhdiyuQE+jM0qVg9K68ZS2w6Ky2GT7GHzgPELP
 3gQvkY6bjTwm6wVKV1Ou6xMnuMFFwpdI8Fsq8pon6NplktjG/2kvyLEDSdj/qk6Y
 PhDdYBupFfXqUdlY0FxCOqPo9LY/shSiYamGfGKsdJ7wBsIiR8DcmJMrbYSwy4a9
 ZQgtRv4Pxe0R2mH6Cj0oFbFzI/qIWw==
 =zBvx
 -----END PGP SIGNATURE-----

Merge tag 'slab-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:
 "This happens to be a small one (due to summer I guess), and all
  hardening related:

   - Randomized kmalloc caches, by GONG, Ruiqi.

     A new opt-in hardening feature to make heap spraying harder. It
     creates multiple (16) copies of kmalloc caches, reducing the chance
     that an attacker-controllable allocation site lands in the same
     slab as e.g. an allocation site with a use-after-free vulnerability.

     The selection of the copy is derived from the allocation site
     address, including a per-boot random seed.

   - Stronger typing for hardened freelists in SLUB, by Jann Horn

     Introduces a custom type for hardened freelist entries instead of
     "void *" as those are not directly dereferenceable. While reviewing
     this, I've noticed opportunities for further cleanups in that code
     and added those on top"

* tag 'slab-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  Randomized slab caches for kmalloc()
  mm/slub: remove freelist_dereference()
  mm/slub: remove redundant kasan_reset_tag() from freelist_ptr calculations
  mm/slub: refactor freelist to use custom type
This commit is contained in:
Linus Torvalds 2023-08-29 13:04:15 -07:00
commit 651a00bc56
8 changed files with 130 additions and 40 deletions

View File

@ -35,6 +35,12 @@
#define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \
PCPU_MIN_ALLOC_SHIFT)
/*
 * Shift used to scale PERCPU_DYNAMIC_EARLY_SIZE and PERCPU_DYNAMIC_RESERVE.
 * With CONFIG_RANDOM_KMALLOC_CACHES the shift is 12 instead of 10, i.e. both
 * sizes become four times larger.
 * NOTE(review): presumably this accounts for the additional kmalloc cache
 * copies needing more early percpu memory - confirm against the commit log.
 */
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
#define PERCPU_DYNAMIC_SIZE_SHIFT 12
#else
#define PERCPU_DYNAMIC_SIZE_SHIFT 10
#endif
/*
* Percpu allocator can serve percpu allocations before slab is
* initialized which allows slab to depend on the percpu allocator.
@ -42,7 +48,7 @@
* for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than
* PERCPU_DYNAMIC_EARLY_SIZE.
*/
#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10)
#define PERCPU_DYNAMIC_EARLY_SIZE (20 << PERCPU_DYNAMIC_SIZE_SHIFT)
/*
* PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
@ -56,9 +62,9 @@
* intelligent way to determine this would be nice.
*/
#if BITS_PER_LONG > 32
#define PERCPU_DYNAMIC_RESERVE (28 << 10)
#define PERCPU_DYNAMIC_RESERVE (28 << PERCPU_DYNAMIC_SIZE_SHIFT)
#else
#define PERCPU_DYNAMIC_RESERVE (20 << 10)
#define PERCPU_DYNAMIC_RESERVE (20 << PERCPU_DYNAMIC_SIZE_SHIFT)
#endif
extern void *pcpu_base_addr;

View File

@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>
#include <linux/cleanup.h>
#include <linux/hash.h>
/*
@ -345,6 +346,12 @@ static inline unsigned int arch_slab_minalign(void)
#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \
(KMALLOC_MIN_SIZE) : 16)
/*
 * Number of extra kmalloc cache copies created for randomization. These come
 * in addition to KMALLOC_NORMAL, so kmalloc_type() selects among
 * RANDOM_KMALLOC_CACHES_NR + 1 caches in total (16 when enabled). The value
 * plus one is fed to ilog2() to get the hash width, so it is expected to be a
 * power of two minus one.
 */
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
#define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies
#else
#define RANDOM_KMALLOC_CACHES_NR 0
#endif
/*
* Whenever changing this, take care that kmalloc_type() and
* create_kmalloc_caches() still work as intended.
@ -361,6 +368,8 @@ enum kmalloc_cache_type {
#ifndef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP = KMALLOC_NORMAL,
#endif
KMALLOC_RANDOM_START = KMALLOC_NORMAL,
KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
#ifdef CONFIG_SLUB_TINY
KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
@ -386,14 +395,22 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
(IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \
(IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))
static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
extern unsigned long random_kmalloc_seed;
static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
{
/*
* The most common case is KMALLOC_NORMAL, so test for it
* with a single branch for all the relevant flags.
*/
if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
/* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
#else
return KMALLOC_NORMAL;
#endif
/*
* At least one of the flags has to be set. Their priorities in
@ -580,7 +597,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
index = kmalloc_index(size);
return kmalloc_trace(
kmalloc_caches[kmalloc_type(flags)][index],
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, size);
}
return __kmalloc(size, flags);
@ -596,7 +613,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla
index = kmalloc_index(size);
return kmalloc_node_trace(
kmalloc_caches[kmalloc_type(flags)][index],
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, node, size);
}
return __kmalloc_node(size, flags, node);

View File

@ -337,6 +337,23 @@ config SLUB_CPU_PARTIAL
which requires the taking of locks that may cause latency spikes.
Typically one would choose no for a realtime system.
config RANDOM_KMALLOC_CACHES
default n
depends on SLUB && !SLUB_TINY
bool "Randomize slab caches for normal kmalloc"
help
A hardening feature that creates multiple copies of slab caches for
normal kmalloc allocation and makes kmalloc randomly pick one based
on code address, which makes it more difficult for attackers to spray
vulnerable memory objects on the heap for the purpose of exploiting
memory vulnerabilities.
Currently the number of copies is set to 16, a reasonably large value
that effectively diverges the memory objects allocated for different
subsystems or modules into different caches, at the expense of a
limited degree of memory and CPU overhead that relates to hardware and
system workload.
endmenu # SLAB allocator options
config SHUFFLE_PAGE_ALLOCATOR

View File

@ -212,7 +212,9 @@ static void test_cache_destroy(void)
static inline size_t kmalloc_cache_alignment(size_t size)
{
return kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]->align;
/* just to get ->align so no need to pass in the real caller */
enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, 0);
return kmalloc_caches[type][__kmalloc_index(size, false)]->align;
}
/* Must always inline to match stack trace against caller. */
@ -282,8 +284,9 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
if (is_kfence_address(alloc)) {
struct slab *slab = virt_to_slab(alloc);
enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, _RET_IP_);
struct kmem_cache *s = test_cache ?:
kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
kmalloc_caches[type][__kmalloc_index(size, false)];
/*
* Verify that various helpers return the right values

View File

@ -1670,7 +1670,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
if (freelist_size > KMALLOC_MAX_CACHE_SIZE) {
freelist_cache_size = PAGE_SIZE << get_order(freelist_size);
} else {
freelist_cache = kmalloc_slab(freelist_size, 0u);
freelist_cache = kmalloc_slab(freelist_size, 0u, _RET_IP_);
if (!freelist_cache)
continue;
freelist_cache_size = freelist_cache->size;

View File

@ -282,7 +282,7 @@ void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(slab_flags_t);
/* Find the kmalloc slab corresponding for a certain size */
struct kmem_cache *kmalloc_slab(size_t, gfp_t);
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller);
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t orig_size,

View File

@ -678,6 +678,11 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
/*
 * Seed that kmalloc_type() XORs with the caller address before hashing it to
 * choose one of the kmalloc cache copies. It is assigned from
 * get_random_u64() in create_kmalloc_caches() and is marked __ro_after_init.
 */
unsigned long random_kmalloc_seed __ro_after_init;
EXPORT_SYMBOL(random_kmalloc_seed);
#endif
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
@ -720,7 +725,7 @@ static inline unsigned int size_index_elem(unsigned int bytes)
* Find the kmem_cache structure that serves a given size of
* allocation
*/
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
{
unsigned int index;
@ -735,7 +740,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
index = fls(size - 1);
}
return kmalloc_caches[kmalloc_type(flags)][index];
return kmalloc_caches[kmalloc_type(flags, caller)][index];
}
size_t kmalloc_size_roundup(size_t size)
@ -752,8 +757,11 @@ size_t kmalloc_size_roundup(size_t size)
if (size > KMALLOC_MAX_CACHE_SIZE)
return PAGE_SIZE << get_order(size);
/* The flags don't matter since size_index is common to all. */
c = kmalloc_slab(size, GFP_KERNEL);
/*
* The flags don't matter since size_index is common to all.
* Neither does the caller for just getting ->object_size.
*/
c = kmalloc_slab(size, GFP_KERNEL, 0);
return c ? c->object_size : 0;
}
EXPORT_SYMBOL(kmalloc_size_roundup);
@ -776,12 +784,35 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
#define KMALLOC_RCL_NAME(sz)
#endif
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
/*
 * KMALLOC_RANDOM_NAME(N, sz) expands to the .name[] designated initializers
 * for the random cache copies 1..N of size class sz.
 *
 * N is passed through __KMALLOC_RANDOM_CONCAT() so that a macro argument such
 * as RANDOM_KMALLOC_CACHES_NR is expanded to its number before being pasted
 * onto KMA_RAND_. Each KMA_RAND_<k>(sz) first expands KMA_RAND_<k-1>(sz) and
 * then adds its own "kmalloc-rnd-<k>-<sz>" entry, so KMA_RAND_15 emits all
 * fifteen names. Every entry ends with a comma so the expansion can be
 * dropped straight into an initializer list.
 */
#define __KMALLOC_RANDOM_CONCAT(a, b) a ## b
#define KMALLOC_RANDOM_NAME(N, sz) __KMALLOC_RANDOM_CONCAT(KMA_RAND_, N)(sz)
#define KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 1] = "kmalloc-rnd-01-" #sz,
#define KMA_RAND_2(sz) KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 2] = "kmalloc-rnd-02-" #sz,
#define KMA_RAND_3(sz) KMA_RAND_2(sz) .name[KMALLOC_RANDOM_START + 3] = "kmalloc-rnd-03-" #sz,
#define KMA_RAND_4(sz) KMA_RAND_3(sz) .name[KMALLOC_RANDOM_START + 4] = "kmalloc-rnd-04-" #sz,
#define KMA_RAND_5(sz) KMA_RAND_4(sz) .name[KMALLOC_RANDOM_START + 5] = "kmalloc-rnd-05-" #sz,
#define KMA_RAND_6(sz) KMA_RAND_5(sz) .name[KMALLOC_RANDOM_START + 6] = "kmalloc-rnd-06-" #sz,
#define KMA_RAND_7(sz) KMA_RAND_6(sz) .name[KMALLOC_RANDOM_START + 7] = "kmalloc-rnd-07-" #sz,
#define KMA_RAND_8(sz) KMA_RAND_7(sz) .name[KMALLOC_RANDOM_START + 8] = "kmalloc-rnd-08-" #sz,
#define KMA_RAND_9(sz) KMA_RAND_8(sz) .name[KMALLOC_RANDOM_START + 9] = "kmalloc-rnd-09-" #sz,
#define KMA_RAND_10(sz) KMA_RAND_9(sz) .name[KMALLOC_RANDOM_START + 10] = "kmalloc-rnd-10-" #sz,
#define KMA_RAND_11(sz) KMA_RAND_10(sz) .name[KMALLOC_RANDOM_START + 11] = "kmalloc-rnd-11-" #sz,
#define KMA_RAND_12(sz) KMA_RAND_11(sz) .name[KMALLOC_RANDOM_START + 12] = "kmalloc-rnd-12-" #sz,
#define KMA_RAND_13(sz) KMA_RAND_12(sz) .name[KMALLOC_RANDOM_START + 13] = "kmalloc-rnd-13-" #sz,
#define KMA_RAND_14(sz) KMA_RAND_13(sz) .name[KMALLOC_RANDOM_START + 14] = "kmalloc-rnd-14-" #sz,
#define KMA_RAND_15(sz) KMA_RAND_14(sz) .name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-" #sz,
#else // CONFIG_RANDOM_KMALLOC_CACHES
/* No random copies configured: expands to nothing. */
#define KMALLOC_RANDOM_NAME(N, sz)
#endif
#define INIT_KMALLOC_INFO(__size, __short_size) \
{ \
.name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
KMALLOC_RCL_NAME(__short_size) \
KMALLOC_CGROUP_NAME(__short_size) \
KMALLOC_DMA_NAME(__short_size) \
KMALLOC_RANDOM_NAME(RANDOM_KMALLOC_CACHES_NR, __short_size) \
.size = __size, \
}
@ -890,6 +921,11 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
flags |= SLAB_CACHE_DMA;
}
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
if (type >= KMALLOC_RANDOM_START && type <= KMALLOC_RANDOM_END)
flags |= SLAB_NO_MERGE;
#endif
/*
* If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
* KMALLOC_NORMAL caches.
@ -941,6 +977,9 @@ void __init create_kmalloc_caches(slab_flags_t flags)
new_kmalloc_cache(2, type, flags);
}
}
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
random_kmalloc_seed = get_random_u64();
#endif
/* Kmalloc array is now usable */
slab_state = UP;
@ -976,7 +1015,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller
return ret;
}
s = kmalloc_slab(size, flags);
s = kmalloc_slab(size, flags, caller);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;

View File

@ -360,44 +360,52 @@ static struct workqueue_struct *flushwq;
* Core slab cache functions
*******************************************************************/
/*
 * freeptr_t represents a SLUB freelist pointer, which might be encoded
 * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
 *
 * The value is wrapped in a single-member struct so that it is a distinct
 * type rather than a plain pointer or integer; it is produced by
 * freelist_ptr_encode() and turned back into a usable pointer by
 * freelist_ptr_decode().
 */
typedef struct { unsigned long v; } freeptr_t;
/*
* Returns freelist pointer (ptr). With hardening, this is obfuscated
* with an XOR of the address where the pointer is held and a per-cache
* random number.
*/
static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
unsigned long ptr_addr)
static inline freeptr_t freelist_ptr_encode(const struct kmem_cache *s,
void *ptr, unsigned long ptr_addr)
{
unsigned long encoded;
#ifdef CONFIG_SLAB_FREELIST_HARDENED
/*
* When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
* Normally, this doesn't cause any issues, as both set_freepointer()
* and get_freepointer() are called with a pointer with the same tag.
* However, there are some issues with CONFIG_SLUB_DEBUG code. For
* example, when __free_slub() iterates over objects in a cache, it
* passes untagged pointers to check_object(). check_object() in turn
* calls get_freepointer() with an untagged pointer, which causes the
* freepointer to be restored incorrectly.
*/
return (void *)((unsigned long)ptr ^ s->random ^
swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
encoded = (unsigned long)ptr ^ s->random ^ swab(ptr_addr);
#else
return ptr;
encoded = (unsigned long)ptr;
#endif
return (freeptr_t){.v = encoded};
}
/* Returns the freelist pointer recorded at location ptr_addr. */
static inline void *freelist_dereference(const struct kmem_cache *s,
void *ptr_addr)
static inline void *freelist_ptr_decode(const struct kmem_cache *s,
freeptr_t ptr, unsigned long ptr_addr)
{
return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
(unsigned long)ptr_addr);
void *decoded;
#ifdef CONFIG_SLAB_FREELIST_HARDENED
decoded = (void *)(ptr.v ^ s->random ^ swab(ptr_addr));
#else
decoded = (void *)ptr.v;
#endif
return decoded;
}
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
unsigned long ptr_addr;
freeptr_t p;
object = kasan_reset_tag(object);
return freelist_dereference(s, object + s->offset);
ptr_addr = (unsigned long)object + s->offset;
p = *(freeptr_t *)(ptr_addr);
return freelist_ptr_decode(s, p, ptr_addr);
}
#ifndef CONFIG_SLUB_TINY
@ -421,15 +429,15 @@ __no_kmsan_checks
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
unsigned long freepointer_addr;
void *p;
freeptr_t p;
if (!debug_pagealloc_enabled_static())
return get_freepointer(s, object);
object = kasan_reset_tag(object);
freepointer_addr = (unsigned long)object + s->offset;
copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
return freelist_ptr(s, p, freepointer_addr);
copy_from_kernel_nofault(&p, (freeptr_t *)freepointer_addr, sizeof(p));
return freelist_ptr_decode(s, p, freepointer_addr);
}
static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
@ -441,7 +449,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
#endif
freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
*(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
*(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr);
}
/* Loop over all objects in a slab */