diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index adaae2c781c1..616ebd22ef9a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -19,6 +19,7 @@ endif KASAN_SANITIZE_head$(BITS).o := n KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n +KASAN_SANITIZE_stacktrace.o := n OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h new file mode 100644 index 000000000000..7978b3e2c1e1 --- /dev/null +++ b/include/linux/stackdepot.h @@ -0,0 +1,32 @@ +/* + * A generic stack depot implementation + * + * Author: Alexander Potapenko + * Copyright (C) 2016 Google, Inc. + * + * Based on code by Dmitry Chernenkov. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_STACKDEPOT_H +#define _LINUX_STACKDEPOT_H + +typedef u32 depot_stack_handle_t; + +struct stack_trace; + +depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags); + +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace); + +#endif diff --git a/lib/Kconfig b/lib/Kconfig index 133ebc0c1773..3cca1222578e 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -536,4 +536,8 @@ config ARCH_HAS_PMEM_API config ARCH_HAS_MMIO_FLUSH bool +config STACKDEPOT + bool + select STACKTRACE + endmenu diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0e4d2b3b0aee..67d8c6838ba9 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -7,6 +7,7 @@ config KASAN bool "KASan: runtime memory debugger" depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB) select CONSTRUCTORS + select STACKDEPOT if SLAB help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/lib/Makefile b/lib/Makefile index a1de5b61ff40..7bd6fd436c97 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -181,6 +181,9 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o +obj-$(CONFIG_STACKDEPOT) += stackdepot.o +KASAN_SANITIZE_stackdepot.o := n + libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o $(foreach file, $(libfdt_files), \ diff --git a/lib/stackdepot.c b/lib/stackdepot.c new file mode 100644 index 000000000000..654c9d87e83a --- /dev/null +++ b/lib/stackdepot.c @@ -0,0 +1,284 @@ +/* + * Generic stack depot for storing stack traces. + * + * Some debugging tools need to save stack traces of certain events which can + * be later presented to the user. For example, KASAN needs to safe alloc and + * free stacks for each object, but storing two stack traces per object + * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for + * that). + * + * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc + * and free stacks repeat a lot, we save about 100x space. + * Stacks are never removed from depot, so we store them contiguously one after + * another in a contiguos memory allocation. + * + * Author: Alexander Potapenko + * Copyright (C) 2016 Google, Inc. + * + * Based on code by Dmitry Chernenkov. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) + +#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ +#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) +#define STACK_ALLOC_ALIGN 4 +#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ + STACK_ALLOC_ALIGN) +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - STACK_ALLOC_OFFSET_BITS) +#define STACK_ALLOC_SLABS_CAP 1024 +#define STACK_ALLOC_MAX_SLABS \ + (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ + (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP) + +/* The compact structure to store the reference to stacks. */ +union handle_parts { + depot_stack_handle_t handle; + struct { + u32 slabindex : STACK_ALLOC_INDEX_BITS; + u32 offset : STACK_ALLOC_OFFSET_BITS; + }; +}; + +struct stack_record { + struct stack_record *next; /* Link in the hashtable */ + u32 hash; /* Hash in the hastable */ + u32 size; /* Number of frames in the stack */ + union handle_parts handle; + unsigned long entries[1]; /* Variable-sized array of entries. */ +}; + +static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; + +static int depot_index; +static int next_slab_inited; +static size_t depot_offset; +static DEFINE_SPINLOCK(depot_lock); + +static bool init_stack_slab(void **prealloc) +{ + if (!*prealloc) + return false; + /* + * This smp_load_acquire() pairs with smp_store_release() to + * |next_slab_inited| below and in depot_alloc_stack(). + */ + if (smp_load_acquire(&next_slab_inited)) + return true; + if (stack_slabs[depot_index] == NULL) { + stack_slabs[depot_index] = *prealloc; + } else { + stack_slabs[depot_index + 1] = *prealloc; + /* + * This smp_store_release pairs with smp_load_acquire() from + * |next_slab_inited| above and in depot_save_stack(). + */ + smp_store_release(&next_slab_inited, 1); + } + *prealloc = NULL; + return true; +} + +/* Allocation of a new stack in raw storage */ +static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, + u32 hash, void **prealloc, gfp_t alloc_flags) +{ + int required_size = offsetof(struct stack_record, entries) + + sizeof(unsigned long) * size; + struct stack_record *stack; + + required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); + + if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { + if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + WARN_ONCE(1, "Stack depot reached limit capacity"); + return NULL; + } + depot_index++; + depot_offset = 0; + /* + * smp_store_release() here pairs with smp_load_acquire() from + * |next_slab_inited| in depot_save_stack() and + * init_stack_slab(). + */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) + smp_store_release(&next_slab_inited, 0); + } + init_stack_slab(prealloc); + if (stack_slabs[depot_index] == NULL) + return NULL; + + stack = stack_slabs[depot_index] + depot_offset; + + stack->hash = hash; + stack->size = size; + stack->handle.slabindex = depot_index; + stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + memcpy(stack->entries, entries, size * sizeof(unsigned long)); + depot_offset += required_size; + + return stack; +} + +#define STACK_HASH_ORDER 20 +#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER) +#define STACK_HASH_MASK (STACK_HASH_SIZE - 1) +#define STACK_HASH_SEED 0x9747b28c + +static struct stack_record *stack_table[STACK_HASH_SIZE] = { + [0 ... STACK_HASH_SIZE - 1] = NULL +}; + +/* Calculate hash for a stack */ +static inline u32 hash_stack(unsigned long *entries, unsigned int size) +{ + return jhash2((u32 *)entries, + size * sizeof(unsigned long) / sizeof(u32), + STACK_HASH_SEED); +} + +/* Find a stack that is equal to the one stored in entries in the hash */ +static inline struct stack_record *find_stack(struct stack_record *bucket, + unsigned long *entries, int size, + u32 hash) +{ + struct stack_record *found; + + for (found = bucket; found; found = found->next) { + if (found->hash == hash && + found->size == size && + !memcmp(entries, found->entries, + size * sizeof(unsigned long))) { + return found; + } + } + return NULL; +} + +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) +{ + union handle_parts parts = { .handle = handle }; + void *slab = stack_slabs[parts.slabindex]; + size_t offset = parts.offset << STACK_ALLOC_ALIGN; + struct stack_record *stack = slab + offset; + + trace->nr_entries = trace->max_entries = stack->size; + trace->entries = stack->entries; + trace->skip = 0; +} + +/** + * depot_save_stack - save stack in a stack depot. + * @trace - the stacktrace to save. + * @alloc_flags - flags for allocating additional memory if required. + * + * Returns the handle of the stack struct stored in depot. + */ +depot_stack_handle_t depot_save_stack(struct stack_trace *trace, + gfp_t alloc_flags) +{ + u32 hash; + depot_stack_handle_t retval = 0; + struct stack_record *found = NULL, **bucket; + unsigned long flags; + struct page *page = NULL; + void *prealloc = NULL; + + if (unlikely(trace->nr_entries == 0)) + goto fast_exit; + + hash = hash_stack(trace->entries, trace->nr_entries); + /* Bad luck, we won't store this stack. */ + if (hash == 0) + goto exit; + + bucket = &stack_table[hash & STACK_HASH_MASK]; + + /* + * Fast path: look the stack trace up without locking. + * The smp_load_acquire() here pairs with smp_store_release() to + * |bucket| below. + */ + found = find_stack(smp_load_acquire(bucket), trace->entries, + trace->nr_entries, hash); + if (found) + goto exit; + + /* + * Check if the current or the next stack slab need to be initialized. + * If so, allocate the memory - we won't be able to do that under the + * lock. + * + * The smp_load_acquire() here pairs with smp_store_release() to + * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). + */ + if (unlikely(!smp_load_acquire(&next_slab_inited))) { + /* + * Zero out zone modifiers, as we don't have specific zone + * requirements. Keep the flags related to allocation in atomic + * contexts and I/O. + */ + alloc_flags &= ~GFP_ZONEMASK; + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); + if (page) + prealloc = page_address(page); + } + + spin_lock_irqsave(&depot_lock, flags); + + found = find_stack(*bucket, trace->entries, trace->nr_entries, hash); + if (!found) { + struct stack_record *new = + depot_alloc_stack(trace->entries, trace->nr_entries, + hash, &prealloc, alloc_flags); + if (new) { + new->next = *bucket; + /* + * This smp_store_release() pairs with + * smp_load_acquire() from |bucket| above. + */ + smp_store_release(bucket, new); + found = new; + } + } else if (prealloc) { + /* + * We didn't need to store this stack trace, but let's keep + * the preallocated memory for the future. + */ + WARN_ON(!init_stack_slab(&prealloc)); + } + + spin_unlock_irqrestore(&depot_lock, flags); +exit: + if (prealloc) { + /* Nobody used this memory, ok to free it. */ + free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER); + } + if (found) + retval = found->handle.handle; +fast_exit: + return retval; +} diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index cb998e0ec9d3..acb3b6c4dd89 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -17,7 +17,9 @@ #define DISABLE_BRANCH_PROFILING #include +#include #include +#include #include #include #include @@ -32,7 +34,6 @@ #include #include #include -#include #include "kasan.h" #include "../slab.h" @@ -413,23 +414,65 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object) #endif } -static inline void set_track(struct kasan_track *track) +#ifdef CONFIG_SLAB +static inline int in_irqentry_text(unsigned long ptr) { - track->cpu = raw_smp_processor_id(); - track->pid = current->pid; - track->when = jiffies; + return (ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end) || + (ptr >= (unsigned long)&__softirqentry_text_start && + ptr < (unsigned long)&__softirqentry_text_end); +} + +static inline void filter_irq_stacks(struct stack_trace *trace) +{ + int i; + + if (!trace->nr_entries) + return; + for (i = 0; i < trace->nr_entries; i++) + if (in_irqentry_text(trace->entries[i])) { + /* Include the irqentry function into the stack. */ + trace->nr_entries = i + 1; + break; + } +} + +static inline depot_stack_handle_t save_stack(gfp_t flags) +{ + unsigned long entries[KASAN_STACK_DEPTH]; + struct stack_trace trace = { + .nr_entries = 0, + .entries = entries, + .max_entries = KASAN_STACK_DEPTH, + .skip = 0 + }; + + save_stack_trace(&trace); + filter_irq_stacks(&trace); + if (trace.nr_entries != 0 && + trace.entries[trace.nr_entries-1] == ULONG_MAX) + trace.nr_entries--; + + return depot_save_stack(&trace, flags); +} + +static inline void set_track(struct kasan_track *track, gfp_t flags) +{ + track->pid = current->pid; + track->stack = save_stack(flags); } -#ifdef CONFIG_SLAB struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, const void *object) { + BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32); return (void *)object + cache->kasan_info.alloc_meta_offset; } struct kasan_free_meta *get_free_info(struct kmem_cache *cache, const void *object) { + BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32); return (void *)object + cache->kasan_info.free_meta_offset; } #endif @@ -486,7 +529,7 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size, alloc_info->state = KASAN_STATE_ALLOC; alloc_info->alloc_size = size; - set_track(&alloc_info->track); + set_track(&alloc_info->track, flags); } #endif } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 7b9e4ab9b66b..30a2f0ba0e09 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -2,6 +2,7 @@ #define __MM_KASAN_KASAN_H #include +#include #define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) @@ -64,16 +65,18 @@ enum kasan_state { KASAN_STATE_FREE }; +#define KASAN_STACK_DEPTH 64 + struct kasan_track { - u64 cpu : 6; /* for NR_CPUS = 64 */ - u64 pid : 16; /* 65536 processes */ - u64 when : 42; /* ~140 years */ + u32 pid; + depot_stack_handle_t stack; }; struct kasan_alloc_meta { + struct kasan_track track; u32 state : 2; /* enum kasan_state */ u32 alloc_size : 30; - struct kasan_track track; + u32 reserved; }; struct kasan_free_meta { diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 3e3385cc97ac..60869a5a0124 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -118,8 +119,15 @@ static inline bool init_task_stack_addr(const void *addr) #ifdef CONFIG_SLAB static void print_track(struct kasan_track *track) { - pr_err("PID = %u, CPU = %u, timestamp = %lu\n", track->pid, - track->cpu, (unsigned long)track->when); + pr_err("PID = %u\n", track->pid); + if (track->stack) { + struct stack_trace trace; + + depot_fetch_stack(track->stack, &trace); + print_stack_trace(&trace, 0); + } else { + pr_err("(stack is not available)\n"); + } } static void object_err(struct kmem_cache *cache, struct page *page,