From 1328710b8ec30ac552b51bf804d1fc255a1c7451 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 10 May 2017 13:36:37 -0400 Subject: [PATCH 1/9] mark most percpu globals as __ro_after_init Moving pcpu_base_addr to this section comes from PaX where it's part of KERNEXEC. This extends it to the rest of the globals only written by the init code. Signed-off-by: Daniel Micay Acked-by: Kees Cook Signed-off-by: Tejun Heo --- mm/percpu.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index e0aa8ae7bde7..c03753054099 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -121,35 +121,35 @@ struct pcpu_chunk { unsigned long populated[]; /* populated bitmap */ }; -static int pcpu_unit_pages __read_mostly; -static int pcpu_unit_size __read_mostly; -static int pcpu_nr_units __read_mostly; -static int pcpu_atom_size __read_mostly; -static int pcpu_nr_slots __read_mostly; -static size_t pcpu_chunk_struct_size __read_mostly; +static int pcpu_unit_pages __ro_after_init; +static int pcpu_unit_size __ro_after_init; +static int pcpu_nr_units __ro_after_init; +static int pcpu_atom_size __ro_after_init; +static int pcpu_nr_slots __ro_after_init; +static size_t pcpu_chunk_struct_size __ro_after_init; /* cpus with the lowest and highest unit addresses */ -static unsigned int pcpu_low_unit_cpu __read_mostly; -static unsigned int pcpu_high_unit_cpu __read_mostly; +static unsigned int pcpu_low_unit_cpu __ro_after_init; +static unsigned int pcpu_high_unit_cpu __ro_after_init; /* the address of the first chunk which starts with the kernel static area */ -void *pcpu_base_addr __read_mostly; +void *pcpu_base_addr __ro_after_init; EXPORT_SYMBOL_GPL(pcpu_base_addr); -static const int *pcpu_unit_map __read_mostly; /* cpu -> unit */ -const unsigned long *pcpu_unit_offsets __read_mostly; /* cpu -> unit offset */ +static const int *pcpu_unit_map __ro_after_init; /* cpu -> unit */ +const unsigned long *pcpu_unit_offsets __ro_after_init; /* cpu -> unit offset */ /* group information, used for vm allocation */ -static int pcpu_nr_groups __read_mostly; -static const unsigned long *pcpu_group_offsets __read_mostly; -static const size_t *pcpu_group_sizes __read_mostly; +static int pcpu_nr_groups __ro_after_init; +static const unsigned long *pcpu_group_offsets __ro_after_init; +static const size_t *pcpu_group_sizes __ro_after_init; /* * The first chunk which always exists. Note that unlike other * chunks, this one can be allocated and mapped in several different * ways and thus often doesn't live in the vmalloc area. */ -static struct pcpu_chunk *pcpu_first_chunk; +static struct pcpu_chunk *pcpu_first_chunk __ro_after_init; /* * Optional reserved chunk. This chunk reserves part of the first @@ -158,13 +158,13 @@ static struct pcpu_chunk *pcpu_first_chunk; * area doesn't exist, the following variables contain NULL and 0 * respectively. */ -static struct pcpu_chunk *pcpu_reserved_chunk; -static int pcpu_reserved_chunk_limit; +static struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init; +static int pcpu_reserved_chunk_limit __ro_after_init; static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ -static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ +static struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */ /* chunks which need their map areas extended, protected by pcpu_lock */ static LIST_HEAD(pcpu_map_extend_chunks); From 5ccd30e40e731051f6d1eb02f7ac073c1ef9deba Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Mon, 19 Jun 2017 19:28:29 -0400 Subject: [PATCH 2/9] percpu: add missing lockdep_assert_held to func pcpu_free_area Add a missing lockdep_assert_held for pcpu_lock to improve consistency and safety throughout mm/percpu.c. Signed-off-by: Dennis Zhou Signed-off-by: Tejun Heo --- mm/percpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/percpu.c b/mm/percpu.c index c03753054099..be95d3192acb 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -672,6 +672,8 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme, int to_free = 0; int *p; + lockdep_assert_held(&pcpu_lock); + freeme |= 1; /* we are searching for pair */ i = 0; From 8fa3ed8014ac792a4c76d585b5c0f68bd202ff6b Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Mon, 19 Jun 2017 19:28:30 -0400 Subject: [PATCH 3/9] percpu: migrate percpu data structures to internal header Migrates pcpu_chunk definition and a few percpu static variables to an internal header file from mm/percpu.c. These will be used with debugfs to expose statistics about percpu memory improving visibility regarding allocations and fragmentation. Signed-off-by: Dennis Zhou Signed-off-by: Tejun Heo --- mm/percpu-internal.h | 33 +++++++++++++++++++++++++++++++++ mm/percpu.c | 30 +++++++----------------------- 2 files changed, 40 insertions(+), 23 deletions(-) create mode 100644 mm/percpu-internal.h diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h new file mode 100644 index 000000000000..68bf18522a6d --- /dev/null +++ b/mm/percpu-internal.h @@ -0,0 +1,33 @@ +#ifndef _MM_PERCPU_INTERNAL_H +#define _MM_PERCPU_INTERNAL_H + +#include +#include + +struct pcpu_chunk { + struct list_head list; /* linked to pcpu_slot lists */ + int free_size; /* free bytes in the chunk */ + int contig_hint; /* max contiguous size hint */ + void *base_addr; /* base address of this chunk */ + + int map_used; /* # of map entries used before the sentry */ + int map_alloc; /* # of map entries allocated */ + int *map; /* allocation map */ + struct list_head map_extend_list;/* on pcpu_map_extend_chunks */ + + void *data; /* chunk data */ + int first_free; /* no free below this */ + bool immutable; /* no [de]population allowed */ + int nr_populated; /* # of populated pages */ + unsigned long populated[]; /* populated bitmap */ +}; + +extern spinlock_t pcpu_lock; + +extern struct list_head *pcpu_slot; +extern int pcpu_nr_slots; + +extern struct pcpu_chunk *pcpu_first_chunk; +extern struct pcpu_chunk *pcpu_reserved_chunk; + +#endif diff --git a/mm/percpu.c b/mm/percpu.c index be95d3192acb..75ac982c19df 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -76,6 +76,8 @@ #include #include +#include "percpu-internal.h" + #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ #define PCPU_ATOMIC_MAP_MARGIN_LOW 32 @@ -103,29 +105,11 @@ #define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) #endif /* CONFIG_SMP */ -struct pcpu_chunk { - struct list_head list; /* linked to pcpu_slot lists */ - int free_size; /* free bytes in the chunk */ - int contig_hint; /* max contiguous size hint */ - void *base_addr; /* base address of this chunk */ - - int map_used; /* # of map entries used before the sentry */ - int map_alloc; /* # of map entries allocated */ - int *map; /* allocation map */ - struct list_head map_extend_list;/* on pcpu_map_extend_chunks */ - - void *data; /* chunk data */ - int first_free; /* no free below this */ - bool immutable; /* no [de]population allowed */ - int nr_populated; /* # of populated pages */ - unsigned long populated[]; /* populated bitmap */ -}; - static int pcpu_unit_pages __ro_after_init; static int pcpu_unit_size __ro_after_init; static int pcpu_nr_units __ro_after_init; static int pcpu_atom_size __ro_after_init; -static int pcpu_nr_slots __ro_after_init; +int pcpu_nr_slots __ro_after_init; static size_t pcpu_chunk_struct_size __ro_after_init; /* cpus with the lowest and highest unit addresses */ @@ -149,7 +133,7 @@ static const size_t *pcpu_group_sizes __ro_after_init; * chunks, this one can be allocated and mapped in several different * ways and thus often doesn't live in the vmalloc area. */ -static struct pcpu_chunk *pcpu_first_chunk __ro_after_init; +struct pcpu_chunk *pcpu_first_chunk __ro_after_init; /* * Optional reserved chunk. This chunk reserves part of the first @@ -158,13 +142,13 @@ static struct pcpu_chunk *pcpu_first_chunk __ro_after_init; * area doesn't exist, the following variables contain NULL and 0 * respectively. */ -static struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init; +struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init; static int pcpu_reserved_chunk_limit __ro_after_init; -static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ +DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ -static struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */ +struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */ /* chunks which need their map areas extended, protected by pcpu_lock */ static LIST_HEAD(pcpu_map_extend_chunks); From 30a5b5367ef9d5c9055414e12ec2f02d9de2e70f Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Mon, 19 Jun 2017 19:28:31 -0400 Subject: [PATCH 4/9] percpu: expose statistics about percpu memory via debugfs There is limited visibility into the use of percpu memory leaving us unable to reason about correctness of parameters and overall use of percpu memory. These counters and statistics aim to help understand basic statistics about percpu memory such as number of allocations over the lifetime, allocation sizes, and fragmentation. New Config: PERCPU_STATS Signed-off-by: Dennis Zhou Signed-off-by: Tejun Heo --- mm/Kconfig | 8 ++ mm/Makefile | 1 + mm/percpu-internal.h | 131 +++++++++++++++++++++++++ mm/percpu-km.c | 4 + mm/percpu-stats.c | 222 +++++++++++++++++++++++++++++++++++++++++++ mm/percpu-vm.c | 5 + mm/percpu.c | 9 ++ 7 files changed, 380 insertions(+) create mode 100644 mm/percpu-stats.c diff --git a/mm/Kconfig b/mm/Kconfig index beb7a455915d..8fae42606d56 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -706,3 +706,11 @@ config ARCH_USES_HIGH_VMA_FLAGS bool config ARCH_HAS_PKEYS bool + +config PERCPU_STATS + bool "Collect percpu memory statistics" + default n + help + This feature collects and exposes statistics via debugfs. The + information includes global and per chunk statistics, which can + be used to help understand percpu memory usage. diff --git a/mm/Makefile b/mm/Makefile index 026f6a828a50..411bd24d4a7c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -103,3 +103,4 @@ obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o +obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 68bf18522a6d..d030fce745a2 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -5,6 +5,11 @@ #include struct pcpu_chunk { +#ifdef CONFIG_PERCPU_STATS + int nr_alloc; /* # of allocations */ + size_t max_alloc_size; /* largest allocation size */ +#endif + struct list_head list; /* linked to pcpu_slot lists */ int free_size; /* free bytes in the chunk */ int contig_hint; /* max contiguous size hint */ @@ -18,6 +23,11 @@ struct pcpu_chunk { void *data; /* chunk data */ int first_free; /* no free below this */ bool immutable; /* no [de]population allowed */ + bool has_reserved; /* Indicates if chunk has reserved space + at the beginning. Reserved chunk will + contain reservation for static chunk. + Dynamic chunk will contain reservation + for static and reserved chunks. */ int nr_populated; /* # of populated pages */ unsigned long populated[]; /* populated bitmap */ }; @@ -30,4 +40,125 @@ extern int pcpu_nr_slots; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; +#ifdef CONFIG_PERCPU_STATS + +#include + +struct percpu_stats { + u64 nr_alloc; /* lifetime # of allocations */ + u64 nr_dealloc; /* lifetime # of deallocations */ + u64 nr_cur_alloc; /* current # of allocations */ + u64 nr_max_alloc; /* max # of live allocations */ + u32 nr_chunks; /* current # of live chunks */ + u32 nr_max_chunks; /* max # of live chunks */ + size_t min_alloc_size; /* min allocaiton size */ + size_t max_alloc_size; /* max allocation size */ +}; + +extern struct percpu_stats pcpu_stats; +extern struct pcpu_alloc_info pcpu_stats_ai; + +/* + * For debug purposes. We don't care about the flexible array. + */ +static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) +{ + memcpy(&pcpu_stats_ai, ai, sizeof(struct pcpu_alloc_info)); + + /* initialize min_alloc_size to unit_size */ + pcpu_stats.min_alloc_size = pcpu_stats_ai.unit_size; +} + +/* + * pcpu_stats_area_alloc - increment area allocation stats + * @chunk: the location of the area being allocated + * @size: size of area to allocate in bytes + * + * CONTEXT: + * pcpu_lock. + */ +static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) +{ + lockdep_assert_held(&pcpu_lock); + + pcpu_stats.nr_alloc++; + pcpu_stats.nr_cur_alloc++; + pcpu_stats.nr_max_alloc = + max(pcpu_stats.nr_max_alloc, pcpu_stats.nr_cur_alloc); + pcpu_stats.min_alloc_size = + min(pcpu_stats.min_alloc_size, size); + pcpu_stats.max_alloc_size = + max(pcpu_stats.max_alloc_size, size); + + chunk->nr_alloc++; + chunk->max_alloc_size = max(chunk->max_alloc_size, size); +} + +/* + * pcpu_stats_area_dealloc - decrement allocation stats + * @chunk: the location of the area being deallocated + * + * CONTEXT: + * pcpu_lock. + */ +static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) +{ + lockdep_assert_held(&pcpu_lock); + + pcpu_stats.nr_dealloc++; + pcpu_stats.nr_cur_alloc--; + + chunk->nr_alloc--; +} + +/* + * pcpu_stats_chunk_alloc - increment chunk stats + */ +static inline void pcpu_stats_chunk_alloc(void) +{ + spin_lock_irq(&pcpu_lock); + + pcpu_stats.nr_chunks++; + pcpu_stats.nr_max_chunks = + max(pcpu_stats.nr_max_chunks, pcpu_stats.nr_chunks); + + spin_unlock_irq(&pcpu_lock); +} + +/* + * pcpu_stats_chunk_dealloc - decrement chunk stats + */ +static inline void pcpu_stats_chunk_dealloc(void) +{ + spin_lock_irq(&pcpu_lock); + + pcpu_stats.nr_chunks--; + + spin_unlock_irq(&pcpu_lock); +} + +#else + +static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) +{ +} + +static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) +{ +} + +static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) +{ +} + +static inline void pcpu_stats_chunk_alloc(void) +{ +} + +static inline void pcpu_stats_chunk_dealloc(void) +{ +} + +#endif /* !CONFIG_PERCPU_STATS */ + #endif diff --git a/mm/percpu-km.c b/mm/percpu-km.c index d66911ff42d9..3bbfa0c9d069 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -72,6 +72,8 @@ static struct pcpu_chunk *pcpu_create_chunk(void) pcpu_chunk_populated(chunk, 0, nr_pages); spin_unlock_irq(&pcpu_lock); + pcpu_stats_chunk_alloc(); + return chunk; } @@ -79,6 +81,8 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) { const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; + pcpu_stats_chunk_dealloc(); + if (chunk && chunk->data) __free_pages(chunk->data, order_base_2(nr_pages)); pcpu_free_chunk(chunk); diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c new file mode 100644 index 000000000000..03524a56eeff --- /dev/null +++ b/mm/percpu-stats.c @@ -0,0 +1,222 @@ +/* + * mm/percpu-debug.c + * + * Copyright (C) 2017 Facebook Inc. + * Copyright (C) 2017 Dennis Zhou + * + * This file is released under the GPLv2. + * + * Prints statistics about the percpu allocator and backing chunks. + */ +#include +#include +#include +#include +#include +#include + +#include "percpu-internal.h" + +#define P(X, Y) \ + seq_printf(m, " %-24s: %8lld\n", X, (long long int)Y) + +struct percpu_stats pcpu_stats; +struct pcpu_alloc_info pcpu_stats_ai; + +static int cmpint(const void *a, const void *b) +{ + return *(int *)a - *(int *)b; +} + +/* + * Iterates over all chunks to find the max # of map entries used. + */ +static int find_max_map_used(void) +{ + struct pcpu_chunk *chunk; + int slot, max_map_used; + + max_map_used = 0; + for (slot = 0; slot < pcpu_nr_slots; slot++) + list_for_each_entry(chunk, &pcpu_slot[slot], list) + max_map_used = max(max_map_used, chunk->map_used); + + return max_map_used; +} + +/* + * Prints out chunk state. Fragmentation is considered between + * the beginning of the chunk to the last allocation. + */ +static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, + void *buffer) +{ + int i, s_index, last_alloc, alloc_sign, as_len; + int *alloc_sizes, *p; + /* statistics */ + int sum_frag = 0, max_frag = 0; + int cur_min_alloc = 0, cur_med_alloc = 0, cur_max_alloc = 0; + + alloc_sizes = buffer; + s_index = chunk->has_reserved ? 1 : 0; + + /* find last allocation */ + last_alloc = -1; + for (i = chunk->map_used - 1; i >= s_index; i--) { + if (chunk->map[i] & 1) { + last_alloc = i; + break; + } + } + + /* if the chunk is not empty - ignoring reserve */ + if (last_alloc >= s_index) { + as_len = last_alloc + 1 - s_index; + + /* + * Iterate through chunk map computing size info. + * The first bit is overloaded to be a used flag. + * negative = free space, positive = allocated + */ + for (i = 0, p = chunk->map + s_index; i < as_len; i++, p++) { + alloc_sign = (*p & 1) ? 1 : -1; + alloc_sizes[i] = alloc_sign * + ((p[1] & ~1) - (p[0] & ~1)); + } + + sort(alloc_sizes, as_len, sizeof(chunk->map[0]), cmpint, NULL); + + /* Iterate through the unallocated fragements. */ + for (i = 0, p = alloc_sizes; *p < 0 && i < as_len; i++, p++) { + sum_frag -= *p; + max_frag = max(max_frag, -1 * (*p)); + } + + cur_min_alloc = alloc_sizes[i]; + cur_med_alloc = alloc_sizes[(i + as_len - 1) / 2]; + cur_max_alloc = alloc_sizes[as_len - 1]; + } + + P("nr_alloc", chunk->nr_alloc); + P("max_alloc_size", chunk->max_alloc_size); + P("free_size", chunk->free_size); + P("contig_hint", chunk->contig_hint); + P("sum_frag", sum_frag); + P("max_frag", max_frag); + P("cur_min_alloc", cur_min_alloc); + P("cur_med_alloc", cur_med_alloc); + P("cur_max_alloc", cur_max_alloc); + seq_putc(m, '\n'); +} + +static int percpu_stats_show(struct seq_file *m, void *v) +{ + struct pcpu_chunk *chunk; + int slot, max_map_used; + void *buffer; + +alloc_buffer: + spin_lock_irq(&pcpu_lock); + max_map_used = find_max_map_used(); + spin_unlock_irq(&pcpu_lock); + + buffer = vmalloc(max_map_used * sizeof(pcpu_first_chunk->map[0])); + if (!buffer) + return -ENOMEM; + + spin_lock_irq(&pcpu_lock); + + /* if the buffer allocated earlier is too small */ + if (max_map_used < find_max_map_used()) { + spin_unlock_irq(&pcpu_lock); + vfree(buffer); + goto alloc_buffer; + } + +#define PL(X) \ + seq_printf(m, " %-24s: %8lld\n", #X, (long long int)pcpu_stats_ai.X) + + seq_printf(m, + "Percpu Memory Statistics\n" + "Allocation Info:\n" + "----------------------------------------\n"); + PL(unit_size); + PL(static_size); + PL(reserved_size); + PL(dyn_size); + PL(atom_size); + PL(alloc_size); + seq_putc(m, '\n'); + +#undef PL + +#define PU(X) \ + seq_printf(m, " %-18s: %14llu\n", #X, (unsigned long long)pcpu_stats.X) + + seq_printf(m, + "Global Stats:\n" + "----------------------------------------\n"); + PU(nr_alloc); + PU(nr_dealloc); + PU(nr_cur_alloc); + PU(nr_max_alloc); + PU(nr_chunks); + PU(nr_max_chunks); + PU(min_alloc_size); + PU(max_alloc_size); + seq_putc(m, '\n'); + +#undef PU + + seq_printf(m, + "Per Chunk Stats:\n" + "----------------------------------------\n"); + + if (pcpu_reserved_chunk) { + seq_puts(m, "Chunk: <- Reserved Chunk\n"); + chunk_map_stats(m, pcpu_reserved_chunk, buffer); + } + + for (slot = 0; slot < pcpu_nr_slots; slot++) { + list_for_each_entry(chunk, &pcpu_slot[slot], list) { + if (chunk == pcpu_first_chunk) { + seq_puts(m, "Chunk: <- First Chunk\n"); + chunk_map_stats(m, chunk, buffer); + + + } else { + seq_puts(m, "Chunk:\n"); + chunk_map_stats(m, chunk, buffer); + } + + } + } + + spin_unlock_irq(&pcpu_lock); + + vfree(buffer); + + return 0; +} + +static int percpu_stats_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, percpu_stats_show, NULL); +} + +static const struct file_operations percpu_stats_fops = { + .open = percpu_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init init_percpu_stats_debugfs(void) +{ + debugfs_create_file("percpu_stats", 0444, NULL, NULL, + &percpu_stats_fops); + + return 0; +} + +late_initcall(init_percpu_stats_debugfs); diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 9ac639499bd1..5915a224da52 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -343,11 +343,16 @@ static struct pcpu_chunk *pcpu_create_chunk(void) chunk->data = vms; chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; + + pcpu_stats_chunk_alloc(); + return chunk; } static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) { + pcpu_stats_chunk_dealloc(); + if (chunk && chunk->data) pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); pcpu_free_chunk(chunk); diff --git a/mm/percpu.c b/mm/percpu.c index 75ac982c19df..44a1cadf74a7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -657,6 +657,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme, int *p; lockdep_assert_held(&pcpu_lock); + pcpu_stats_area_dealloc(chunk); freeme |= 1; /* we are searching for pair */ @@ -721,6 +722,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) chunk->map[0] = 0; chunk->map[1] = pcpu_unit_size | 1; chunk->map_used = 1; + chunk->has_reserved = false; INIT_LIST_HEAD(&chunk->list); INIT_LIST_HEAD(&chunk->map_extend_list); @@ -970,6 +972,7 @@ restart: goto restart; area_found: + pcpu_stats_area_alloc(chunk, size); spin_unlock_irqrestore(&pcpu_lock, flags); /* populate if not all pages are already there */ @@ -1642,6 +1645,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); + pcpu_stats_save_ai(ai); + /* * Allocate chunk slots. The additional last slot is for * empty chunks. @@ -1685,6 +1690,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, if (schunk->free_size) schunk->map[++schunk->map_used] = ai->static_size + schunk->free_size; schunk->map[schunk->map_used] |= 1; + schunk->has_reserved = true; /* init dynamic chunk if necessary */ if (dyn_size) { @@ -1703,6 +1709,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, dchunk->map[1] = pcpu_reserved_chunk_limit; dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1; dchunk->map_used = 2; + dchunk->has_reserved = true; } /* link the first chunk in */ @@ -1711,6 +1718,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_count_occupied_pages(pcpu_first_chunk, 1); pcpu_chunk_relocate(pcpu_first_chunk, -1); + pcpu_stats_chunk_alloc(); + /* we're done */ pcpu_base_addr = base_addr; return 0; From df95e795a722892a9e0603ce4b9b62fab9f02967 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Mon, 19 Jun 2017 19:28:32 -0400 Subject: [PATCH 5/9] percpu: add tracepoint support for percpu memory Add support for tracepoints to the following events: chunk allocation, chunk free, area allocation, area free, and area allocation failure. This should let us replay percpu memory requests and evaluate corresponding decisions. Signed-off-by: Dennis Zhou Signed-off-by: Tejun Heo --- include/trace/events/percpu.h | 125 ++++++++++++++++++++++++++++++++++ mm/percpu-km.c | 2 + mm/percpu-vm.c | 2 + mm/percpu.c | 12 ++++ 4 files changed, 141 insertions(+) create mode 100644 include/trace/events/percpu.h diff --git a/include/trace/events/percpu.h b/include/trace/events/percpu.h new file mode 100644 index 000000000000..ad34b1bae047 --- /dev/null +++ b/include/trace/events/percpu.h @@ -0,0 +1,125 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM percpu + +#if !defined(_TRACE_PERCPU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PERCPU_H + +#include + +TRACE_EVENT(percpu_alloc_percpu, + + TP_PROTO(bool reserved, bool is_atomic, size_t size, + size_t align, void *base_addr, int off, void __percpu *ptr), + + TP_ARGS(reserved, is_atomic, size, align, base_addr, off, ptr), + + TP_STRUCT__entry( + __field( bool, reserved ) + __field( bool, is_atomic ) + __field( size_t, size ) + __field( size_t, align ) + __field( void *, base_addr ) + __field( int, off ) + __field( void __percpu *, ptr ) + ), + + TP_fast_assign( + __entry->reserved = reserved; + __entry->is_atomic = is_atomic; + __entry->size = size; + __entry->align = align; + __entry->base_addr = base_addr; + __entry->off = off; + __entry->ptr = ptr; + ), + + TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p", + __entry->reserved, __entry->is_atomic, + __entry->size, __entry->align, + __entry->base_addr, __entry->off, __entry->ptr) +); + +TRACE_EVENT(percpu_free_percpu, + + TP_PROTO(void *base_addr, int off, void __percpu *ptr), + + TP_ARGS(base_addr, off, ptr), + + TP_STRUCT__entry( + __field( void *, base_addr ) + __field( int, off ) + __field( void __percpu *, ptr ) + ), + + TP_fast_assign( + __entry->base_addr = base_addr; + __entry->off = off; + __entry->ptr = ptr; + ), + + TP_printk("base_addr=%p off=%d ptr=%p", + __entry->base_addr, __entry->off, __entry->ptr) +); + +TRACE_EVENT(percpu_alloc_percpu_fail, + + TP_PROTO(bool reserved, bool is_atomic, size_t size, size_t align), + + TP_ARGS(reserved, is_atomic, size, align), + + TP_STRUCT__entry( + __field( bool, reserved ) + __field( bool, is_atomic ) + __field( size_t, size ) + __field( size_t, align ) + ), + + TP_fast_assign( + __entry->reserved = reserved; + __entry->is_atomic = is_atomic; + __entry->size = size; + __entry->align = align; + ), + + TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu", + __entry->reserved, __entry->is_atomic, + __entry->size, __entry->align) +); + +TRACE_EVENT(percpu_create_chunk, + + TP_PROTO(void *base_addr), + + TP_ARGS(base_addr), + + TP_STRUCT__entry( + __field( void *, base_addr ) + ), + + TP_fast_assign( + __entry->base_addr = base_addr; + ), + + TP_printk("base_addr=%p", __entry->base_addr) +); + +TRACE_EVENT(percpu_destroy_chunk, + + TP_PROTO(void *base_addr), + + TP_ARGS(base_addr), + + TP_STRUCT__entry( + __field( void *, base_addr ) + ), + + TP_fast_assign( + __entry->base_addr = base_addr; + ), + + TP_printk("base_addr=%p", __entry->base_addr) +); + +#endif /* _TRACE_PERCPU_H */ + +#include diff --git a/mm/percpu-km.c b/mm/percpu-km.c index 3bbfa0c9d069..2b79e43c626f 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -73,6 +73,7 @@ static struct pcpu_chunk *pcpu_create_chunk(void) spin_unlock_irq(&pcpu_lock); pcpu_stats_chunk_alloc(); + trace_percpu_create_chunk(chunk->base_addr); return chunk; } @@ -82,6 +83,7 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; pcpu_stats_chunk_dealloc(); + trace_percpu_destroy_chunk(chunk->base_addr); if (chunk && chunk->data) __free_pages(chunk->data, order_base_2(nr_pages)); diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 5915a224da52..7ad9d94bf547 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -345,6 +345,7 @@ static struct pcpu_chunk *pcpu_create_chunk(void) chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; pcpu_stats_chunk_alloc(); + trace_percpu_create_chunk(chunk->base_addr); return chunk; } @@ -352,6 +353,7 @@ static struct pcpu_chunk *pcpu_create_chunk(void) static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) { pcpu_stats_chunk_dealloc(); + trace_percpu_destroy_chunk(chunk->base_addr); if (chunk && chunk->data) pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); diff --git a/mm/percpu.c b/mm/percpu.c index 44a1cadf74a7..a5bc3634d2a9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -76,6 +76,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + #include "percpu-internal.h" #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ @@ -1015,11 +1018,17 @@ area_found: ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); kmemleak_alloc_percpu(ptr, size, gfp); + + trace_percpu_alloc_percpu(reserved, is_atomic, size, align, + chunk->base_addr, off, ptr); + return ptr; fail_unlock: spin_unlock_irqrestore(&pcpu_lock, flags); fail: + trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); + if (!is_atomic && warn_limit) { pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", size, align, is_atomic, err); @@ -1269,6 +1278,8 @@ void free_percpu(void __percpu *ptr) } } + trace_percpu_free_percpu(chunk->base_addr, off, ptr); + spin_unlock_irqrestore(&pcpu_lock, flags); } EXPORT_SYMBOL_GPL(free_percpu); @@ -1719,6 +1730,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_chunk_relocate(pcpu_first_chunk, -1); pcpu_stats_chunk_alloc(); + trace_percpu_create_chunk(base_addr); /* we're done */ pcpu_base_addr = base_addr; From 104b4e5139fe384431ac11c3b8a6cf4a529edf4a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 20 Jun 2017 21:01:20 +0300 Subject: [PATCH 6/9] percpu_counter: Rename __percpu_counter_add to percpu_counter_add_batch Currently, percpu_counter_add is a wrapper around __percpu_counter_add which is preempt safe due to explicit calls to preempt_disable. Given how __ prefix is used in percpu related interfaces, the naming unfortunately creates the false sense that __percpu_counter_add is less safe than percpu_counter_add. In terms of context-safety, they're equivalent. The only difference is that the __ version takes a batch parameter. Make this a bit more explicit by just renaming __percpu_counter_add to percpu_counter_add_batch. This patch doesn't cause any functional changes. tj: Minor updates to patch description for clarity. Cosmetic indentation updates. Signed-off-by: Nikolay Borisov Signed-off-by: Tejun Heo Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: Darrick J. Wong Cc: Jan Kara Cc: Jens Axboe Cc: linux-mm@kvack.org Cc: "David S. Miller" --- fs/btrfs/disk-io.c | 12 ++++++------ fs/btrfs/extent_io.c | 6 +++--- fs/btrfs/inode.c | 8 ++++---- fs/xfs/xfs_mount.c | 4 ++-- include/linux/backing-dev.h | 2 +- include/linux/blk-cgroup.h | 6 +++--- include/linux/mman.h | 2 +- include/linux/percpu_counter.h | 7 ++++--- include/net/inet_frag.h | 4 ++-- lib/flex_proportions.c | 6 +++--- lib/percpu_counter.c | 4 ++-- 11 files changed, 31 insertions(+), 30 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 061c1d1f774f..eb5c95569300 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1255,9 +1255,9 @@ void clean_tree_block(struct btrfs_fs_info *fs_info, btrfs_assert_tree_locked(buf); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { - __percpu_counter_add(&fs_info->dirty_metadata_bytes, - -buf->len, - fs_info->dirty_metadata_batch); + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, + -buf->len, + fs_info->dirty_metadata_batch); /* ugh, clear_extent_buffer_dirty needs to lock the page */ btrfs_set_lock_blocking(buf); clear_extent_buffer_dirty(buf); @@ -4046,9 +4046,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->start, transid, fs_info->generation); was_dirty = set_extent_buffer_dirty(buf); if (!was_dirty) - __percpu_counter_add(&fs_info->dirty_metadata_bytes, - buf->len, - fs_info->dirty_metadata_batch); + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, + buf->len, + fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { btrfs_print_leaf(fs_info, buf); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 27fdb250b446..a3455d216a1d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3584,9 +3584,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); spin_unlock(&eb->refs_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - __percpu_counter_add(&fs_info->dirty_metadata_bytes, - -eb->len, - fs_info->dirty_metadata_batch); + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, + -eb->len, + fs_info->dirty_metadata_batch); ret = 1; } else { spin_unlock(&eb->refs_lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5e71f1ea3391..aabdcb9f234f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1682,8 +1682,8 @@ static void btrfs_set_bit_hook(struct inode *inode, if (btrfs_is_testing(fs_info)) return; - __percpu_counter_add(&fs_info->delalloc_bytes, len, - fs_info->delalloc_batch); + percpu_counter_add_batch(&fs_info->delalloc_bytes, len, + fs_info->delalloc_batch); spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->delalloc_bytes += len; if (*bits & EXTENT_DEFRAG) @@ -1749,8 +1749,8 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode, &inode->vfs_inode, state->start, len); - __percpu_counter_add(&fs_info->delalloc_bytes, -len, - fs_info->delalloc_batch); + percpu_counter_add_batch(&fs_info->delalloc_bytes, -len, + fs_info->delalloc_batch); spin_lock(&inode->lock); inode->delalloc_bytes -= len; if (do_list && inode->delalloc_bytes == 0 && diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2eaf81859166..7147d4a8d207 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1209,7 +1209,7 @@ xfs_mod_icount( struct xfs_mount *mp, int64_t delta) { - __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH); + percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH); if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) { ASSERT(0); percpu_counter_add(&mp->m_icount, -delta); @@ -1288,7 +1288,7 @@ xfs_mod_fdblocks( else batch = XFS_FDBLOCKS_BATCH; - __percpu_counter_add(&mp->m_fdblocks, delta, batch); + percpu_counter_add_batch(&mp->m_fdblocks, delta, batch); if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside, XFS_FDBLOCKS_BATCH) >= 0) { /* we had space! */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 557d84063934..ace73f96eb1e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -66,7 +66,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) static inline void __add_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { - __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH); + percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } static inline void __inc_wb_stat(struct bdi_writeback *wb, diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 01b62e7bac74..7104bea8dab1 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -518,7 +518,7 @@ static inline void blkg_stat_exit(struct blkg_stat *stat) */ static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) { - __percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); + percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); } /** @@ -597,14 +597,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; - __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); + percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); if (op_is_sync(op)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; - __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); + percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); } /** diff --git a/include/linux/mman.h b/include/linux/mman.h index 634c4c51fe3a..c8367041fafd 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -22,7 +22,7 @@ unsigned long vm_memory_committed(void); static inline void vm_acct_memory(long pages) { - __percpu_counter_add(&vm_committed_as, pages, vm_committed_as_batch); + percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch); } static inline void vm_unacct_memory(long pages) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 84a109449610..ec065387f443 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -39,7 +39,8 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); -void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); +void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, + s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); @@ -50,7 +51,7 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { - __percpu_counter_add(fbc, amount, percpu_counter_batch); + percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) @@ -136,7 +137,7 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) } static inline void -__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) +percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { percpu_counter_add(fbc, amount); } diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 5894730ec82a..5932e6de8fc0 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -154,12 +154,12 @@ static inline int frag_mem_limit(struct netns_frags *nf) static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); + percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch); } static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); + percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch); } static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index a71cf1bdd4c9..2cc1f94e03a1 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -207,7 +207,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, if (val < (nr_cpu_ids * PROP_BATCH)) val = percpu_counter_sum(&pl->events); - __percpu_counter_add(&pl->events, + percpu_counter_add_batch(&pl->events, -val + (val >> (period-pl->period)), PROP_BATCH); } else percpu_counter_set(&pl->events, 0); @@ -219,7 +219,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) { fprop_reflect_period_percpu(p, pl); - __percpu_counter_add(&pl->events, 1, PROP_BATCH); + percpu_counter_add_batch(&pl->events, 1, PROP_BATCH); percpu_counter_add(&p->events, 1); } @@ -267,6 +267,6 @@ void __fprop_inc_percpu_max(struct fprop_global *p, return; } else fprop_reflect_period_percpu(p, pl); - __percpu_counter_add(&pl->events, 1, PROP_BATCH); + percpu_counter_add_batch(&pl->events, 1, PROP_BATCH); percpu_counter_add(&p->events, 1); } diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 9c21000df0b5..8ee7e5ec21be 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -72,7 +72,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) } EXPORT_SYMBOL(percpu_counter_set); -void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) +void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; @@ -89,7 +89,7 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) } preempt_enable(); } -EXPORT_SYMBOL(__percpu_counter_add); +EXPORT_SYMBOL(percpu_counter_add_batch); /* * Add up all the per-cpu counts, return the result. This is a more accurate From 11df02bf9bc1f6fd8416d22c08275e31f8c4f30d Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 21 Jun 2017 11:51:09 -0400 Subject: [PATCH 7/9] percpu: resolve err may not be initialized in pcpu_alloc From 4a42ecc735cff0015cc73c3d87edede631f4b885 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 21 Jun 2017 08:07:15 -0700 Add error message to out of space failure for atomic allocations in percpu allocation path to fix -Wmaybe-uninitialized. Signed-off-by: Dennis Zhou Reported-by: Stephen Rothwell Signed-off-by: Tejun Heo --- mm/percpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index a5bc3634d2a9..bd4130a69bbc 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -956,8 +956,10 @@ restart: * tasks to create chunks simultaneously. Serialize and create iff * there's still no empty chunk after grabbing the mutex. */ - if (is_atomic) + if (is_atomic) { + err = "atomic alloc failed, no space left"; goto fail; + } if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { chunk = pcpu_create_chunk(); From 303abfdf76ea41c228e8b3da73ed3807121a9ca6 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 21 Jun 2017 13:52:46 -0400 Subject: [PATCH 8/9] percpu: fix early calls for spinlock in pcpu_stats From 2c06e795162cb306c9707ec51d3e1deadb37f573 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 21 Jun 2017 10:17:09 -0700 Commit 30a5b5367ef9 ("percpu: expose statistics about percpu memory via debugfs") introduces percpu memory statistics. pcpu_stats_chunk_alloc takes the spin lock and disables/enables irqs on creation of a chunk. Irqs are not enabled when the first chunk is initialized and thus kernels are failing to boot with kernel debugging enabled. Fixed by changing _irq to _irqsave and _irqrestore. Fixes: 30a5b5367ef9 ("percpu: expose statistics about percpu memory via debugfs") Signed-off-by: Dennis Zhou Reported-by: Alexander Levin Signed-off-by: Tejun Heo --- mm/percpu-internal.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index d030fce745a2..cd2442e13d8f 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -116,13 +116,14 @@ static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) */ static inline void pcpu_stats_chunk_alloc(void) { - spin_lock_irq(&pcpu_lock); + unsigned long flags; + spin_lock_irqsave(&pcpu_lock, flags); pcpu_stats.nr_chunks++; pcpu_stats.nr_max_chunks = max(pcpu_stats.nr_max_chunks, pcpu_stats.nr_chunks); - spin_unlock_irq(&pcpu_lock); + spin_unlock_irqrestore(&pcpu_lock, flags); } /* @@ -130,11 +131,12 @@ static inline void pcpu_stats_chunk_alloc(void) */ static inline void pcpu_stats_chunk_dealloc(void) { - spin_lock_irq(&pcpu_lock); + unsigned long flags; + spin_lock_irqsave(&pcpu_lock, flags); pcpu_stats.nr_chunks--; - spin_unlock_irq(&pcpu_lock); + spin_unlock_irqrestore(&pcpu_lock, flags); } #else From e3efe3db932b55ed34ba32862f568abae32046d0 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 29 Jun 2017 10:56:26 -0400 Subject: [PATCH 9/9] percpu: fix static checker warnings in pcpu_destroy_chunk From 5021b97f4026334d2c8dfad80797dd1028cddd73 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 29 Jun 2017 07:11:41 -0700 Add NULL check in pcpu_destroy_chunk to correct static checker warnings. Signed-off-by: Dennis Zhou Reported-by: Dan Carpenter Signed-off-by: Tejun Heo --- mm/percpu-km.c | 5 ++++- mm/percpu-vm.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/percpu-km.c b/mm/percpu-km.c index 2b79e43c626f..eb58aa4c0997 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -82,10 +82,13 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) { const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; + if (!chunk) + return; + pcpu_stats_chunk_dealloc(); trace_percpu_destroy_chunk(chunk->base_addr); - if (chunk && chunk->data) + if (chunk->data) __free_pages(chunk->data, order_base_2(nr_pages)); pcpu_free_chunk(chunk); } diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 7ad9d94bf547..15dab691ea70 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -352,10 +352,13 @@ static struct pcpu_chunk *pcpu_create_chunk(void) static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) { + if (!chunk) + return; + pcpu_stats_chunk_dealloc(); trace_percpu_destroy_chunk(chunk->base_addr); - if (chunk && chunk->data) + if (chunk->data) pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); pcpu_free_chunk(chunk); }