diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 42e0062d880b..0dad17d26ce2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -575,35 +575,106 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 	return mz;
 }
 
+/* Subset of node_stat_item for memcg stats */
+static const unsigned int memcg_node_stat_items[] = {
+	NR_INACTIVE_ANON,
+	NR_ACTIVE_ANON,
+	NR_INACTIVE_FILE,
+	NR_ACTIVE_FILE,
+	NR_UNEVICTABLE,
+	NR_SLAB_RECLAIMABLE_B,
+	NR_SLAB_UNRECLAIMABLE_B,
+	WORKINGSET_REFAULT_ANON,
+	WORKINGSET_REFAULT_FILE,
+	WORKINGSET_ACTIVATE_ANON,
+	WORKINGSET_ACTIVATE_FILE,
+	WORKINGSET_RESTORE_ANON,
+	WORKINGSET_RESTORE_FILE,
+	WORKINGSET_NODERECLAIM,
+	NR_ANON_MAPPED,
+	NR_FILE_MAPPED,
+	NR_FILE_PAGES,
+	NR_FILE_DIRTY,
+	NR_WRITEBACK,
+	NR_SHMEM,
+	NR_SHMEM_THPS,
+	NR_FILE_THPS,
+	NR_ANON_THPS,
+	NR_KERNEL_STACK_KB,
+	NR_PAGETABLE,
+	NR_SECONDARY_PAGETABLE,
+#ifdef CONFIG_SWAP
+	NR_SWAPCACHE,
+#endif
+};
+
+static const unsigned int memcg_stat_items[] = {
+	MEMCG_SWAP,
+	MEMCG_SOCK,
+	MEMCG_PERCPU_B,
+	MEMCG_VMALLOC,
+	MEMCG_KMEM,
+	MEMCG_ZSWAP_B,
+	MEMCG_ZSWAPPED,
+};
+
+#define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items)
+#define MEMCG_VMSTAT_SIZE (NR_MEMCG_NODE_STAT_ITEMS + \
+			   ARRAY_SIZE(memcg_stat_items))
+static int8_t mem_cgroup_stats_index[MEMCG_NR_STAT] __read_mostly;
+
+static void init_memcg_stats(void)
+{
+	int8_t i, j = 0;
+
+	BUILD_BUG_ON(MEMCG_NR_STAT >= S8_MAX);
+
+	for (i = 0; i < NR_MEMCG_NODE_STAT_ITEMS; ++i)
+		mem_cgroup_stats_index[memcg_node_stat_items[i]] = ++j;
+
+	for (i = 0; i < ARRAY_SIZE(memcg_stat_items); ++i)
+		mem_cgroup_stats_index[memcg_stat_items[i]] = ++j;
+}
+
+static inline int memcg_stats_index(int idx)
+{
+	return mem_cgroup_stats_index[idx] - 1;
+}
+
 struct lruvec_stats_percpu {
 	/* Local (CPU and cgroup) state */
-	long state[NR_VM_NODE_STAT_ITEMS];
+	long state[NR_MEMCG_NODE_STAT_ITEMS];
 
 	/* Delta calculation for lockless upward propagation */
-	long state_prev[NR_VM_NODE_STAT_ITEMS];
+	long state_prev[NR_MEMCG_NODE_STAT_ITEMS];
 };
 
 struct lruvec_stats {
 	/* Aggregated (CPU and subtree) state */
-	long state[NR_VM_NODE_STAT_ITEMS];
+	long state[NR_MEMCG_NODE_STAT_ITEMS];
 
 	/* Non-hierarchical (CPU aggregated) state */
-	long state_local[NR_VM_NODE_STAT_ITEMS];
+	long state_local[NR_MEMCG_NODE_STAT_ITEMS];
 
 	/* Pending child counts during tree propagation */
-	long state_pending[NR_VM_NODE_STAT_ITEMS];
+	long state_pending[NR_MEMCG_NODE_STAT_ITEMS];
 };
 
 unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
 {
 	struct mem_cgroup_per_node *pn;
 	long x;
+	int i;
 
 	if (mem_cgroup_disabled())
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
+	i = memcg_stats_index(idx);
+	if (i < 0)
+		return 0;
+
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	x = READ_ONCE(pn->lruvec_stats->state[idx]);
+	x = READ_ONCE(pn->lruvec_stats->state[i]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -616,12 +687,17 @@ unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 {
 	struct mem_cgroup_per_node *pn;
 	long x = 0;
+	int i;
 
 	if (mem_cgroup_disabled())
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
+	i = memcg_stats_index(idx);
+	if (i < 0)
+		return 0;
+
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	x = READ_ONCE(pn->lruvec_stats->state_local[idx]);
+	x = READ_ONCE(pn->lruvec_stats->state_local[i]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -688,11 +764,11 @@ struct memcg_vmstats_percpu {
 	/* The above should fit a single cacheline for memcg_rstat_updated() */
 
 	/* Local (CPU and cgroup) page state & events */
-	long state[MEMCG_NR_STAT];
+	long state[MEMCG_VMSTAT_SIZE];
 	unsigned long events[NR_MEMCG_EVENTS];
 
 	/* Delta calculation for lockless upward propagation */
-	long state_prev[MEMCG_NR_STAT];
+	long state_prev[MEMCG_VMSTAT_SIZE];
 	unsigned long events_prev[NR_MEMCG_EVENTS];
 
 	/* Cgroup1: threshold notifications & softlimit tree updates */
@@ -702,15 +778,15 @@ struct memcg_vmstats_percpu {
 
 struct memcg_vmstats {
 	/* Aggregated (CPU and subtree) page state & events */
-	long state[MEMCG_NR_STAT];
+	long state[MEMCG_VMSTAT_SIZE];
 	unsigned long events[NR_MEMCG_EVENTS];
 
 	/* Non-hierarchical (CPU aggregated) page state & events */
-	long state_local[MEMCG_NR_STAT];
+	long state_local[MEMCG_VMSTAT_SIZE];
 	unsigned long events_local[NR_MEMCG_EVENTS];
 
 	/* Pending child counts during tree propagation */
-	long state_pending[MEMCG_NR_STAT];
+	long state_pending[MEMCG_VMSTAT_SIZE];
 	unsigned long events_pending[NR_MEMCG_EVENTS];
 
 	/* Stats updates since the last flush */
@@ -843,7 +919,13 @@ static void flush_memcg_stats_dwork(struct work_struct *w)
 
 unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 {
-	long x = READ_ONCE(memcg->vmstats->state[idx]);
+	long x;
+	int i = memcg_stats_index(idx);
+
+	if (i < 0)
+		return 0;
+
+	x = READ_ONCE(memcg->vmstats->state[i]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -875,18 +957,25 @@ static int memcg_state_val_in_pages(int idx, int val)
  */
 void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 {
-	if (mem_cgroup_disabled())
+	int i = memcg_stats_index(idx);
+
+	if (mem_cgroup_disabled() || i < 0)
 		return;
 
-	__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
+	__this_cpu_add(memcg->vmstats_percpu->state[i], val);
 	memcg_rstat_updated(memcg, memcg_state_val_in_pages(idx, val));
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item. */
 static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 {
-	long x = READ_ONCE(memcg->vmstats->state_local[idx]);
+	long x;
+	int i = memcg_stats_index(idx);
+
+	if (i < 0)
+		return 0;
+
+	x = READ_ONCE(memcg->vmstats->state_local[i]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -900,6 +989,10 @@ static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
 {
 	struct mem_cgroup_per_node *pn;
 	struct mem_cgroup *memcg;
+	int i = memcg_stats_index(idx);
+
+	if (i < 0)
+		return;
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	memcg = pn->memcg;
@@ -926,10 +1019,10 @@ static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
 	}
 
 	/* Update memcg */
-	__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
+	__this_cpu_add(memcg->vmstats_percpu->state[i], val);
 
 	/* Update lruvec */
-	__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
+	__this_cpu_add(pn->lruvec_stats_percpu->state[i], val);
 
 	memcg_rstat_updated(memcg, memcg_state_val_in_pages(idx, val));
 	memcg_stats_unlock();
@@ -5697,6 +5790,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		page_counter_init(&memcg->kmem, &parent->kmem);
 		page_counter_init(&memcg->tcpmem, &parent->tcpmem);
 	} else {
+		init_memcg_stats();
 		init_memcg_events();
 		page_counter_init(&memcg->memory, NULL);
 		page_counter_init(&memcg->swap, NULL);
@@ -5868,7 +5962,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 
 	statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
 
-	for (i = 0; i < MEMCG_NR_STAT; i++) {
+	for (i = 0; i < MEMCG_VMSTAT_SIZE; i++) {
 		/*
 		 * Collect the aggregated propagation counts of groups
 		 * below us. We're in a per-cpu loop here and this is
@@ -5932,7 +6026,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 
 	lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
 
-	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+	for (i = 0; i < NR_MEMCG_NODE_STAT_ITEMS; i++) {
 		delta = lstats->state_pending[i];
 		if (delta)
 			lstats->state_pending[i] = 0;
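Not part of the patch: a minimal standalone userspace sketch of the sparse-ID-to-dense-index scheme that init_memcg_stats() and memcg_stats_index() implement above. The ITEM_* identifiers, tracked_items table, and helper names are made up for illustration only; the point is that untracked IDs map to -1 so callers can bail out, while tracked IDs index a smaller dense array.

/* sketch.c: illustrative only, not kernel code */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum { ITEM_A, ITEM_B, ITEM_C, ITEM_D, ITEM_E, NR_ITEMS };	/* sparse ID space */

static const unsigned int tracked_items[] = { ITEM_B, ITEM_D };	/* subset we store */
#define NR_TRACKED (sizeof(tracked_items) / sizeof(tracked_items[0]))

static int8_t item_index[NR_ITEMS];	/* 0 means "not tracked", else dense index + 1 */

static void init_items(void)
{
	int8_t j = 0;

	for (unsigned int i = 0; i < NR_TRACKED; ++i)
		item_index[tracked_items[i]] = ++j;
}

static int items_index(int id)
{
	return item_index[id] - 1;	/* -1 for untracked IDs */
}

int main(void)
{
	long state[NR_TRACKED] = { 0 };	/* dense array sized by the tracked subset only */

	init_items();

	assert(items_index(ITEM_A) < 0);	/* untracked: callers return early */
	state[items_index(ITEM_B)] += 42;	/* tracked: dense slot 0 */
	state[items_index(ITEM_D)] += 7;	/* tracked: dense slot 1 */

	printf("ITEM_B=%ld ITEM_D=%ld\n",
	       state[items_index(ITEM_B)], state[items_index(ITEM_D)]);
	return 0;
}

The patch applies the same idea twice: lruvec arrays shrink from NR_VM_NODE_STAT_ITEMS to NR_MEMCG_NODE_STAT_ITEMS entries, and memcg-level arrays shrink from MEMCG_NR_STAT to MEMCG_VMSTAT_SIZE entries, with memcg_stats_index() providing the translation on every read and update.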