diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 03c503def835..c5aa0c2cb68b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -631,6 +631,9 @@ struct memcg_vmstats_percpu { /* Cgroup1: threshold notifications & softlimit tree updates */ unsigned long nr_page_events; unsigned long targets[MEM_CGROUP_NTARGETS]; + + /* Stats updates since the last flush */ + unsigned int stats_updates; }; struct memcg_vmstats { @@ -645,6 +648,9 @@ struct memcg_vmstats { /* Pending child counts during tree propagation */ long state_pending[MEMCG_NR_STAT]; unsigned long events_pending[NR_MEMCG_EVENTS]; + + /* Stats updates since the last flush */ + atomic64_t stats_updates; }; /* @@ -664,9 +670,7 @@ struct memcg_vmstats { */ static void flush_memcg_stats_dwork(struct work_struct *w); static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork); -static DEFINE_PER_CPU(unsigned int, stats_updates); static atomic_t stats_flush_ongoing = ATOMIC_INIT(0); -static atomic_t stats_flush_threshold = ATOMIC_INIT(0); static u64 flush_last_time; #define FLUSH_TIME (2UL*HZ) @@ -693,26 +697,37 @@ static void memcg_stats_unlock(void) preempt_enable_nested(); } + +static bool memcg_should_flush_stats(struct mem_cgroup *memcg) +{ + return atomic64_read(&memcg->vmstats->stats_updates) > + MEMCG_CHARGE_BATCH * num_online_cpus(); +} + static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) { + int cpu = smp_processor_id(); unsigned int x; if (!val) return; - cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); + cgroup_rstat_updated(memcg->css.cgroup, cpu); + + for (; memcg; memcg = parent_mem_cgroup(memcg)) { + x = __this_cpu_add_return(memcg->vmstats_percpu->stats_updates, + abs(val)); + + if (x < MEMCG_CHARGE_BATCH) + continue; - x = __this_cpu_add_return(stats_updates, abs(val)); - if (x > MEMCG_CHARGE_BATCH) { /* - * If stats_flush_threshold exceeds the threshold - * (>num_online_cpus()), cgroup stats update will be triggered - * in __mem_cgroup_flush_stats(). Increasing this var further - * is redundant and simply adds overhead in atomic update. + * If @memcg is already flush-able, increasing stats_updates is + * redundant. Avoid the overhead of the atomic update. */ - if (atomic_read(&stats_flush_threshold) <= num_online_cpus()) - atomic_add(x / MEMCG_CHARGE_BATCH, &stats_flush_threshold); - __this_cpu_write(stats_updates, 0); + if (!memcg_should_flush_stats(memcg)) + atomic64_add(x, &memcg->vmstats->stats_updates); + __this_cpu_write(memcg->vmstats_percpu->stats_updates, 0); } } @@ -731,13 +746,12 @@ static void do_flush_stats(void) cgroup_rstat_flush(root_mem_cgroup->css.cgroup); - atomic_set(&stats_flush_threshold, 0); atomic_set(&stats_flush_ongoing, 0); } void mem_cgroup_flush_stats(void) { - if (atomic_read(&stats_flush_threshold) > num_online_cpus()) + if (memcg_should_flush_stats(root_mem_cgroup)) do_flush_stats(); } @@ -751,8 +765,8 @@ void mem_cgroup_flush_stats_ratelimited(void) static void flush_memcg_stats_dwork(struct work_struct *w) { /* - * Always flush here so that flushing in latency-sensitive paths is - * as cheap as possible. + * Deliberately ignore memcg_should_flush_stats() here so that flushing + * in latency-sensitive paths is as cheap as possible. */ do_flush_stats(); queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME); @@ -5788,6 +5802,10 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) } } } + statc->stats_updates = 0; + /* We are in a per-cpu loop here, only do the atomic write once */ + if (atomic64_read(&memcg->vmstats->stats_updates)) + atomic64_set(&memcg->vmstats->stats_updates, 0); } #ifdef CONFIG_MMU