mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-26 12:26:11 +00:00
f1a7941243
Currently mm_struct maintains rss_stats which are updated on the page fault and unmapping codepaths. For the page fault codepath the updates are cached per thread with a batch of TASK_RSS_EVENTS_THRESH, which is 64. The reason for caching is performance for multithreaded applications; otherwise the rss_stats updates may become a hotspot for such applications. However, this optimization comes at the cost of an error margin in the rss stats. The rss_stats for applications with a large number of threads can be very skewed. At worst the error margin is (nr_threads * 64), and we have a lot of applications with 100s of threads, so the error margin can be very high. Internally we had to reduce TASK_RSS_EVENTS_THRESH to 32. Recently we started seeing unbounded errors in rss_stats for specific applications which use TCP rx0cp. It seems like the vm_insert_pages() codepath does not sync rss_stats at all. This patch converts the rss_stats into percpu_counter to change the error margin from (nr_threads * 64) to approximately (nr_cpus ^ 2). However, this conversion enables us to get accurate stats for situations where accuracy is more important than the cpu cost. This patch does not make such tradeoffs - we can just use percpu_counter_add_local() for the updates and percpu_counter_sum() (or percpu_counter_sync() + percpu_counter_read) for the readers. At the moment the readers are the procfs interface, the oom_killer and memory reclaim, which I think are not performance critical and should be ok with a slow read. However, I think we can make that change in a separate patch. Link: https://lkml.kernel.org/r/20221024052841.3291983-1-shakeelb@google.com Signed-off-by: Shakeel Butt <shakeelb@google.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
74 lines
2 KiB
C
74 lines
2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_MM_TYPES_TASK_H
|
|
#define _LINUX_MM_TYPES_TASK_H
|
|
|
|
/*
 * Here are the definitions of the MM data types that are embedded in 'struct task_struct'.
 *
 * (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
 */
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/atomic.h>
|
|
#include <linux/cpumask.h>
|
|
|
|
#include <asm/page.h>
|
|
|
|
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
|
#include <asm/tlbbatch.h>
|
|
#endif
|
|
|
|
/*
 * Compile-time switches derived from the kernel configuration.
 *
 * USE_SPLIT_PTE_PTLOCKS - non-zero when NR_CPUS is at least
 *                         CONFIG_SPLIT_PTLOCK_CPUS.
 * USE_SPLIT_PMD_PTLOCKS - non-zero when USE_SPLIT_PTE_PTLOCKS holds and
 *                         CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK is enabled.
 * ALLOC_SPLIT_PTLOCKS   - non-zero when SPINLOCK_SIZE exceeds the size of a
 *                         long in bytes (BITS_PER_LONG/8).
 *
 * NOTE(review): the names suggest these select per-page-table ("split")
 * locking and whether the lock needs separate allocation - confirm against
 * the users of these macros.
 */
#define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
#define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
#define ALLOC_SPLIT_PTLOCKS	(SPINLOCK_SIZE > BITS_PER_LONG/8)
|
|
|
|
/*
 * Indices of the per-mm counters.
 *
 * When updating this, please also update struct resident_page_types[] in
 * kernel/fork.c
 *
 * NR_MM_COUNTERS must stay last: as the final enumerator its value equals
 * the number of counters listed before it.
 */
enum {
	MM_FILEPAGES,	/* Resident file mapping pages */
	MM_ANONPAGES,	/* Resident anonymous pages */
	MM_SWAPENTS,	/* Anonymous swap entries */
	MM_SHMEMPAGES,	/* Resident shared memory pages */
	NR_MM_COUNTERS
};
|
|
|
|
struct page_frag {
|
|
struct page *page;
|
|
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
|
|
__u32 offset;
|
|
__u32 size;
|
|
#else
|
|
__u16 offset;
|
|
__u16 size;
|
|
#endif
|
|
};
|
|
|
|
/*
 * Track pages that require TLB flushes.
 *
 * All members exist only when CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH is
 * defined; without it the struct has no members.
 */
struct tlbflush_unmap_batch {
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
	/*
	 * The arch code makes the following promise: generic code can modify a
	 * PTE, then call arch_tlbbatch_add_mm() (which internally provides all
	 * needed barriers), then call arch_tlbbatch_flush(), and the entries
	 * will be flushed on all CPUs by the time that arch_tlbbatch_flush()
	 * returns.
	 */
	struct arch_tlbflush_unmap_batch arch;

	/* True if a flush is needed. */
	bool flush_required;

	/*
	 * If true then the PTE was dirty when unmapped. The entry must be
	 * flushed before IO is initiated or a stale TLB entry potentially
	 * allows an update without redirtying the page.
	 */
	bool writable;
#endif
};
|
|
|
|
#endif /* _LINUX_MM_TYPES_TASK_H */
|