linux-stable/include/linux/hugetlb_cgroup.h
Mina Almasry f477619990 hugetlb: add hugetlb.*.numa_stat file
For hugetlb backed jobs/VMs it's critical to understand the numa
information for the memory backing these jobs to deliver optimal
performance.

Currently this technically can be queried from /proc/self/numa_maps, but
there are significant issues with that.  Namely:

1. Memory can be mapped or unmapped.

2. numa_maps are per process and need to be aggregated across all
   processes in the cgroup.  For shared memory this is more involved as
   the userspace needs to make sure it doesn't double count shared
   mappings.

3. I believe querying numa_maps needs to hold the mmap_lock which adds
   to the contention on this lock.

For these reasons I propose simply adding hugetlb.*.numa_stat file,
   which shows the numa information of the cgroup similarly to
   memory.numa_stat.

On cgroup-v2:
   cat /sys/fs/cgroup/unified/test/hugetlb.2MB.numa_stat
   total=2097152 N0=2097152 N1=0

On cgroup-v1:
   cat /sys/fs/cgroup/hugetlb/test/hugetlb.2MB.numa_stat
   total=2097152 N0=2097152 N1=0
   hierarchical_total=2097152 N0=2097152 N1=0

This patch was tested manually by allocating hugetlb memory and querying
the hugetlb.*.numa_stat file of the cgroup and its parents.

[colin.i.king@googlemail.com: fix spelling mistake "hierarichal" -> "hierarchical"]
  Link: https://lkml.kernel.org/r/20211125090635.23508-1-colin.i.king@gmail.com
[keescook@chromium.org: fix copy/paste array assignment]
  Link: https://lkml.kernel.org/r/20211203065647.2819707-1-keescook@chromium.org

Link: https://lkml.kernel.org/r/20211123001020.4083653-1-almasrymina@google.com
Signed-off-by: Mina Almasry <almasrymina@google.com>
Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jue Wang <juew@google.com>
Cc: Yang Yao <ygyao@google.com>
Cc: Joanna Li <joannali@google.com>
Cc: Cannon Matthews <cannonmatthews@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-01-15 16:30:29 +02:00

297 lines
7.5 KiB
C

/*
* Copyright IBM Corporation, 2012
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
#ifndef _LINUX_HUGETLB_CGROUP_H
#define _LINUX_HUGETLB_CGROUP_H
#include <linux/mmdebug.h>
struct hugetlb_cgroup;
struct resv_map;
struct file_region;
#ifdef CONFIG_CGROUP_HUGETLB
/*
 * Minimum page order trackable by hugetlb cgroup.
 * At least 4 pages are necessary for all the tracking information.
 * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
 * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
 * is the reservation usage cgroup.
 *
 * The value is order_base_2() of the number of subpage slots
 * (__MAX_CGROUP_SUBPAGE_INDEX + 1).  The accessors in this header refuse
 * to touch pages whose compound order is below it: the getter returns
 * NULL and the setter returns -1.
 */
#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
enum hugetlb_memory_event {
HUGETLB_MAX,
HUGETLB_NR_MEMORY_EVENTS,
};
/*
 * Usage counters, one slot per hstate.  struct hugetlb_cgroup holds an
 * array of pointers to these (nodeinfo[]); going by the type name there
 * is presumably one instance per NUMA node - confirm at the allocation
 * site.
 */
struct hugetlb_cgroup_per_node {
/* hugetlb usage in pages over all hstates. */
unsigned long usage[HUGE_MAX_HSTATE];
};
/*
 * State kept for one hugetlb cgroup.  Every fixed-size array is indexed
 * by hstate (HUGE_MAX_HSTATE entries).
 */
struct hugetlb_cgroup {
/* Embedded css; the helpers in this header take/drop references via &h_cg->css. */
struct cgroup_subsys_state css;
/*
 * the counter to account for hugepages from hugetlb.
 */
struct page_counter hugepage[HUGE_MAX_HSTATE];
/*
 * the counter to account for hugepage reservations from hugetlb.
 */
struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];
/* Event counters, indexed [hstate][enum hugetlb_memory_event]. */
atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
/*
 * Same shape as events[].  NOTE(review): presumably counts only events
 * raised in this cgroup itself, not in descendants - confirm.
 */
atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
/* Handle for "hugetlb.events" */
struct cgroup_file events_file[HUGE_MAX_HSTATE];
/* Handle for "hugetlb.events.local" */
struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
/*
 * Flexible array of per-node info pointers; must remain the last member.
 * Its length is decided by whoever allocates the structure (not visible
 * in this header).
 */
struct hugetlb_cgroup_per_node *nodeinfo[];
};
/*
 * Fetch the hugetlb cgroup pointer recorded in one of a huge page's
 * subpage private fields.
 *
 * @page: hugetlb page; checked with VM_BUG_ON_PAGE(!PageHuge(page)).
 *        The slot is addressed as page + SUBPAGE_INDEX_*.
 * @rsvd: true selects the SUBPAGE_INDEX_CGROUP_RSVD slot, false the
 *        SUBPAGE_INDEX_CGROUP slot.
 *
 * Returns NULL when the page's compound order is below
 * HUGETLB_CGROUP_MIN_ORDER, otherwise the selected slot's private value
 * converted to a pointer.
 */
static inline struct hugetlb_cgroup *
__hugetlb_cgroup_from_page(struct page *page, bool rsvd)
{
	struct page *slot;

	VM_BUG_ON_PAGE(!PageHuge(page), page);

	/* Pages this small do not have the tracking slots. */
	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
		return NULL;

	slot = page + (rsvd ? SUBPAGE_INDEX_CGROUP_RSVD : SUBPAGE_INDEX_CGROUP);

	return (void *)page_private(slot);
}
/*
 * Return the cgroup recorded in @page's SUBPAGE_INDEX_CGROUP slot.
 * Thin wrapper around __hugetlb_cgroup_from_page() with rsvd == false,
 * so it returns NULL when the page's compound order is below
 * HUGETLB_CGROUP_MIN_ORDER.
 */
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
{
return __hugetlb_cgroup_from_page(page, false);
}
/*
 * Return the cgroup recorded in @page's SUBPAGE_INDEX_CGROUP_RSVD slot.
 * Thin wrapper around __hugetlb_cgroup_from_page() with rsvd == true,
 * so it returns NULL when the page's compound order is below
 * HUGETLB_CGROUP_MIN_ORDER.
 */
static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_rsvd(struct page *page)
{
return __hugetlb_cgroup_from_page(page, true);
}
/*
 * Record a hugetlb cgroup pointer in one of a huge page's subpage
 * private fields.
 *
 * @page: hugetlb page; checked with VM_BUG_ON_PAGE(!PageHuge(page)).
 *        The slot is addressed as page + SUBPAGE_INDEX_*.
 * @h_cg: pointer to store (stored as an unsigned long).
 * @rsvd: true selects the SUBPAGE_INDEX_CGROUP_RSVD slot, false the
 *        SUBPAGE_INDEX_CGROUP slot.
 *
 * Returns 0 on success, or -1 without storing anything when the page's
 * compound order is below HUGETLB_CGROUP_MIN_ORDER.
 */
static inline int __set_hugetlb_cgroup(struct page *page,
				       struct hugetlb_cgroup *h_cg, bool rsvd)
{
	struct page *slot;

	VM_BUG_ON_PAGE(!PageHuge(page), page);

	/* Pages this small do not have the tracking slots. */
	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
		return -1;

	slot = page + (rsvd ? SUBPAGE_INDEX_CGROUP_RSVD : SUBPAGE_INDEX_CGROUP);
	set_page_private(slot, (unsigned long)h_cg);

	return 0;
}
/*
 * Store @h_cg in @page's SUBPAGE_INDEX_CGROUP slot.
 * Thin wrapper around __set_hugetlb_cgroup() with rsvd == false;
 * returns its result (0 on success, -1 if the page's order is too small).
 */
static inline int set_hugetlb_cgroup(struct page *page,
struct hugetlb_cgroup *h_cg)
{
return __set_hugetlb_cgroup(page, h_cg, false);
}
/*
 * Store @h_cg in @page's SUBPAGE_INDEX_CGROUP_RSVD slot.
 * Thin wrapper around __set_hugetlb_cgroup() with rsvd == true;
 * returns its result (0 on success, -1 if the page's order is too small).
 */
static inline int set_hugetlb_cgroup_rsvd(struct page *page,
struct hugetlb_cgroup *h_cg)
{
return __set_hugetlb_cgroup(page, h_cg, true);
}
/*
 * Report whether the hugetlb cgroup controller is switched off.
 *
 * Returns true when cgroup_subsys_enabled(hugetlb_cgrp_subsys) is false,
 * and false otherwise.
 */
static inline bool hugetlb_cgroup_disabled(void)
{
	if (cgroup_subsys_enabled(hugetlb_cgrp_subsys))
		return false;

	return true;
}
/*
 * Drop one css reference on @h_cg via css_put().
 * @h_cg is dereferenced unconditionally (&h_cg->css), so callers are
 * expected to pass a non-NULL pointer.
 */
static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
{
css_put(&h_cg->css);
}
/*
 * Take an additional css reference for @resv_map, if it has a css.
 * Does nothing when resv_map->css is NULL.
 */
static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
						struct resv_map *resv_map)
{
	/* No css attached to this map: nothing to pin. */
	if (!resv_map->css)
		return;

	css_get(resv_map->css);
}
/*
 * Drop one css reference held for @resv_map, if it has a css.
 * Does nothing when resv_map->css is NULL.
 */
static inline void resv_map_put_hugetlb_cgroup_uncharge_info(
						struct resv_map *resv_map)
{
	/* No css attached to this map: nothing to release. */
	if (!resv_map->css)
		return;

	css_put(resv_map->css);
}
/*
 * Out-of-line hugetlb cgroup API, available when CONFIG_CGROUP_HUGETLB is
 * set.  Only the prototypes are visible here; the definitions live
 * outside this header (presumably mm/hugetlb_cgroup.c - confirm).
 *
 * Every function declared below has a same-named inline stub in the
 * #else branch of this header that does nothing (and returns 0 where
 * the return type is int).
 *
 * Most entry points come in pairs: a plain variant and a *_rsvd variant.
 * NOTE(review): going by the two counters in struct hugetlb_cgroup, the
 * plain variants presumably act on the hugepage[] counter and the *_rsvd
 * variants on rsvd_hugepage[] - confirm against the implementation.
 */
extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr);
extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr);
extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page);
extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page);
extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page);
extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
struct page *page);
extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
extern void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
unsigned long start,
unsigned long end);
extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
struct file_region *rg,
unsigned long nr_pages,
bool region_del);
/* Marked __init. */
extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
struct page *newhpage);
#else
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; all arguments are ignored. */
static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
struct file_region *rg,
unsigned long nr_pages,
bool region_del)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: no cgroup is ever recorded, so always NULL. */
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
{
return NULL;
}
/*
 * !CONFIG_CGROUP_HUGETLB stub: always returns NULL.
 *
 * NOTE(review): the CONFIG_CGROUP_HUGETLB branch of this header defines
 * no function with this name; it provides hugetlb_cgroup_from_page_rsvd()
 * ("rsvd", not "resv"), which also has its own stub right below.  This
 * looks like a leftover or misspelled duplicate - confirm there are no
 * callers before removing it.
 */
static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_resv(struct page *page)
{
return NULL;
}
/* !CONFIG_CGROUP_HUGETLB stub: no cgroup is ever recorded, so always NULL. */
static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_rsvd(struct page *page)
{
return NULL;
}
/* !CONFIG_CGROUP_HUGETLB stub: stores nothing and always returns 0. */
static inline int set_hugetlb_cgroup(struct page *page,
struct hugetlb_cgroup *h_cg)
{
return 0;
}
/* !CONFIG_CGROUP_HUGETLB stub: stores nothing and always returns 0. */
static inline int set_hugetlb_cgroup_rsvd(struct page *page,
struct hugetlb_cgroup *h_cg)
{
return 0;
}
/* !CONFIG_CGROUP_HUGETLB stub: the controller is not built, so always true. */
static inline bool hugetlb_cgroup_disabled(void)
{
return true;
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; @h_cg is not dereferenced. */
static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; no reference is taken. */
static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
struct resv_map *resv_map)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; no reference is dropped. */
static inline void resv_map_put_hugetlb_cgroup_uncharge_info(
struct resv_map *resv_map)
{
}
/*
 * !CONFIG_CGROUP_HUGETLB stub: always returns 0 and leaves *ptr
 * untouched.
 */
static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
{
return 0;
}
/*
 * !CONFIG_CGROUP_HUGETLB stub: always returns 0 and leaves *ptr
 * untouched.
 */
static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx,
unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
{
return 0;
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; all arguments are ignored. */
static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; all arguments are ignored. */
static inline void
hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; all arguments are ignored. */
static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; all arguments are ignored. */
static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx,
unsigned long nr_pages,
struct page *page)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; all arguments are ignored. */
static inline void hugetlb_cgroup_uncharge_cgroup(int idx,
unsigned long nr_pages,
struct hugetlb_cgroup *h_cg)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; all arguments are ignored. */
static inline void
hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; all arguments are ignored. */
static inline void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
unsigned long start,
unsigned long end)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing. */
static inline void hugetlb_cgroup_file_init(void)
{
}
/* !CONFIG_CGROUP_HUGETLB stub: does nothing; both pages are left untouched. */
static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
struct page *newhpage)
{
}
#endif /* CONFIG_CGROUP_HUGETLB */
#endif