linux-stable/include/linux/memory.h
David Hildenbrand 028fc57a1c drivers/base/memory: introduce "memory groups" to logically group memory blocks
In our "auto-movable" memory onlining policy, we want to make decisions
across memory blocks of a single memory device.  Examples of memory
devices include ACPI memory devices (in the simplest case a single DIMM)
and virtio-mem.  For now, we don't have a connection between a single
memory block device and the real memory device.  Each memory device
consists of 1..X memory block devices.

Let's logically group memory blocks belonging to the same memory device in
"memory groups".  Memory groups can span multiple physical ranges and a
memory group itself does not contain any information regarding physical
ranges, only properties (e.g., "max_pages") necessary for improved memory
onlining.

Introduce two memory group types:

1) Static memory group: E.g., a single ACPI memory device, consisting
   of 1..X memory resources.  A memory group consists of 1..Y memory
   blocks.  The whole group is added/removed in one go.  If any part
   cannot get offlined, the whole group cannot be removed.

2) Dynamic memory group: E.g., a single virtio-mem device.  Memory is
   dynamically added/removed in a fixed granularity, called a "unit",
   consisting of 1..X memory blocks.  A unit is added/removed in one go.
   If any part of a unit cannot get offlined, the whole unit cannot be
   removed.

In case of 1) we usually want either all memory managed by ZONE_MOVABLE or
none.  In case of 2) we usually want to have as many units as possible
managed by ZONE_MOVABLE.  We want a single unit to be of the same type.

For now, memory groups are an internal concept that is not exposed to user
space; we might want to change that in the future, though.

add_memory() users can specify a mgid instead of a nid when passing the
MHP_NID_IS_MGID flag.

Link: https://lkml.kernel.org/r/20210806124715.17090-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hui Zhu <teawater@gmail.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Marek Kedzierski <mkedzier@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-09-08 11:50:23 -07:00

166 lines
5.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* include/linux/memory.h - generic memory definition
*
* This is mainly for topological representation. We define the
* basic "struct memory_block" here, which can be embedded in per-arch
* definitions or NUMA information.
*
* Basic handling of the devices is done in drivers/base/memory.c
* and system devices are handled in drivers/base/sys.c.
*
* Memory block are exported via sysfs in the class/memory/devices/
* directory.
*
*/
#ifndef _LINUX_MEMORY_H_
#define _LINUX_MEMORY_H_
#include <linux/node.h>
#include <linux/compiler.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
/**
* struct memory_group - a logical group of memory blocks
* @nid: The node id for all memory blocks inside the memory group.
* @blocks: List of all memory blocks belonging to this memory group.
* @is_dynamic: The memory group type: static vs. dynamic
* @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
* number of pages we'll have in this static memory group.
* @d.unit_pages: Valid with &memory_group.is_dynamic == true. Unit in pages
* in which memory is added/removed in this dynamic memory group.
* This granularity defines the alignment of a unit in physical
* address space; it has to be at least as big as a single
* memory block.
*
* A memory group logically groups memory blocks; each memory block
* belongs to at most one memory group. A memory group corresponds to
* a memory device, such as a DIMM or a NUMA node, which spans multiple
* memory blocks and might even span multiple non-contiguous physical memory
* ranges.
*
* Modification of members after registration is serialized by memory
* hot(un)plug code.
*/
struct memory_group {
int nid;
struct list_head memory_blocks;
bool is_dynamic;
union {
struct {
unsigned long max_pages;
} s;
struct {
unsigned long unit_pages;
} d;
};
};
struct memory_block {
unsigned long start_section_nr;
unsigned long state; /* serialized by the dev->lock */
int online_type; /* for passing data to online routine */
int nid; /* NID for this memory block */
struct device dev;
/*
* Number of vmemmap pages. These pages
* lay at the beginning of the memory block.
*/
unsigned long nr_vmemmap_pages;
struct memory_group *group; /* group (if any) for this block */
struct list_head group_next; /* next block inside memory group */
};
int arch_get_memory_phys_device(unsigned long start_pfn);
unsigned long memory_block_size_bytes(void);
int set_memory_block_size_order(unsigned int order);
/* These states are exposed to userspace as text strings in sysfs */
#define MEM_ONLINE (1<<0) /* exposed to userspace */
#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */
#define MEM_OFFLINE (1<<2) /* exposed to userspace */
#define MEM_GOING_ONLINE (1<<3)
#define MEM_CANCEL_ONLINE (1<<4)
#define MEM_CANCEL_OFFLINE (1<<5)
struct memory_notify {
unsigned long start_pfn;
unsigned long nr_pages;
int status_change_nid_normal;
int status_change_nid_high;
int status_change_nid;
};
struct notifier_block;
struct mem_section;
/*
* Priorities for the hotplug memory callback routines (stored in decreasing
* order in the callback chain)
*/
#define SLAB_CALLBACK_PRI 1
#define IPC_CALLBACK_PRI 10
#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
static inline void memory_dev_init(void)
{
return;
}
static inline int register_memory_notifier(struct notifier_block *nb)
{
return 0;
}
static inline void unregister_memory_notifier(struct notifier_block *nb)
{
}
static inline int memory_notify(unsigned long val, void *v)
{
return 0;
}
#else
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size,
unsigned long vmemmap_pages,
struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
extern struct memory_block *find_memory_block(struct mem_section *);
typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
extern int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, walk_memory_blocks_func_t func);
extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
extern int memory_group_register_static(int nid, unsigned long max_pages);
extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);
extern int memory_group_unregister(int mgid);
struct memory_group *memory_group_find_by_id(int mgid);
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
#ifdef CONFIG_MEMORY_HOTPLUG
#define hotplug_memory_notifier(fn, pri) ({ \
static __meminitdata struct notifier_block fn##_mem_nb =\
{ .notifier_call = fn, .priority = pri };\
register_memory_notifier(&fn##_mem_nb); \
})
#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
#else
#define hotplug_memory_notifier(fn, pri) ({ 0; })
/* These aren't inline functions due to a GCC bug. */
#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; })
#define unregister_hotmemory_notifier(nb) ({ (void)(nb); })
#endif
/*
* Kernel text modification mutex, used for code patching. Users of this lock
* can sleep.
*/
extern struct mutex text_mutex;
#endif /* _LINUX_MEMORY_H_ */