mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-27 22:51:31 +00:00
028fc57a1c
In our "auto-movable" memory onlining policy, we want to make decisions across memory blocks of a single memory device. Examples of memory devices include ACPI memory devices (in the simplest case a single DIMM) and virtio-mem. For now, we don't have a connection between a single memory block device and the real memory device. Each memory device consists of 1..X memory block devices. Let's logically group memory blocks belonging to the same memory device in "memory groups". Memory groups can span multiple physical ranges and a memory group itself does not contain any information regarding physical ranges, only properties (e.g., "max_pages") necessary for improved memory onlining. Introduce two memory group types: 1) Static memory group: E.g., a single ACPI memory device, consisting of 1..X memory resources. A memory group consists of 1..Y memory blocks. The whole group is added/removed in one go. If any part cannot get offlined, the whole group cannot be removed. 2) Dynamic memory group: E.g., a single virtio-mem device. Memory is dynamically added/removed in a fixed granularity, called a "unit", consisting of 1..X memory blocks. A unit is added/removed in one go. If any part of a unit cannot get offlined, the whole unit cannot be removed. In case of 1) we usually want either all memory managed by ZONE_MOVABLE or none. In case of 2) we usually want to have as many units as possible managed by ZONE_MOVABLE. We want a single unit to be of the same type. For now, memory groups are an internal concept that is not exposed to user space; we might want to change that in the future, though. add_memory() users can specify a mgid instead of a nid when passing the MHP_NID_IS_MGID flag. Link: https://lkml.kernel.org/r/20210806124715.17090-4-david@redhat.com Signed-off-by: David Hildenbrand <david@redhat.com> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Hui Zhu <teawater@gmail.com> Cc: Jason Wang <jasowang@redhat.com> Cc: Len Brown <lenb@kernel.org> Cc: Marek Kedzierski <mkedzier@redhat.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Mike Rapoport <rppt@kernel.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com> Cc: Pavel Tatashin <pasha.tatashin@soleen.com> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Vitaly Kuznetsov <vkuznets@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wei Yang <richard.weiyang@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
166 lines
5.5 KiB
C
166 lines
5.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* include/linux/memory.h - generic memory definition
|
|
*
|
|
* This is mainly for topological representation. We define the
|
|
* basic "struct memory_block" here, which can be embedded in per-arch
|
|
* definitions or NUMA information.
|
|
*
|
|
* Basic handling of the devices is done in drivers/base/memory.c
|
|
* and system devices are handled in drivers/base/sys.c.
|
|
*
|
|
* Memory block are exported via sysfs in the class/memory/devices/
|
|
* directory.
|
|
*
|
|
*/
|
|
#ifndef _LINUX_MEMORY_H_
|
|
#define _LINUX_MEMORY_H_
|
|
|
|
#include <linux/node.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/notifier.h>
|
|
|
|
#define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
|
|
|
|
/**
|
|
* struct memory_group - a logical group of memory blocks
|
|
* @nid: The node id for all memory blocks inside the memory group.
|
|
* @blocks: List of all memory blocks belonging to this memory group.
|
|
* @is_dynamic: The memory group type: static vs. dynamic
|
|
* @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
|
|
* number of pages we'll have in this static memory group.
|
|
* @d.unit_pages: Valid with &memory_group.is_dynamic == true. Unit in pages
|
|
* in which memory is added/removed in this dynamic memory group.
|
|
* This granularity defines the alignment of a unit in physical
|
|
* address space; it has to be at least as big as a single
|
|
* memory block.
|
|
*
|
|
* A memory group logically groups memory blocks; each memory block
|
|
* belongs to at most one memory group. A memory group corresponds to
|
|
* a memory device, such as a DIMM or a NUMA node, which spans multiple
|
|
* memory blocks and might even span multiple non-contiguous physical memory
|
|
* ranges.
|
|
*
|
|
* Modification of members after registration is serialized by memory
|
|
* hot(un)plug code.
|
|
*/
|
|
struct memory_group {
|
|
int nid;
|
|
struct list_head memory_blocks;
|
|
bool is_dynamic;
|
|
union {
|
|
struct {
|
|
unsigned long max_pages;
|
|
} s;
|
|
struct {
|
|
unsigned long unit_pages;
|
|
} d;
|
|
};
|
|
};
|
|
|
|
struct memory_block {
|
|
unsigned long start_section_nr;
|
|
unsigned long state; /* serialized by the dev->lock */
|
|
int online_type; /* for passing data to online routine */
|
|
int nid; /* NID for this memory block */
|
|
struct device dev;
|
|
/*
|
|
* Number of vmemmap pages. These pages
|
|
* lay at the beginning of the memory block.
|
|
*/
|
|
unsigned long nr_vmemmap_pages;
|
|
struct memory_group *group; /* group (if any) for this block */
|
|
struct list_head group_next; /* next block inside memory group */
|
|
};
|
|
|
|
int arch_get_memory_phys_device(unsigned long start_pfn);
|
|
unsigned long memory_block_size_bytes(void);
|
|
int set_memory_block_size_order(unsigned int order);
|
|
|
|
/* These states are exposed to userspace as text strings in sysfs */
|
|
#define MEM_ONLINE (1<<0) /* exposed to userspace */
|
|
#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */
|
|
#define MEM_OFFLINE (1<<2) /* exposed to userspace */
|
|
#define MEM_GOING_ONLINE (1<<3)
|
|
#define MEM_CANCEL_ONLINE (1<<4)
|
|
#define MEM_CANCEL_OFFLINE (1<<5)
|
|
|
|
struct memory_notify {
|
|
unsigned long start_pfn;
|
|
unsigned long nr_pages;
|
|
int status_change_nid_normal;
|
|
int status_change_nid_high;
|
|
int status_change_nid;
|
|
};
|
|
|
|
struct notifier_block;
|
|
struct mem_section;
|
|
|
|
/*
|
|
* Priorities for the hotplug memory callback routines (stored in decreasing
|
|
* order in the callback chain)
|
|
*/
|
|
#define SLAB_CALLBACK_PRI 1
|
|
#define IPC_CALLBACK_PRI 10
|
|
|
|
#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
|
|
static inline void memory_dev_init(void)
|
|
{
|
|
return;
|
|
}
|
|
static inline int register_memory_notifier(struct notifier_block *nb)
|
|
{
|
|
return 0;
|
|
}
|
|
static inline void unregister_memory_notifier(struct notifier_block *nb)
|
|
{
|
|
}
|
|
static inline int memory_notify(unsigned long val, void *v)
|
|
{
|
|
return 0;
|
|
}
|
|
#else
|
|
extern int register_memory_notifier(struct notifier_block *nb);
|
|
extern void unregister_memory_notifier(struct notifier_block *nb);
|
|
int create_memory_block_devices(unsigned long start, unsigned long size,
|
|
unsigned long vmemmap_pages,
|
|
struct memory_group *group);
|
|
void remove_memory_block_devices(unsigned long start, unsigned long size);
|
|
extern void memory_dev_init(void);
|
|
extern int memory_notify(unsigned long val, void *v);
|
|
extern struct memory_block *find_memory_block(struct mem_section *);
|
|
typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
|
|
extern int walk_memory_blocks(unsigned long start, unsigned long size,
|
|
void *arg, walk_memory_blocks_func_t func);
|
|
extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
|
|
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
|
|
|
|
extern int memory_group_register_static(int nid, unsigned long max_pages);
|
|
extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);
|
|
extern int memory_group_unregister(int mgid);
|
|
struct memory_group *memory_group_find_by_id(int mgid);
|
|
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
|
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
#define hotplug_memory_notifier(fn, pri) ({ \
|
|
static __meminitdata struct notifier_block fn##_mem_nb =\
|
|
{ .notifier_call = fn, .priority = pri };\
|
|
register_memory_notifier(&fn##_mem_nb); \
|
|
})
|
|
#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
|
|
#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
|
|
#else
|
|
#define hotplug_memory_notifier(fn, pri) ({ 0; })
|
|
/* These aren't inline functions due to a GCC bug. */
|
|
#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; })
|
|
#define unregister_hotmemory_notifier(nb) ({ (void)(nb); })
|
|
#endif
|
|
|
|
/*
|
|
* Kernel text modification mutex, used for code patching. Users of this lock
|
|
* can sleep.
|
|
*/
|
|
extern struct mutex text_mutex;
|
|
|
|
#endif /* _LINUX_MEMORY_H_ */
|