linux-stable/include/linux/memory-tiers.h
Jagdish Gediya 3200802728 mm/demotion: demote pages according to allocation fallback order
Currently, a higher tier node can only be demoted to selected nodes on the
next lower tier as defined by the demotion path.  This strict demotion
order does not work in all use cases (e.g.  some use cases may want to
allow cross-socket demotion to another node in the same demotion tier as a
fallback when the preferred demotion node is out of space).  This demotion
order is also inconsistent with the page allocation fallback order when
all the nodes in a higher tier are out of space: The page allocation can
fall back to any node from any lower tier, whereas the demotion order
doesn't allow that currently.

This patch adds support to get all the allowed demotion targets for a
memory tier.  demote_page_list() function is now modified to utilize this
allowed node mask as the fallback allocation mask.

Link: https://lkml.kernel.org/r/20220818131042.113280-9-aneesh.kumar@linux.ibm.com
Signed-off-by: Jagdish Gediya <jvgediya.oss@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Wei Xu <weixugc@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hesham Almatary <hesham.almatary@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2022-09-26 19:46:12 -07:00

91 lines
2.3 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MEMORY_TIERS_H
#define _LINUX_MEMORY_TIERS_H
#include <linux/types.h>
#include <linux/nodemask.h>
#include <linux/kref.h>
#include <linux/mmzone.h>
/*
* Each tier cover a abstrace distance chunk size of 128
*/
#define MEMTIER_CHUNK_BITS 7
#define MEMTIER_CHUNK_SIZE (1 << MEMTIER_CHUNK_BITS)
/*
* Smaller abstract distance values imply faster (higher) memory tiers. Offset
* the DRAM adistance so that we can accommodate devices with a slightly lower
* adistance value (slightly faster) than default DRAM adistance to be part of
* the same memory tier.
*/
#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
#define MEMTIER_HOTPLUG_PRIO 100
struct memory_tier;
struct memory_dev_type {
/* list of memory types that are part of same tier as this type */
struct list_head tier_sibiling;
/* abstract distance for this specific memory type */
int adistance;
/* Nodes of same abstract distance */
nodemask_t nodes;
struct kref kref;
};
#ifdef CONFIG_NUMA
extern bool numa_demotion_enabled;
struct memory_dev_type *alloc_memory_type(int adistance);
void destroy_memory_type(struct memory_dev_type *memtype);
void init_node_memory_type(int node, struct memory_dev_type *default_type);
void clear_node_memory_type(int node, struct memory_dev_type *memtype);
#ifdef CONFIG_MIGRATION
int next_demotion_node(int node);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
#else
static inline int next_demotion_node(int node)
{
return NUMA_NO_NODE;
}
static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
{
*targets = NODE_MASK_NONE;
}
#endif
#else
#define numa_demotion_enabled false
/*
* CONFIG_NUMA implementation returns non NULL error.
*/
static inline struct memory_dev_type *alloc_memory_type(int adistance)
{
return NULL;
}
static inline void destroy_memory_type(struct memory_dev_type *memtype)
{
}
static inline void init_node_memory_type(int node, struct memory_dev_type *default_type)
{
}
static inline void clear_node_memory_type(int node, struct memory_dev_type *memtype)
{
}
static inline int next_demotion_node(int node)
{
return NUMA_NO_NODE;
}
static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
{
*targets = NODE_MASK_NONE;
}
#endif /* CONFIG_NUMA */
#endif /* _LINUX_MEMORY_TIERS_H */