diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index 373dc45c024e..c24a45b05fbb 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -7,29 +7,124 @@
 # error "Incorrect import of SD flags definitions"
 #endif
 
-/* Balance when about to become idle */
-SD_FLAG(SD_BALANCE_NEWIDLE)
-/* Balance on exec */
-SD_FLAG(SD_BALANCE_EXEC)
-/* Balance on fork, clone */
-SD_FLAG(SD_BALANCE_FORK)
-/* Balance on wakeup */
-SD_FLAG(SD_BALANCE_WAKE)
-/* Wake task to waking CPU */
-SD_FLAG(SD_WAKE_AFFINE)
-/* Domain members have different CPU capacities */
-SD_FLAG(SD_ASYM_CPUCAPACITY)
-/* Domain members share CPU capacity */
-SD_FLAG(SD_SHARE_CPUCAPACITY)
-/* Domain members share CPU pkg resources */
-SD_FLAG(SD_SHARE_PKG_RESOURCES)
-/* Only a single load balancing instance */
-SD_FLAG(SD_SERIALIZE)
-/* Place busy groups earlier in the domain */
-SD_FLAG(SD_ASYM_PACKING)
-/* Prefer to place tasks in a sibling domain */
-SD_FLAG(SD_PREFER_SIBLING)
-/* sched_domains of this level overlap */
-SD_FLAG(SD_OVERLAP)
-/* cross-node balancing */
-SD_FLAG(SD_NUMA)
+/*
+ * Expected flag uses
+ *
+ * SHARED_CHILD: These flags are meant to be set from the base domain upwards.
+ * If a domain has this flag set, all of its children should have it set. This
+ * is usually because the flag describes some shared resource (all CPUs in that
+ * domain share the same resource), or because they are tied to a scheduling
+ * behaviour that we want to disable at some point in the hierarchy for
+ * scalability reasons.
+ *
+ * In those cases it doesn't make sense to have the flag set for a domain but
+ * not have it in (some of) its children: sched domains ALWAYS span their child
+ * domains, so operations done with parent domains will cover CPUs in the lower
+ * child domains.
+ *
+ *
+ * SHARED_PARENT: These flags are meant to be set from the highest domain
+ * downwards. If a domain has this flag set, all of its parents should have it
+ * set. This is usually for topology properties that start to appear above a
+ * certain level (e.g. domain starts spanning CPUs outside of the base CPU's
+ * socket).
+ */
+#define SDF_SHARED_CHILD 0x1
+#define SDF_SHARED_PARENT 0x2
+
+/*
+ * Balance when about to become idle
+ *
+ * SHARED_CHILD: Set from the base domain up to cpuset.sched_relax_domain_level.
+ */
+SD_FLAG(SD_BALANCE_NEWIDLE, SDF_SHARED_CHILD)
+
+/*
+ * Balance on exec
+ *
+ * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level.
+ */
+SD_FLAG(SD_BALANCE_EXEC, SDF_SHARED_CHILD)
+
+/*
+ * Balance on fork, clone
+ *
+ * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level.
+ */
+SD_FLAG(SD_BALANCE_FORK, SDF_SHARED_CHILD)
+
+/*
+ * Balance on wakeup
+ *
+ * SHARED_CHILD: Set from the base domain up to cpuset.sched_relax_domain_level.
+ */
+SD_FLAG(SD_BALANCE_WAKE, SDF_SHARED_CHILD)
+
+/*
+ * Consider waking task on waking CPU.
+ *
+ * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level.
+ */
+SD_FLAG(SD_WAKE_AFFINE, SDF_SHARED_CHILD)
+
+/*
+ * Domain members have different CPU capacities
+ */
+SD_FLAG(SD_ASYM_CPUCAPACITY, 0)
+
+/*
+ * Domain members share CPU capacity (i.e. SMT)
+ *
+ * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share
+ * CPU capacity.
+ */
+SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD)
+
+/*
+ * Domain members share CPU package resources (i.e. caches)
+ *
+ * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share
+ * the same cache(s).
+ */
+SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD)
+
+/*
+ * Only a single load balancing instance
+ *
+ * SHARED_PARENT: Set for all NUMA levels above NODE. Could be set from a
+ * different level upwards, but it doesn't change that if a domain has this flag
+ * set, then all of its parents need to have it too (otherwise the serialization
+ * doesn't make sense).
+ */
+SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT)
+
+/*
+ * Place busy tasks earlier in the domain
+ *
+ * SHARED_CHILD: Usually set on the SMT level. Technically could be set further
+ * up, but currently assumed to be set from the base domain upwards (see
+ * update_top_cache_domain()).
+ */
+SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD)
+
+/*
+ * Prefer to place tasks in a sibling domain
+ *
+ * Set up until domains start spanning NUMA nodes. Close to being a SHARED_CHILD
+ * flag, but cleared below domains with SD_ASYM_CPUCAPACITY.
+ */
+SD_FLAG(SD_PREFER_SIBLING, 0)
+
+/*
+ * sched_groups of this level overlap
+ *
+ * SHARED_PARENT: Set for all NUMA levels above NODE.
+ */
+SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT)
+
+/*
+ * Cross-node balancing
+ *
+ * SHARED_PARENT: Set for all NUMA levels above NODE.
+ */
+SD_FLAG(SD_NUMA, SDF_SHARED_PARENT)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 3e41c0401b5f..32f602ff37a0 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -12,19 +12,30 @@
 #ifdef CONFIG_SMP
 
 /* Generate SD flag indexes */
-#define SD_FLAG(name) __##name,
+#define SD_FLAG(name, mflags) __##name,
 enum {
 	#include <linux/sched/sd_flags.h>
 	__SD_FLAG_CNT,
 };
 #undef SD_FLAG
 /* Generate SD flag bits */
-#define SD_FLAG(name) name = 1 << __##name,
+#define SD_FLAG(name, mflags) name = 1 << __##name,
 enum {
 	#include <linux/sched/sd_flags.h>
 };
 #undef SD_FLAG
 
+#ifdef CONFIG_SCHED_DEBUG
+#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name },
+static const struct {
+	unsigned int meta_flags;
+	char *name;
+} sd_flag_debug[] = {
+#include <linux/sched/sd_flags.h>
+};
+#undef SD_FLAG
+#endif
+
 #ifdef CONFIG_SCHED_SMT
 static inline int cpu_smt_flags(void)
 {