s390/topology: add drawer scheduling domain level

The z13 machine added a fourth level to the cpu topology
information. The new top level is called drawer.

A drawer contains two books, which used to be the top level.

Adding this additional scheduling domain did show performance
improvements for some workloads of up to 8%, while there don't
seem to be any workloads impacted in a negative way.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Heiko Carstens 2016-05-25 10:25:50 +02:00 committed by Martin Schwidefsky
parent a62247e1f5
commit adac0f1e8c
4 changed files with 55 additions and 11 deletions

View File

@ -478,6 +478,9 @@ config SCHED_MC
config SCHED_BOOK
def_bool n
config SCHED_DRAWER
def_bool n
config SCHED_TOPOLOGY
def_bool y
prompt "Topology scheduler support"
@ -485,6 +488,7 @@ config SCHED_TOPOLOGY
select SCHED_SMT
select SCHED_MC
select SCHED_BOOK
select SCHED_DRAWER
help
Topology scheduler support improves the CPU scheduler's decision
making when dealing with machines that have multi-threading,

View File

@ -14,10 +14,12 @@ struct cpu_topology_s390 {
unsigned short core_id;
unsigned short socket_id;
unsigned short book_id;
unsigned short drawer_id;
unsigned short node_id;
cpumask_t thread_mask;
cpumask_t core_mask;
cpumask_t book_mask;
cpumask_t drawer_mask;
};
DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
@ -30,6 +32,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
#define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask)
#define topology_drawer_id(cpu) (per_cpu(cpu_topology, cpu).drawer_id)
#define topology_drawer_cpumask(cpu) (&per_cpu(cpu_topology, cpu).drawer_mask)
#define mc_capable() 1

View File

@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn);
*/
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;
DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
@ -80,6 +81,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
}
static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
struct mask_info *drawer,
struct mask_info *book,
struct mask_info *socket,
int one_socket_per_cpu)
@ -97,9 +99,11 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
continue;
for (i = 0; i <= smp_cpu_mtid; i++) {
topo = &per_cpu(cpu_topology, lcpu + i);
topo->drawer_id = drawer->id;
topo->book_id = book->id;
topo->core_id = rcore;
topo->thread_id = lcpu + i;
cpumask_set_cpu(lcpu + i, &drawer->mask);
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
if (one_socket_per_cpu)
@ -128,6 +132,11 @@ static void clear_masks(void)
cpumask_clear(&info->mask);
info = info->next;
}
info = &drawer_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
}
static union topology_entry *next_tle(union topology_entry *tle)
@ -141,12 +150,17 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
struct mask_info *drawer = &drawer_info;
union topology_entry *tle, *end;
tle = info->tle;
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
case 3:
drawer = drawer->next;
drawer->id = tle->container.id;
break;
case 2:
book = book->next;
book->id = tle->container.id;
@ -156,7 +170,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
socket->id = tle->container.id;
break;
case 0:
add_cpus_to_mask(&tle->cpu, book, socket, 0);
add_cpus_to_mask(&tle->cpu, drawer, book, socket, 0);
break;
default:
clear_masks();
@ -170,6 +184,7 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
struct mask_info *drawer = &drawer_info;
union topology_entry *tle, *end;
tle = info->tle;
@ -181,7 +196,7 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
book->id = tle->container.id;
break;
case 0:
socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
socket = add_cpus_to_mask(&tle->cpu, drawer, book, socket, 1);
break;
default:
clear_masks();
@ -257,11 +272,13 @@ static void update_cpu_masks(void)
topo->thread_mask = cpu_thread_map(cpu);
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
if (!MACHINE_HAS_TOPOLOGY) {
topo->thread_id = cpu;
topo->core_id = cpu;
topo->socket_id = cpu;
topo->book_id = cpu;
topo->drawer_id = cpu;
}
}
numa_update_cpu_topology();
@ -269,10 +286,7 @@ static void update_cpu_masks(void)
void store_topology(struct sysinfo_15_1_x *info)
{
if (topology_max_mnest >= 3)
stsi(info, 15, 1, 3);
else
stsi(info, 15, 1, 2);
stsi(info, 15, 1, min(topology_max_mnest, 4));
}
int arch_update_cpu_topology(void)
@ -442,6 +456,11 @@ static const struct cpumask *cpu_book_mask(int cpu)
return &per_cpu(cpu_topology, cpu).book_mask;
}
static const struct cpumask *cpu_drawer_mask(int cpu)
{
return &per_cpu(cpu_topology, cpu).drawer_mask;
}
static int __init early_parse_topology(char *p)
{
return kstrtobool(p, &topology_enabled);
@ -452,6 +471,7 @@ static struct sched_domain_topology_level s390_topology[] = {
{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_book_mask, SD_INIT_NAME(BOOK) },
{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};
@ -487,6 +507,7 @@ static int __init s390_topology_init(void)
printk(KERN_CONT " / %d\n", info->mnest);
alloc_masks(info, &socket_info, 1);
alloc_masks(info, &book_info, 2);
alloc_masks(info, &drawer_info, 3);
set_sched_topology(s390_topology);
return 0;
}

View File

@ -34,7 +34,8 @@
#define DIST_CORE 1
#define DIST_MC 2
#define DIST_BOOK 3
#define DIST_MAX 4
#define DIST_DRAWER 4
#define DIST_MAX 5
/* Node distance reported to common code */
#define EMU_NODE_DIST 10
@ -43,7 +44,7 @@
#define NODE_ID_FREE -1
/* Different levels of toptree */
enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};
/* The two toptree IDs */
enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
@ -113,6 +114,14 @@ static int cores_free(struct toptree *tree)
* Return node of core
*/
static struct toptree *core_node(struct toptree *core)
{
return core->parent->parent->parent->parent;
}
/*
* Return drawer of core
*/
static struct toptree *core_drawer(struct toptree *core)
{
return core->parent->parent->parent;
}
@ -138,6 +147,8 @@ static struct toptree *core_mc(struct toptree *core)
*/
static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
{
if (core_drawer(core1)->id != core_drawer(core2)->id)
return DIST_DRAWER;
if (core_book(core1)->id != core_book(core2)->id)
return DIST_BOOK;
if (core_mc(core1)->id != core_mc(core2)->id)
@ -262,6 +273,8 @@ static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
struct toptree *core;
/* Always try to move perfectly fitting structures first */
move_level_to_numa(numa, phys, DRAWER, true);
move_level_to_numa(numa, phys, DRAWER, false);
move_level_to_numa(numa, phys, BOOK, true);
move_level_to_numa(numa, phys, BOOK, false);
move_level_to_numa(numa, phys, MC, true);
@ -335,7 +348,7 @@ static struct toptree *toptree_to_numa(struct toptree *phys)
*/
static struct toptree *toptree_from_topology(void)
{
struct toptree *phys, *node, *book, *mc, *core;
struct toptree *phys, *node, *drawer, *book, *mc, *core;
struct cpu_topology_s390 *top;
int cpu;
@ -344,10 +357,11 @@ static struct toptree *toptree_from_topology(void)
for_each_online_cpu(cpu) {
top = &per_cpu(cpu_topology, cpu);
node = toptree_get_child(phys, 0);
book = toptree_get_child(node, top->book_id);
drawer = toptree_get_child(node, top->drawer_id);
book = toptree_get_child(drawer, top->book_id);
mc = toptree_get_child(book, top->socket_id);
core = toptree_get_child(mc, top->core_id);
if (!book || !mc || !core)
if (!drawer || !book || !mc || !core)
panic("NUMA emulation could not allocate memory");
cpumask_set_cpu(cpu, &core->mask);
toptree_update_mask(mc);
@ -368,6 +382,7 @@ static void topology_add_core(struct toptree *core)
cpumask_copy(&top->thread_mask, &core->mask);
cpumask_copy(&top->core_mask, &core_mc(core)->mask);
cpumask_copy(&top->book_mask, &core_book(core)->mask);
cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
top->node_id = core_node(core)->id;
}