sched/isolation: Move isolcpus= handling to the housekeeping code

We want to centralize the isolation features, to be done by the housekeeping
subsystem and scheduler domain isolation is a significant part of it.

No intended behaviour change, we just reuse the housekeeping cpumask
and core code.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1509072159-31808-11-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Frederic Weisbecker 2017-10-27 04:42:37 +02:00 committed by Ingo Molnar
parent 6f1982fedd
commit edb9382175
7 changed files with 72 additions and 58 deletions

View File

@ -18,6 +18,7 @@
#include <linux/cpufeature.h>
#include <linux/tick.h>
#include <linux/pm_qos.h>
#include <linux/sched/isolation.h>
#include "base.h"
@ -271,8 +272,16 @@ static ssize_t print_cpus_isolated(struct device *dev,
struct device_attribute *attr, char *buf)
{
int n = 0, len = PAGE_SIZE-2;
cpumask_var_t isolated;
n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map));
if (!alloc_cpumask_var(&isolated, GFP_KERNEL))
return -ENOMEM;
cpumask_andnot(isolated, cpu_possible_mask,
housekeeping_cpumask(HK_FLAG_DOMAIN));
n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated));
free_cpumask_var(isolated);
return n;
}

View File

@ -165,8 +165,6 @@ struct task_group;
/* Task command name length: */
#define TASK_COMM_LEN 16
extern cpumask_var_t cpu_isolated_map;
extern void scheduler_tick(void);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX

View File

@ -11,6 +11,7 @@ enum hk_flags {
HK_FLAG_MISC = (1 << 2),
HK_FLAG_SCHED = (1 << 3),
HK_FLAG_TICK = (1 << 4),
HK_FLAG_DOMAIN = (1 << 5),
};
#ifdef CONFIG_CPU_ISOLATION

View File

@ -57,7 +57,7 @@
#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/oom.h>
#include <linux/sched/isolation.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
@ -656,7 +656,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
cpumask_var_t non_isolated_cpus; /* load balanced CPUs */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] struct cpumask slot */
@ -666,10 +665,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
dattr = NULL;
csa = NULL;
if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
goto done;
cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
ndoms = 1;
@ -683,7 +678,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
update_domain_attr_tree(dattr, &top_cpuset);
}
cpumask_and(doms[0], top_cpuset.effective_cpus,
non_isolated_cpus);
housekeeping_cpumask(HK_FLAG_DOMAIN));
goto done;
}
@ -707,7 +702,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
*/
if (!cpumask_empty(cp->cpus_allowed) &&
!(is_sched_load_balance(cp) &&
cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
cpumask_intersects(cp->cpus_allowed,
housekeeping_cpumask(HK_FLAG_DOMAIN))))
continue;
if (is_sched_load_balance(cp))
@ -789,7 +785,7 @@ restart:
if (apn == b->pn) {
cpumask_or(dp, dp, b->effective_cpus);
cpumask_and(dp, dp, non_isolated_cpus);
cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
@ -802,7 +798,6 @@ restart:
BUG_ON(nslot != ndoms);
done:
free_cpumask_var(non_isolated_cpus);
kfree(csa);
/*

View File

@ -84,9 +84,6 @@ __read_mostly int scheduler_running;
*/
int sysctl_sched_rt_runtime = 950000;
/* CPUs with isolated domains */
cpumask_var_t cpu_isolated_map;
/*
* __task_rq_lock - lock the rq @p resides on.
*/
@ -5735,10 +5732,6 @@ static inline void sched_init_smt(void) { }
void __init sched_init_smp(void)
{
cpumask_var_t non_isolated_cpus;
alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
sched_init_numa();
/*
@ -5748,16 +5741,12 @@ void __init sched_init_smp(void)
*/
mutex_lock(&sched_domains_mutex);
sched_init_domains(cpu_active_mask);
cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
if (cpumask_empty(non_isolated_cpus))
cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
BUG();
sched_init_granularity();
free_cpumask_var(non_isolated_cpus);
init_sched_rt_class();
init_sched_dl_class();
@ -5961,9 +5950,6 @@ void __init sched_init(void)
calc_load_update = jiffies + LOAD_FREQ;
#ifdef CONFIG_SMP
/* May be allocated at isolcpus cmdline parse time */
if (cpu_isolated_map == NULL)
zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
idle_thread_set_boot_cpu();
set_cpu_rq_start_time(smp_processor_id());
#endif

View File

@ -63,32 +63,69 @@ void __init housekeeping_init(void)
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
}
#ifdef CONFIG_NO_HZ_FULL
static int __init housekeeping_nohz_full_setup(char *str)
static int __init housekeeping_setup(char *str, enum hk_flags flags)
{
cpumask_var_t non_housekeeping_mask;
int err;
alloc_bootmem_cpumask_var(&non_housekeeping_mask);
if (cpulist_parse(str, non_housekeeping_mask) < 0) {
pr_warn("Housekeeping: Incorrect nohz_full cpumask\n");
err = cpulist_parse(str, non_housekeeping_mask);
if (err < 0 || cpumask_last(non_housekeeping_mask) >= nr_cpu_ids) {
pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}
alloc_bootmem_cpumask_var(&housekeeping_mask);
cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask);
if (!housekeeping_flags) {
alloc_bootmem_cpumask_var(&housekeeping_mask);
cpumask_andnot(housekeeping_mask,
cpu_possible_mask, non_housekeeping_mask);
if (cpumask_empty(housekeeping_mask))
cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
} else {
cpumask_var_t tmp;
if (cpumask_empty(housekeeping_mask))
cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
alloc_bootmem_cpumask_var(&tmp);
cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
if (!cpumask_equal(tmp, housekeeping_mask)) {
pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
free_bootmem_cpumask_var(tmp);
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}
free_bootmem_cpumask_var(tmp);
}
housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER |
HK_FLAG_RCU | HK_FLAG_MISC;
if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) {
if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
tick_nohz_full_setup(non_housekeeping_mask);
} else {
pr_warn("Housekeeping: nohz unsupported."
" Build with CONFIG_NO_HZ_FULL\n");
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}
}
tick_nohz_full_setup(non_housekeeping_mask);
housekeeping_flags |= flags;
free_bootmem_cpumask_var(non_housekeeping_mask);
return 1;
}
static int __init housekeeping_nohz_full_setup(char *str)
{
unsigned int flags;
flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
return housekeeping_setup(str, flags);
}
__setup("nohz_full=", housekeeping_nohz_full_setup);
#endif
static int __init housekeeping_isolcpus_setup(char *str)
{
return housekeeping_setup(str, HK_FLAG_DOMAIN);
}
__setup("isolcpus=", housekeeping_isolcpus_setup);

View File

@ -3,6 +3,7 @@
*/
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/sched/isolation.h>
#include "sched.h"
@ -469,21 +470,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
update_top_cache_domain(cpu);
}
/* Setup the mask of CPUs configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
{
int ret;
alloc_bootmem_cpumask_var(&cpu_isolated_map);
ret = cpulist_parse(str, cpu_isolated_map);
if (ret || cpumask_last(cpu_isolated_map) >= nr_cpu_ids) {
pr_err("sched: Error, all isolcpus= values must be between 0 and %u - ignoring them.\n", nr_cpu_ids-1);
return 0;
}
return 1;
}
__setup("isolcpus=", isolated_cpu_setup);
struct s_data {
struct sched_domain ** __percpu sd;
struct root_domain *rd;
@ -1792,7 +1778,7 @@ int sched_init_domains(const struct cpumask *cpu_map)
doms_cur = alloc_sched_domains(ndoms_cur);
if (!doms_cur)
doms_cur = &fallback_doms;
cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
err = build_sched_domains(doms_cur[0], NULL);
register_sched_domain_sysctl();
@ -1875,7 +1861,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
doms_new = alloc_sched_domains(1);
if (doms_new) {
n = 1;
cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
cpumask_and(doms_new[0], cpu_active_mask,
housekeeping_cpumask(HK_FLAG_DOMAIN));
}
} else {
n = ndoms_new;
@ -1898,7 +1885,8 @@ match1:
if (!doms_new) {
n = 0;
doms_new = &fallback_doms;
cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
cpumask_and(doms_new[0], cpu_active_mask,
housekeeping_cpumask(HK_FLAG_DOMAIN));
}
/* Build new domains: */