diff --git a/MAINTAINERS b/MAINTAINERS index f9faadef7ab7..64168f6dd89e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5185,7 +5185,7 @@ F: kernel/delayacct.c PERFORMANCE EVENTS SUBSYSTEM M: Peter Zijlstra M: Paul Mackerras -M: Ingo Molnar +M: Ingo Molnar M: Arnaldo Carvalho de Melo T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core S: Supported @@ -5833,7 +5833,7 @@ S: Maintained F: drivers/watchdog/sc1200wdt.c SCHEDULER -M: Ingo Molnar +M: Ingo Molnar M: Peter Zijlstra T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core S: Maintained diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5104a2b685cf..ce13315d48fb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -219,14 +219,9 @@ static void __cpuinit smp_callin(void) * Update loops_per_jiffy in cpu_data. Previous call to * smp_store_cpu_info() stored a value that is close but not as * accurate as the value just calculated. - * - * Need to enable IRQs because it can take longer and then - * the NMI watchdog might kill us. */ - local_irq_enable(); calibrate_delay(); cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; - local_irq_disable(); pr_debug("Stack at about %p\n", &cpuid); /* diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 7a7e5fd2a277..668f66baac7b 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -22,7 +22,7 @@ extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_update_active_cpus(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); -extern int cpuset_cpus_allowed_fallback(struct task_struct *p); +extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -135,10 +135,8 @@ static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_copy(mask, cpu_possible_mask); } -static inline int cpuset_cpus_allowed_fallback(struct task_struct *p) +static inline void cpuset_cpus_allowed_fallback(struct task_struct *p) { - do_set_cpus_allowed(p, cpu_possible_mask); - return cpumask_any(cpu_active_mask); } static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1010cc61931f..b96ad75b7e64 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2162,10 +2162,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) mutex_unlock(&callback_mutex); } -int cpuset_cpus_allowed_fallback(struct task_struct *tsk) +void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { const struct cpuset *cs; - int cpu; rcu_read_lock(); cs = task_cs(tsk); @@ -2186,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary * set any mask even if it is not right from task_cs() pov, * the pending set_cpus_allowed_ptr() will fix things. + * + * select_fallback_rq() will fix things ups and set cpu_possible_mask + * if required. */ - - cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask); - if (cpu >= nr_cpu_ids) { - /* - * Either tsk->cpus_allowed is wrong (see above) or it - * is actually empty. The latter case is only possible - * if we are racing with remove_tasks_in_empty_cpuset(). - * Like above we can temporary set any mask and rely on - * set_cpus_allowed_ptr() as synchronization point. - */ - do_set_cpus_allowed(tsk, cpu_possible_mask); - cpu = cpumask_any(cpu_active_mask); - } - - return cpu; } void cpuset_init_current_mems_allowed(void) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 157fb9b2b186..e3ed0ecee7c7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1265,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process); */ static int select_fallback_rq(int cpu, struct task_struct *p) { - int dest_cpu; const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); + enum { cpuset, possible, fail } state = cpuset; + int dest_cpu; /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) + for_each_cpu_mask(dest_cpu, *nodemask) { + if (!cpu_online(dest_cpu)) + continue; + if (!cpu_active(dest_cpu)) + continue; if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) return dest_cpu; + } - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); - if (dest_cpu < nr_cpu_ids) - return dest_cpu; + for (;;) { + /* Any allowed, online CPU? */ + for_each_cpu_mask(dest_cpu, *tsk_cpus_allowed(p)) { + if (!cpu_online(dest_cpu)) + continue; + if (!cpu_active(dest_cpu)) + continue; + goto out; + } - /* No more Mr. Nice Guy. */ - dest_cpu = cpuset_cpus_allowed_fallback(p); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk_sched("process %d (%s) no longer affine to cpu%d\n", - task_pid_nr(p), p->comm, cpu); + switch (state) { + case cpuset: + /* No more Mr. Nice Guy. */ + cpuset_cpus_allowed_fallback(p); + state = possible; + break; + + case possible: + do_set_cpus_allowed(p, cpu_possible_mask); + state = fail; + break; + + case fail: + BUG(); + break; + } + } + +out: + if (state != cpuset) { + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk_sched("process %d (%s) no longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); + } } return dest_cpu; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 94340c7544a9..0d97ebdc58f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) #endif /* CONFIG_FAIR_GROUP_SCHED */ -static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec); +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec); /************************************************************** * Scheduling class tree data structure manipulation methods: @@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) __clear_buddies_skip(se); } -static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); +static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, resched_task(rq_of(cfs_rq)->curr); } -static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec) +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) { if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) return; @@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq) } #else /* CONFIG_CFS_BANDWIDTH */ -static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec) {} +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} -static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} +static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) { diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index b60dad720173..44af55e6d5d0 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) next_idx: if (idx >= MAX_RT_PRIO) continue; - if (next && next->prio < idx) + if (next && next->prio <= idx) continue; list_for_each_entry(rt_se, array->queue + idx, run_list) { struct task_struct *p;