mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-08-27 03:10:12 +00:00
Scheduler fixes/updates:
- Deduplicate the average computations in the scheduler core and the fair class code. - Fix a race between runtime distribution and assignment which can cause exceeding the quota by up to 70%. - Prevent negative results in the imbalance calculation - Remove a stale warning in the workqueue code which can be triggered since the call site was moved out of preempt-disabled code. It's a false positive. - Deduplicate the print macros for procfs - Add the uclamp values to the SCHED_DEBUG procfs output for completeness -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl6TFEoTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoRoPD/9cTERNZb/aT/SVD3TntBs5bMlnRR2U KsyoN7w2bmYGY6XbKdjWX+KorPyJ9YRcUpOfmH+6JmMioM5RrgCtAQ4cXP4q89k3 6+hdHu+WAhp2AyKr5LAYZK+NYdq0fy/9xx7fdXNri8/QakQCy1vu/u6iiKRMmrEf R7zB7v4ddTOTKamUuGBNnmM4GwfrD7td9NksfjDqV4yJ2ZkaQ+apWAPzJK8ixfEa TGjVnnUZA82tZRZ+O4RDN2AVgG0CuXYRYPWRfqi3XgQ3d0+ju5mM7q+6TQeEUuAq 19IoscLhmYGb9yYCY5p0j1AUkyr6FcSFr1SJt/8Jxpyw4JsKw+ANFBA3kkzEBZ+h 0VcWjUw4S6A6+0HdrqUQYjr5tl7RCY+hOE+QQ/Xvrm4PpWi0L+1tB+kgR7VKNPO8 Xqfp/CK7Rcgh2/nUHT4uxdwh4ZNsLo39QGTuahyPXzeRVJTEGUjbzktnEUVc9wmd OfjpC0Q2DBdXx+vnzH82flv/rtUUYor/Owhpq92E6d50Z7CjTa6xRNbmQUiVKls8 jqehpRBUg5cB3TI0BKeb5+kzgeGDGpm7MfZbvSwvoL0stdJdzNwwc4nhOLjPkJFK R5m7cg82Kx+r/00Vb7k6WgYZ10WTS2Il1OKbgwHl5uTlaKuY7TPEIRVuJQV/XUSG 88n4jIwpRbLMNw== =BVqY -----END PGP SIGNATURE----- Merge tag 'sched-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull scheduler fixes/updates from Thomas Gleixner: - Deduplicate the average computations in the scheduler core and the fair class code. - Fix a race between runtime distribution and assignment which can cause exceeding the quota by up to 70%. - Prevent negative results in the imbalance calculation - Remove a stale warning in the workqueue code which can be triggered since the call site was moved out of preempt-disabled code. It's a false positive. 
- Deduplicate the print macros for procfs - Add the uclamp values to the SCHED_DEBUG procfs output for completeness * tag 'sched-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/debug: Add task uclamp values to SCHED_DEBUG procfs sched/debug: Factor out printing formats into common macros sched/debug: Remove redundant macro define sched/core: Remove unused rq::last_load_update_tick workqueue: Remove the warning in wq_worker_sleeping() sched/fair: Fix negative imbalance in imbalance calculation sched/fair: Fix race between runtime distribution and assignment sched/fair: Align rq->avg_idle and rq->avg_scan_cost
This commit is contained in:
commit
590680d139
5 changed files with 51 additions and 62 deletions
|
@ -2119,12 +2119,6 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||||
return cpu;
|
return cpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void update_avg(u64 *avg, u64 sample)
|
|
||||||
{
|
|
||||||
s64 diff = sample - *avg;
|
|
||||||
*avg += diff >> 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
void sched_set_stop_task(int cpu, struct task_struct *stop)
|
void sched_set_stop_task(int cpu, struct task_struct *stop)
|
||||||
{
|
{
|
||||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
|
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
|
||||||
|
@ -4126,7 +4120,8 @@ static inline void sched_submit_work(struct task_struct *tsk)
|
||||||
* it wants to wake up a task to maintain concurrency.
|
* it wants to wake up a task to maintain concurrency.
|
||||||
* As this function is called inside the schedule() context,
|
* As this function is called inside the schedule() context,
|
||||||
* we disable preemption to avoid it calling schedule() again
|
* we disable preemption to avoid it calling schedule() again
|
||||||
* in the possible wakeup of a kworker.
|
* in the possible wakeup of a kworker and because wq_worker_sleeping()
|
||||||
|
* requires it.
|
||||||
*/
|
*/
|
||||||
if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
|
if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
|
@ -6699,7 +6694,6 @@ void __init sched_init(void)
|
||||||
|
|
||||||
rq_attach_root(rq, &def_root_domain);
|
rq_attach_root(rq, &def_root_domain);
|
||||||
#ifdef CONFIG_NO_HZ_COMMON
|
#ifdef CONFIG_NO_HZ_COMMON
|
||||||
rq->last_load_update_tick = jiffies;
|
|
||||||
rq->last_blocked_load_update_tick = jiffies;
|
rq->last_blocked_load_update_tick = jiffies;
|
||||||
atomic_set(&rq->nohz_flags, 0);
|
atomic_set(&rq->nohz_flags, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -816,10 +816,12 @@ static int __init init_sched_debug_procfs(void)
|
||||||
|
|
||||||
__initcall(init_sched_debug_procfs);
|
__initcall(init_sched_debug_procfs);
|
||||||
|
|
||||||
#define __P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
|
#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
|
||||||
#define P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
|
#define __P(F) __PS(#F, F)
|
||||||
#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
|
#define P(F) __PS(#F, p->F)
|
||||||
#define PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
|
#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
|
||||||
|
#define __PN(F) __PSN(#F, F)
|
||||||
|
#define PN(F) __PSN(#F, p->F)
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA_BALANCING
|
#ifdef CONFIG_NUMA_BALANCING
|
||||||
|
@ -868,18 +870,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||||
SEQ_printf(m,
|
SEQ_printf(m,
|
||||||
"---------------------------------------------------------"
|
"---------------------------------------------------------"
|
||||||
"----------\n");
|
"----------\n");
|
||||||
#define __P(F) \
|
|
||||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
|
#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->F))
|
||||||
#define P(F) \
|
#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
|
||||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
|
|
||||||
#define P_SCHEDSTAT(F) \
|
|
||||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
|
|
||||||
#define __PN(F) \
|
|
||||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
|
|
||||||
#define PN(F) \
|
|
||||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
|
|
||||||
#define PN_SCHEDSTAT(F) \
|
|
||||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
|
|
||||||
|
|
||||||
PN(se.exec_start);
|
PN(se.exec_start);
|
||||||
PN(se.vruntime);
|
PN(se.vruntime);
|
||||||
|
@ -939,10 +932,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||||
}
|
}
|
||||||
|
|
||||||
__P(nr_switches);
|
__P(nr_switches);
|
||||||
SEQ_printf(m, "%-45s:%21Ld\n",
|
__PS("nr_voluntary_switches", p->nvcsw);
|
||||||
"nr_voluntary_switches", (long long)p->nvcsw);
|
__PS("nr_involuntary_switches", p->nivcsw);
|
||||||
SEQ_printf(m, "%-45s:%21Ld\n",
|
|
||||||
"nr_involuntary_switches", (long long)p->nivcsw);
|
|
||||||
|
|
||||||
P(se.load.weight);
|
P(se.load.weight);
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
@ -955,6 +946,12 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||||
P(se.avg.last_update_time);
|
P(se.avg.last_update_time);
|
||||||
P(se.avg.util_est.ewma);
|
P(se.avg.util_est.ewma);
|
||||||
P(se.avg.util_est.enqueued);
|
P(se.avg.util_est.enqueued);
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_UCLAMP_TASK
|
||||||
|
__PS("uclamp.min", p->uclamp[UCLAMP_MIN].value);
|
||||||
|
__PS("uclamp.max", p->uclamp[UCLAMP_MAX].value);
|
||||||
|
__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
|
||||||
|
__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
|
||||||
#endif
|
#endif
|
||||||
P(policy);
|
P(policy);
|
||||||
P(prio);
|
P(prio);
|
||||||
|
@ -963,11 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||||
P(dl.deadline);
|
P(dl.deadline);
|
||||||
}
|
}
|
||||||
#undef PN_SCHEDSTAT
|
#undef PN_SCHEDSTAT
|
||||||
#undef PN
|
|
||||||
#undef __PN
|
|
||||||
#undef P_SCHEDSTAT
|
#undef P_SCHEDSTAT
|
||||||
#undef P
|
|
||||||
#undef __P
|
|
||||||
|
|
||||||
{
|
{
|
||||||
unsigned int this_cpu = raw_smp_processor_id();
|
unsigned int this_cpu = raw_smp_processor_id();
|
||||||
|
@ -975,8 +968,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||||
|
|
||||||
t0 = cpu_clock(this_cpu);
|
t0 = cpu_clock(this_cpu);
|
||||||
t1 = cpu_clock(this_cpu);
|
t1 = cpu_clock(this_cpu);
|
||||||
SEQ_printf(m, "%-45s:%21Ld\n",
|
__PS("clock-delta", t1-t0);
|
||||||
"clock-delta", (long long)(t1-t0));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sched_show_numa(p, m);
|
sched_show_numa(p, m);
|
||||||
|
|
|
@ -4836,11 +4836,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||||
resched_curr(rq);
|
resched_curr(rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
|
||||||
{
|
{
|
||||||
struct cfs_rq *cfs_rq;
|
struct cfs_rq *cfs_rq;
|
||||||
u64 runtime;
|
u64 runtime, remaining = 1;
|
||||||
u64 starting_runtime = remaining;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
|
list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
|
||||||
|
@ -4855,10 +4854,13 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
||||||
/* By the above check, this should never be true */
|
/* By the above check, this should never be true */
|
||||||
SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
|
SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
|
||||||
|
|
||||||
|
raw_spin_lock(&cfs_b->lock);
|
||||||
runtime = -cfs_rq->runtime_remaining + 1;
|
runtime = -cfs_rq->runtime_remaining + 1;
|
||||||
if (runtime > remaining)
|
if (runtime > cfs_b->runtime)
|
||||||
runtime = remaining;
|
runtime = cfs_b->runtime;
|
||||||
remaining -= runtime;
|
cfs_b->runtime -= runtime;
|
||||||
|
remaining = cfs_b->runtime;
|
||||||
|
raw_spin_unlock(&cfs_b->lock);
|
||||||
|
|
||||||
cfs_rq->runtime_remaining += runtime;
|
cfs_rq->runtime_remaining += runtime;
|
||||||
|
|
||||||
|
@ -4873,8 +4875,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
return starting_runtime - remaining;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -4885,7 +4885,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
||||||
*/
|
*/
|
||||||
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
|
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
|
||||||
{
|
{
|
||||||
u64 runtime;
|
|
||||||
int throttled;
|
int throttled;
|
||||||
|
|
||||||
/* no need to continue the timer with no bandwidth constraint */
|
/* no need to continue the timer with no bandwidth constraint */
|
||||||
|
@ -4914,24 +4913,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
|
||||||
cfs_b->nr_throttled += overrun;
|
cfs_b->nr_throttled += overrun;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This check is repeated as we are holding onto the new bandwidth while
|
* This check is repeated as we release cfs_b->lock while we unthrottle.
|
||||||
* we unthrottle. This can potentially race with an unthrottled group
|
|
||||||
* trying to acquire new bandwidth from the global pool. This can result
|
|
||||||
* in us over-using our runtime if it is all used during this loop, but
|
|
||||||
* only by limited amounts in that extreme case.
|
|
||||||
*/
|
*/
|
||||||
while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
|
while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
|
||||||
runtime = cfs_b->runtime;
|
|
||||||
cfs_b->distribute_running = 1;
|
cfs_b->distribute_running = 1;
|
||||||
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||||
/* we can't nest cfs_b->lock while distributing bandwidth */
|
/* we can't nest cfs_b->lock while distributing bandwidth */
|
||||||
runtime = distribute_cfs_runtime(cfs_b, runtime);
|
distribute_cfs_runtime(cfs_b);
|
||||||
raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||||
|
|
||||||
cfs_b->distribute_running = 0;
|
cfs_b->distribute_running = 0;
|
||||||
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
|
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
|
||||||
|
|
||||||
lsub_positive(&cfs_b->runtime, runtime);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -5065,10 +5057,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
|
||||||
if (!runtime)
|
if (!runtime)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
runtime = distribute_cfs_runtime(cfs_b, runtime);
|
distribute_cfs_runtime(cfs_b);
|
||||||
|
|
||||||
raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||||
lsub_positive(&cfs_b->runtime, runtime);
|
|
||||||
cfs_b->distribute_running = 0;
|
cfs_b->distribute_running = 0;
|
||||||
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||||
}
|
}
|
||||||
|
@ -6080,8 +6071,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
|
||||||
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
|
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
|
||||||
struct sched_domain *this_sd;
|
struct sched_domain *this_sd;
|
||||||
u64 avg_cost, avg_idle;
|
u64 avg_cost, avg_idle;
|
||||||
u64 time, cost;
|
u64 time;
|
||||||
s64 delta;
|
|
||||||
int this = smp_processor_id();
|
int this = smp_processor_id();
|
||||||
int cpu, nr = INT_MAX;
|
int cpu, nr = INT_MAX;
|
||||||
|
|
||||||
|
@ -6119,9 +6109,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
|
||||||
}
|
}
|
||||||
|
|
||||||
time = cpu_clock(this) - time;
|
time = cpu_clock(this) - time;
|
||||||
cost = this_sd->avg_scan_cost;
|
update_avg(&this_sd->avg_scan_cost, time);
|
||||||
delta = (s64)(time - cost) / 8;
|
|
||||||
this_sd->avg_scan_cost += delta;
|
|
||||||
|
|
||||||
return cpu;
|
return cpu;
|
||||||
}
|
}
|
||||||
|
@ -9048,6 +9036,14 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||||
|
|
||||||
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
|
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
|
||||||
sds->total_capacity;
|
sds->total_capacity;
|
||||||
|
/*
|
||||||
|
* If the local group is more loaded than the selected
|
||||||
|
* busiest group don't try to pull any tasks.
|
||||||
|
*/
|
||||||
|
if (local->avg_load >= busiest->avg_load) {
|
||||||
|
env->imbalance = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -195,6 +195,12 @@ static inline int task_has_dl_policy(struct task_struct *p)
|
||||||
|
|
||||||
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
|
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
|
||||||
|
|
||||||
|
static inline void update_avg(u64 *avg, u64 sample)
|
||||||
|
{
|
||||||
|
s64 diff = sample - *avg;
|
||||||
|
*avg += diff / 8;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* !! For sched_setattr_nocheck() (kernel) only !!
|
* !! For sched_setattr_nocheck() (kernel) only !!
|
||||||
*
|
*
|
||||||
|
@ -882,7 +888,6 @@ struct rq {
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_NO_HZ_COMMON
|
#ifdef CONFIG_NO_HZ_COMMON
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
unsigned long last_load_update_tick;
|
|
||||||
unsigned long last_blocked_load_update_tick;
|
unsigned long last_blocked_load_update_tick;
|
||||||
unsigned int has_blocked_load;
|
unsigned int has_blocked_load;
|
||||||
#endif /* CONFIG_SMP */
|
#endif /* CONFIG_SMP */
|
||||||
|
|
|
@ -858,7 +858,8 @@ void wq_worker_running(struct task_struct *task)
|
||||||
* @task: task going to sleep
|
* @task: task going to sleep
|
||||||
*
|
*
|
||||||
* This function is called from schedule() when a busy worker is
|
* This function is called from schedule() when a busy worker is
|
||||||
* going to sleep.
|
* going to sleep. Preemption needs to be disabled to protect ->sleeping
|
||||||
|
* assignment.
|
||||||
*/
|
*/
|
||||||
void wq_worker_sleeping(struct task_struct *task)
|
void wq_worker_sleeping(struct task_struct *task)
|
||||||
{
|
{
|
||||||
|
@ -875,7 +876,8 @@ void wq_worker_sleeping(struct task_struct *task)
|
||||||
|
|
||||||
pool = worker->pool;
|
pool = worker->pool;
|
||||||
|
|
||||||
if (WARN_ON_ONCE(worker->sleeping))
|
/* Return if preempted before wq_worker_running() was reached */
|
||||||
|
if (worker->sleeping)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
worker->sleeping = 1;
|
worker->sleeping = 1;
|
||||||
|
|
Loading…
Reference in a new issue