diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d2338927773a..a129b34b8206 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -102,9 +102,12 @@ void update_rq_clock(struct rq *rq)
 
 	lockdep_assert_held(&rq->lock);
 
-	if (rq->clock_skip_update & RQCF_ACT_SKIP)
+	if (rq->clock_update_flags & RQCF_ACT_SKIP)
 		return;
 
+#ifdef CONFIG_SCHED_DEBUG
+	rq->clock_update_flags |= RQCF_UPDATED;
+#endif
 	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
 	if (delta < 0)
 		return;
@@ -2889,7 +2892,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 		rq->prev_mm = oldmm;
 	}
 
-	rq->clock_skip_update = 0;
+	rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
 
 	/*
 	 * Since the runqueue lock will be released by the next
@@ -3364,7 +3367,7 @@ static void __sched notrace __schedule(bool preempt)
 	raw_spin_lock(&rq->lock);
 	rq_pin_lock(rq, &rf);
 
-	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
+	rq->clock_update_flags <<= 1; /* promote REQ to ACT */
 
 	switch_count = &prev->nivcsw;
 	if (!preempt && prev->state) {
@@ -3405,7 +3408,7 @@ static void __sched notrace __schedule(bool preempt)
 		trace_sched_switch(preempt, prev, next);
 		rq = context_switch(rq, prev, next, &rf); /* unlocks the rq */
 	} else {
-		rq->clock_skip_update = 0;
+		rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
 		rq_unpin_lock(rq, &rf);
 		raw_spin_unlock_irq(&rq->lock);
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 98e7eee07237..6eeae7ebd99b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -644,7 +644,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
-	unsigned int clock_skip_update;
+	unsigned int clock_update_flags;
 	u64 clock;
 	u64 clock_task;
 
@@ -768,48 +768,110 @@ static inline u64 __rq_clock_broken(struct rq *rq)
 	return READ_ONCE(rq->clock);
 }
 
+/*
+ * rq::clock_update_flags bits
+ *
+ * %RQCF_REQ_SKIP - will request skipping of clock update on the next
+ *  call to __schedule(). This is an optimisation to avoid
+ *  neighbouring rq clock updates.
+ *
+ * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
+ *  in effect and calls to update_rq_clock() are being ignored.
+ *
+ * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
+ *  made to update_rq_clock() since the last time rq::lock was pinned.
+ *
+ * If inside of __schedule(), clock_update_flags will have been
+ * shifted left (a left shift is a cheap operation for the fast path
+ * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
+ *
+ *	if (rq->clock_update_flags >= RQCF_UPDATED)
+ *
+ * to check if %RQCF_UPDATED is set. It'll never be shifted more than
+ * one position though, because the next rq_unpin_lock() will shift it
+ * back.
+ */
+#define RQCF_REQ_SKIP	0x01
+#define RQCF_ACT_SKIP	0x02
+#define RQCF_UPDATED	0x04
+
+static inline void assert_clock_updated(struct rq *rq)
+{
+	/*
+	 * The only reason for not seeing a clock update since the
+	 * last rq_pin_lock() is if we're currently skipping updates.
+	 */
+	SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
+}
+
 static inline u64 rq_clock(struct rq *rq)
 {
 	lockdep_assert_held(&rq->lock);
+	assert_clock_updated(rq);
+
 	return rq->clock;
 }
 
 static inline u64 rq_clock_task(struct rq *rq)
 {
 	lockdep_assert_held(&rq->lock);
+	assert_clock_updated(rq);
+
 	return rq->clock_task;
 }
 
-#define RQCF_REQ_SKIP	0x01
-#define RQCF_ACT_SKIP	0x02
-
 static inline void rq_clock_skip_update(struct rq *rq, bool skip)
 {
 	lockdep_assert_held(&rq->lock);
 	if (skip)
-		rq->clock_skip_update |= RQCF_REQ_SKIP;
+		rq->clock_update_flags |= RQCF_REQ_SKIP;
 	else
-		rq->clock_skip_update &= ~RQCF_REQ_SKIP;
+		rq->clock_update_flags &= ~RQCF_REQ_SKIP;
 }
 
 struct rq_flags {
 	unsigned long flags;
 	struct pin_cookie cookie;
+#ifdef CONFIG_SCHED_DEBUG
+	/*
+	 * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
+	 * current pin context is stashed here in case it needs to be
+	 * restored in rq_repin_lock().
+	 */
+	unsigned int clock_update_flags;
+#endif
 };
 
 static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
 {
 	rf->cookie = lockdep_pin_lock(&rq->lock);
+
+#ifdef CONFIG_SCHED_DEBUG
+	rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+	rf->clock_update_flags = 0;
+#endif
 }
 
 static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
 {
+#ifdef CONFIG_SCHED_DEBUG
+	if (rq->clock_update_flags > RQCF_ACT_SKIP)
+		rf->clock_update_flags = RQCF_UPDATED;
+#endif
+
 	lockdep_unpin_lock(&rq->lock, rf->cookie);
 }
 
 static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
 {
 	lockdep_repin_lock(&rq->lock, rf->cookie);
+
+#ifdef CONFIG_SCHED_DEBUG
+	/*
+	 * Restore the value we stashed in @rf for this pin context.
+	 */
+	rq->clock_update_flags |= rf->clock_update_flags;
+#endif
 }
 
 #ifdef CONFIG_NUMA
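For readers tracing the flag arithmetic, here is a minimal standalone user-space sketch (not part of the patch; only the RQCF_* values mirror the hunk above, everything else is illustrative) of why the comment's ">= RQCF_UPDATED" test works both before and after __schedule()'s left-shift promotion, and of the condition assert_clock_updated() warns about:

/* Standalone illustration, assuming only the RQCF_* values from the patch. */
#include <assert.h>
#include <stdio.h>

#define RQCF_REQ_SKIP	0x01	/* caller requested a clock-update skip */
#define RQCF_ACT_SKIP	0x02	/* skip is active inside __schedule() */
#define RQCF_UPDATED	0x04	/* debug: update_rq_clock() ran since pinning */

int main(void)
{
	unsigned int flags;

	/* Outside __schedule(): update_rq_clock() has set RQCF_UPDATED. */
	flags = RQCF_UPDATED;
	assert(flags >= RQCF_UPDATED);		/* detected directly */

	/* A skip was also requested via rq_clock_skip_update(rq, true)... */
	flags |= RQCF_REQ_SKIP;

	/*
	 * ...and __schedule() promotes REQ to ACT with one shift:
	 * 0x01 -> 0x02 (REQ becomes ACT), 0x04 -> 0x08 (UPDATED moves up),
	 * so the ">= RQCF_UPDATED" test still holds after the shift.
	 */
	flags <<= 1;
	assert(flags & RQCF_ACT_SKIP);
	assert(flags >= RQCF_UPDATED);

	/*
	 * No update since rq_pin_lock() and no skip in effect is exactly
	 * the case assert_clock_updated() flags with SCHED_WARN_ON().
	 */
	flags = RQCF_REQ_SKIP;
	printf("would warn: %d\n", flags < RQCF_ACT_SKIP);	/* prints 1 */

	return 0;
}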