Merge branch 'clocksource' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into timers/core

Pull clocksource watchdog updates from Paul McKenney:

 - Avoid accidental unstable marking of clocksources by rejecting
   clocksource measurements where the source of the skew is the delay
   reading reference clocksource itself.  This change avoids many of the
   current false positives caused by epic cache-thrashing workloads.

 - Reduce the default clocksource_watchdog() retries to 2, thus offsetting
   the increased overhead due to #1 above rereading the reference
   clocksource.

Link: https://lore.kernel.org/lkml/20220105001723.GA536708@paulmck-ThinkPad-P17-Gen-1
This commit is contained in:
Thomas Gleixner 2022-01-10 13:57:17 +01:00
commit 35e13e9da9
7 changed files with 49 additions and 12 deletions

View file

@ -603,8 +603,8 @@
clocksource.max_cswd_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
unstable. Defaults to three retries, that is,
four attempts to read the clock under test.
unstable. Defaults to two retries, that is,
three attempts to read the clock under test.
clocksource.verify_n_cpus= [KNL]
Limit the number of CPUs checked for clocksources

View file

@ -107,7 +107,7 @@ static u64 suspend_start;
* This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as
* a lower bound for cs->uncertainty_margin values when registering clocks.
*/
#define WATCHDOG_MAX_SKEW (50 * NSEC_PER_USEC)
#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
@ -199,23 +199,30 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}
ulong max_cswd_read_retries = 3;
ulong max_cswd_read_retries = 2;
module_param(max_cswd_read_retries, ulong, 0644);
EXPORT_SYMBOL_GPL(max_cswd_read_retries);
static int verify_n_cpus = 8;
module_param(verify_n_cpus, int, 0644);
static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
enum wd_read_status {
WD_READ_SUCCESS,
WD_READ_UNSTABLE,
WD_READ_SKIP
};
static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
unsigned int nretries;
u64 wd_end, wd_delta;
int64_t wd_delay;
u64 wd_end, wd_end2, wd_delta;
int64_t wd_delay, wd_seq_delay;
for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
local_irq_disable();
*wdnow = watchdog->read(watchdog);
*csnow = cs->read(cs);
wd_end = watchdog->read(watchdog);
wd_end2 = watchdog->read(watchdog);
local_irq_enable();
wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
@ -226,13 +233,34 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
smp_processor_id(), watchdog->name, nretries);
}
return true;
return WD_READ_SUCCESS;
}
/*
* Now compute delay in consecutive watchdog read to see if
* there is too much external interferences that cause
* significant delay in reading both clocksource and watchdog.
*
* If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2,
* report system busy, reinit the watchdog and skip the current
* watchdog test.
*/
wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
goto skip_test;
}
pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
smp_processor_id(), watchdog->name, wd_delay, nretries);
return false;
return WD_READ_UNSTABLE;
skip_test:
pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
smp_processor_id(), watchdog->name, wd_seq_delay);
pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
cs->name, wd_delay);
return WD_READ_SKIP;
}
static u64 csnow_mid;
@ -356,6 +384,7 @@ static void clocksource_watchdog(struct timer_list *unused)
int next_cpu, reset_pending;
int64_t wd_nsec, cs_nsec;
struct clocksource *cs;
enum wd_read_status read_ret;
u32 md;
spin_lock(&watchdog_lock);
@ -373,9 +402,12 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}
if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
/* Clock readout unreliable, so give it up. */
__clocksource_unstable(cs);
read_ret = cs_watchdog_read(cs, &csnow, &wdnow);
if (read_ret != WD_READ_SUCCESS) {
if (read_ret == WD_READ_UNSTABLE)
/* Clock readout unreliable, so give it up. */
__clocksource_unstable(cs);
continue;
}

View file

@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_SRCU=y
CONFIG_RCU_TRACE=n
CONFIG_DEBUG_LOCK_ALLOC=y

View file

@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_SRCU=y
CONFIG_RCU_TRACE=n
CONFIG_DEBUG_LOCK_ALLOC=n

View file

@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y

View file

@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=y
CONFIG_NO_HZ_IDLE=n

View file

@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y