Merge branch 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 timer updates from Thomas Gleixner:
 "These updates are related to TSC handling:

   - Support platforms which have synchronized TSCs but the boot CPU has
     a non zero TSC_ADJUST value, which is considered a firmware bug on
     normal systems.

     This applies to HPE/SGI UV platforms where the platform firmware
     uses TSC_ADJUST to ensure TSC synchronization across a huge number
     of sockets, but due to power on timings the boot CPU cannot be
     guaranteed to have a zero TSC_ADJUST register value.

   - Fix the ordering of udelay calibration and kvmclock_init()

   - Cleanup the udelay and calibration code"

* 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Mark cyc2ns_init() and detect_art() __init
  x86/platform/UV: Mark tsc_check_sync as an init function
  x86/tsc: Make CONFIG_X86_TSC=n build work again
  x86/platform/UV: Add check of TSC state set by UV BIOS
  x86/tsc: Provide a means to disable TSC ART
  x86/tsc: Drastically reduce the number of firmware bug warnings
  x86/tsc: Skip TSC test and error messages if already unstable
  x86/tsc: Add option that TSC on Socket 0 being non-zero is valid
  x86/timers: Move simple_udelay_calibration() past kvmclock_init()
  x86/timers: Make recalibrate_cpu_khz() void
  x86/timers: Move the simple udelay calibration to tsc.h
This commit is contained in:
Linus Torvalds 2017-11-13 19:07:38 -08:00
commit 99306dfc06
7 changed files with 148 additions and 51 deletions

View File

@ -9,7 +9,7 @@
#define TICK_SIZE (tick_nsec / 1000)
unsigned long long native_sched_clock(void);
extern int recalibrate_cpu_khz(void);
extern void recalibrate_cpu_khz(void);
extern int no_timer_check;

View File

@ -32,15 +32,22 @@ static inline cycles_t get_cycles(void)
extern struct system_counterval_t convert_art_to_tsc(u64 art);
extern void tsc_early_delay_calibrate(void);
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
extern void mark_tsc_async_resets(char *reason);
extern unsigned long native_calibrate_cpu(void);
extern unsigned long native_calibrate_tsc(void);
extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
extern int tsc_clocksource_reliable;
#ifdef CONFIG_X86_TSC
extern bool tsc_async_resets;
#else
# define tsc_async_resets false
#endif
/*
* Boot-time check whether the TSCs are synchronized across

View File

@ -776,23 +776,36 @@ static inline int uv_num_possible_blades(void)
extern void uv_nmi_setup(void);
extern void uv_nmi_setup_hubless(void);
/* BIOS/Kernel flags exchange MMR */
#define UVH_BIOS_KERNEL_MMR UVH_SCRATCH5
#define UVH_BIOS_KERNEL_MMR_ALIAS UVH_SCRATCH5_ALIAS
#define UVH_BIOS_KERNEL_MMR_ALIAS_2 UVH_SCRATCH5_ALIAS_2
/* TSC sync valid, set by BIOS */
#define UVH_TSC_SYNC_MMR UVH_BIOS_KERNEL_MMR
#define UVH_TSC_SYNC_SHIFT 10
#define UVH_TSC_SYNC_SHIFT_UV2K 16 /* UV2/3k have different bits */
#define UVH_TSC_SYNC_MASK 3 /* 0011 */
#define UVH_TSC_SYNC_VALID 3 /* 0011 */
#define UVH_TSC_SYNC_INVALID 2 /* 0010 */
/* BMC sets a bit this MMR non-zero before sending an NMI */
#define UVH_NMI_MMR UVH_SCRATCH5
#define UVH_NMI_MMR_CLEAR UVH_SCRATCH5_ALIAS
#define UVH_NMI_MMR UVH_BIOS_KERNEL_MMR
#define UVH_NMI_MMR_CLEAR UVH_BIOS_KERNEL_MMR_ALIAS
#define UVH_NMI_MMR_SHIFT 63
#define UVH_NMI_MMR_TYPE "SCRATCH5"
#define UVH_NMI_MMR_TYPE "SCRATCH5"
/* Newer SMM NMI handler, not present in all systems */
#define UVH_NMI_MMRX UVH_EVENT_OCCURRED0
#define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS
#define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
#define UVH_NMI_MMRX_TYPE "EXTIO_INT0"
#define UVH_NMI_MMRX_TYPE "EXTIO_INT0"
/* Non-zero indicates newer SMM NMI handler present */
#define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST
/* Indicates to BIOS that we want to use the newer SMM NMI handler */
#define UVH_NMI_MMRX_REQ UVH_SCRATCH5_ALIAS_2
#define UVH_NMI_MMRX_REQ UVH_BIOS_KERNEL_MMR_ALIAS_2
#define UVH_NMI_MMRX_REQ_SHIFT 62
struct uv_hub_nmi_s {

View File

@ -154,6 +154,48 @@ static int __init early_get_pnodeid(void)
return pnode;
}
static void __init uv_tsc_check_sync(void)
{
u64 mmr;
int sync_state;
int mmr_shift;
char *state;
bool valid;
/* Accommodate different UV arch BIOSes */
mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
mmr_shift =
is_uv1_hub() ? 0 :
is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
if (mmr_shift)
sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK;
else
sync_state = 0;
switch (sync_state) {
case UVH_TSC_SYNC_VALID:
state = "in sync";
valid = true;
break;
case UVH_TSC_SYNC_INVALID:
state = "unstable";
valid = false;
break;
default:
state = "unknown: assuming valid";
valid = true;
break;
}
pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state);
/* Mark flag that says TSC != 0 is valid for socket 0 */
if (valid)
mark_tsc_async_resets("UV BIOS");
else
mark_tsc_unstable("UV BIOS");
}
/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
#define SMT_LEVEL 0 /* Leaf 0xb SMT level */
@ -288,6 +330,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
}
pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic);
uv_tsc_check_sync();
return uv_apic;

View File

@ -812,26 +812,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
return 0;
}
static void __init simple_udelay_calibration(void)
{
unsigned int tsc_khz, cpu_khz;
unsigned long lpj;
if (!boot_cpu_has(X86_FEATURE_TSC))
return;
cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
tsc_khz = tsc_khz ? : cpu_khz;
if (!tsc_khz)
return;
lpj = tsc_khz * 1000;
do_div(lpj, HZ);
loops_per_jiffy = lpj;
}
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@ -1039,8 +1019,6 @@ void __init setup_arch(char **cmdline_p)
*/
init_hypervisor_platform();
simple_udelay_calibration();
x86_init.resources.probe_roms();
/* after parse_early_param, so could debug it */
@ -1125,9 +1103,6 @@ void __init setup_arch(char **cmdline_p)
memblock_set_current_limit(ISA_END_ADDRESS);
e820__memblock_setup();
if (!early_xdbc_setup_hardware())
early_xdbc_register_console();
reserve_bios_regions();
if (efi_enabled(EFI_MEMMAP)) {
@ -1233,6 +1208,10 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init();
#endif
tsc_early_delay_calibrate();
if (!early_xdbc_setup_hardware())
early_xdbc_register_console();
x86_init.paging.pagetable_init();
kasan_init();

View File

@ -112,7 +112,7 @@ static void cyc2ns_data_init(struct cyc2ns_data *data)
data->cyc2ns_offset = 0;
}
static void cyc2ns_init(int cpu)
static void __init cyc2ns_init(int cpu)
{
struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
@ -812,13 +812,13 @@ unsigned long native_calibrate_cpu(void)
return tsc_pit_min;
}
int recalibrate_cpu_khz(void)
void recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
unsigned long cpu_khz_old = cpu_khz;
if (!boot_cpu_has(X86_FEATURE_TSC))
return -ENODEV;
return;
cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
@ -828,10 +828,6 @@ int recalibrate_cpu_khz(void)
cpu_khz = tsc_khz;
cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
cpu_khz_old, cpu_khz);
return 0;
#else
return -ENODEV;
#endif
}
@ -959,17 +955,21 @@ core_initcall(cpufreq_register_tsc_scaling);
/*
* If ART is present detect the numerator:denominator to convert to TSC
*/
static void detect_art(void)
static void __init detect_art(void)
{
unsigned int unused[2];
if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
return;
/* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */
/*
* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required,
* and the TSC counter resets must not occur asynchronously.
*/
if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
!boot_cpu_has(X86_FEATURE_TSC_ADJUST) ||
tsc_async_resets)
return;
cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
@ -1263,6 +1263,25 @@ static int __init init_tsc_clocksource(void)
*/
device_initcall(init_tsc_clocksource);
void __init tsc_early_delay_calibrate(void)
{
unsigned long lpj;
if (!boot_cpu_has(X86_FEATURE_TSC))
return;
cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
tsc_khz = tsc_khz ? : cpu_khz;
if (!tsc_khz)
return;
lpj = tsc_khz * 1000;
do_div(lpj, HZ);
loops_per_jiffy = lpj;
}
void __init tsc_init(void)
{
u64 lpj, cyc;

View File

@ -31,6 +31,20 @@ struct tsc_adjust {
static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
/*
* TSC's on different sockets may be reset asynchronously.
* This may cause the TSC ADJUST value on socket 0 to be NOT 0.
*/
bool __read_mostly tsc_async_resets;
void mark_tsc_async_resets(char *reason)
{
if (tsc_async_resets)
return;
tsc_async_resets = true;
pr_info("tsc: Marking TSC async resets true due to %s\n", reason);
}
void tsc_verify_tsc_adjust(bool resume)
{
struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust);
@ -39,6 +53,10 @@ void tsc_verify_tsc_adjust(bool resume)
if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
return;
/* Skip unnecessary error messages if TSC already unstable */
if (check_tsc_unstable())
return;
/* Rate limit the MSR check */
if (!resume && time_before(jiffies, adj->nextcheck))
return;
@ -72,12 +90,22 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
* non zero. We don't do that on non boot cpus because physical
* hotplug should have set the ADJUST register to a value > 0 so
* the TSC is in sync with the already running cpus.
*
* Also don't force the ADJUST value to zero if that is a valid value
* for socket 0 as determined by the system arch. This is required
* when multiple sockets are reset asynchronously with each other
* and socket 0 may not have an TSC ADJUST value of 0.
*/
if (bootcpu && bootval != 0) {
pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu,
bootval);
wrmsrl(MSR_IA32_TSC_ADJUST, 0);
bootval = 0;
if (likely(!tsc_async_resets)) {
pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n",
cpu, bootval);
wrmsrl(MSR_IA32_TSC_ADJUST, 0);
bootval = 0;
} else {
pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n",
cpu, bootval);
}
}
cur->adjusted = bootval;
}
@ -91,6 +119,10 @@ bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
return false;
/* Skip unnecessary error messages if TSC already unstable */
if (check_tsc_unstable())
return false;
rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
cur->bootval = bootval;
cur->nextcheck = jiffies + HZ;
@ -118,6 +150,13 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
cur->nextcheck = jiffies + HZ;
cur->warned = false;
/*
* If a non-zero TSC value for socket 0 may be valid then the default
* adjusted value cannot assumed to be zero either.
*/
if (tsc_async_resets)
cur->adjusted = bootval;
/*
* Check whether this CPU is the first in a package to come up. In
* this case do not check the boot value against another package
@ -139,10 +178,9 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
* Compare the boot value and complain if it differs in the
* package.
*/
if (bootval != ref->bootval) {
pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n",
refcpu, ref->bootval, cpu, bootval);
}
if (bootval != ref->bootval)
printk_once(FW_BUG "TSC ADJUST differs within socket(s), fixing all errors\n");
/*
* The TSC_ADJUST values in a package must be the same. If the boot
* value on this newly upcoming CPU differs from the adjustment
@ -150,8 +188,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
* adjusted value.
*/
if (bootval != ref->adjusted) {
pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n",
refcpu, ref->adjusted, cpu, bootval);
cur->adjusted = ref->adjusted;
wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted);
}