diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 44ee7f6acf7b..82cb2a3f127a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -303,6 +303,20 @@ config HPET_TIMER as it is off-chip. You can find the HPET spec at . +config X86_PM_TIMER + bool "PM timer" + default y + help + Support the ACPI PM timer for time keeping. This is slow, + but is useful on some chipsets without HPET on systems with more + than one CPU. On a single processor or single socket multi core + system it is normally not required. + When the PM timer is active 64bit vsyscalls are disabled + and should not be enabled (/proc/sys/kernel/vsyscall64 should + not be changed). + The kernel selects the PM timer only as a last resort, so it is + useful to enable just in case. + config HPET_EMULATE_RTC bool "Provide RTC interrupt" depends on HPET_TIMER && RTC=y diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 0a3318e08ab6..5ca4a4598fda 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c new file mode 100644 index 000000000000..feb5f108dd26 --- /dev/null +++ b/arch/x86_64/kernel/pmtimer.c @@ -0,0 +1,101 @@ +/* Ported over from i386 by AK, original copyright was: + * + * (C) Dominik Brodowski 2003 + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + * + * Dropped all the hardware bug workarounds for now. Hopefully they + * are not needed on 64bit chipsets. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The I/O port the PMTMR resides at. + * The location is detected during setup_arch(), + * in arch/i386/kernel/acpi/boot.c */ +u32 pmtmr_ioport; + +/* value of the Power timer at last timer interrupt */ +static u32 offset_delay; +static u32 last_pmtmr_tick; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +static inline u32 cyc2us(u32 cycles) +{ + /* The Power Management Timer ticks at 3.579545 ticks per microsecond. + * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] + * + * Even with HZ = 100, delta is at maximum 35796 ticks, so it can + * easily be multiplied with 286 (=0x11E) without having to fear + * u32 overflows. + */ + cycles *= 286; + return (cycles >> 10); +} + +int pmtimer_mark_offset(void) +{ + static int first_run = 1; + unsigned long tsc; + u32 lost; + + u32 tick = inl(pmtmr_ioport); + u32 delta; + + delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); + + last_pmtmr_tick = tick; + monotonic_base += delta * NSEC_PER_USEC; + + delta += offset_delay; + + lost = delta / (USEC_PER_SEC / HZ); + offset_delay = delta % (USEC_PER_SEC / HZ); + + rdtscll(tsc); + vxtime.last_tsc = tsc - offset_delay * cpu_khz; + + /* don't calculate delay for first run, + or if we've got less then a tick */ + if (first_run || (lost < 1)) { + first_run = 0; + offset_delay = 0; + } + + return lost - 1; +} + +unsigned int do_gettimeoffset_pm(void) +{ + u32 now, offset, delta = 0; + + offset = last_pmtmr_tick; + now = inl(pmtmr_ioport); + delta = (now - offset) & ACPI_PM_MASK; + + return offset_delay + cyc2us(delta); +} + + +static int __init nopmtimer_setup(char *s) +{ + pmtmr_ioport = 0; + return 0; +} + +__setup("nopmtimer", nopmtimer_setup); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 0e3b60e1ac5b..735b6767c8ed 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include /* for PM timer frequency */ #include #include #include @@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; vxtime.last = offset; +#ifdef CONFIG_X86_PM_TIMER + } else if (vxtime.mode == VXTIME_PMTMR) { + lost = pmtimer_mark_offset(); +#endif } else { offset = (((tsc - vxtime.last_tsc) * vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); @@ -898,6 +904,13 @@ void __init time_init(void) hpet_period; cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; +#ifdef CONFIG_X86_PM_TIMER + } else if (pmtmr_ioport) { + vxtime_hz = PM_TIMER_FREQUENCY; + timename = "PM"; + pit_init(); + cpu_khz = pit_calibrate_tsc(); +#endif } else { pit_init(); cpu_khz = pit_calibrate_tsc(); @@ -922,6 +935,27 @@ void __init time_init(void) #endif } +/* + * Make an educated guess if the TSC is trustworthy and synchronized + * over all CPUs. + */ +static __init int unsynchronized_tsc(void) +{ +#ifdef CONFIG_SMP + if (oem_force_hpet_timer()) + return 1; + /* Intel systems are normally all synchronized. Exceptions + are handled in the OEM check above. */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return 0; + /* All in a single socket - should be synchronized */ + if (cpus_weight(cpu_core_map[0]) == num_online_cpus()) + return 0; +#endif + /* Assume multi socket systems are not synchronized */ + return num_online_cpus() > 1; +} + /* * Decide after all CPUs are booted what mode gettimeofday should use. */ @@ -929,29 +963,23 @@ void __init time_init_gtod(void) { char *timetype; - /* - * AMD systems with more than one CPU don't have fully synchronized - * TSCs. Always use HPET gettimeofday for these, although it is slower. - * Intel SMP systems usually have synchronized TSCs, so use always - * the TSC. - * - * Exceptions: - * IBM Summit2 checked by oem_force_hpet_timer(). - * AMD dual core may also not need HPET. Check me. - * - * Can be turned off with "notsc". - */ - if (num_online_cpus() > 1 && - boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - notsc = 1; - /* Some systems will want to disable TSC and use HPET. */ - if (oem_force_hpet_timer()) + if (unsynchronized_tsc()) notsc = 1; if (vxtime.hpet_address && notsc) { timetype = "HPET"; vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; +#ifdef CONFIG_X86_PM_TIMER + /* Using PM for gettimeofday is quite slow, but we have no other + choice because the TSC is too unreliable on some systems. */ + } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { + timetype = "PM"; + do_gettimeoffset = do_gettimeoffset_pm; + vxtime.mode = VXTIME_PMTMR; + sysctl_vsyscall = 0; + printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); +#endif } else { timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index b4b8dc59663a..1a7541435ef0 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv) usec = (__xtime.tv_nsec / 1000) + (__jiffies - __wall_jiffies) * (1000000 / HZ); - if (__vxtime.mode == VXTIME_TSC) { + if (__vxtime.mode != VXTIME_HPET) { sync_core(); rdtscll(t); if (t < __vxtime.last_tsc) @@ -217,7 +217,6 @@ static int __init vsyscall_init(void) BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); map_vsyscall(); - sysctl_vsyscall = 1; register_sysctl_table(kernel_root_table2, 0); return 0; } diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index d0f8f8b4c394..f2f073642d62 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -30,6 +30,11 @@ extern void ia32_syscall(void); extern void iommu_hole_init(void); extern void time_init_gtod(void); +extern int pmtimer_mark_offset(void); +extern unsigned int do_gettimeoffset_pm(void); +extern u32 pmtmr_ioport; +extern unsigned long long monotonic_base; +extern int sysctl_vsyscall; extern void do_softirq_thunk(void); diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index b0c8d4339906..2872da23fc7e 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h @@ -25,6 +25,7 @@ enum vsyscall_num { #define VXTIME_TSC 1 #define VXTIME_HPET 2 +#define VXTIME_PMTMR 3 struct vxtime_data { long hpet_address; /* HPET base address */ @@ -54,6 +55,8 @@ extern struct timezone sys_tz; extern int sysctl_vsyscall; extern seqlock_t xtime_lock; +extern int sysctl_vsyscall; + #define ARCH_HAVE_XTIME_LOCK 1 #endif /* __KERNEL__ */