Merge branch 'perf/timer' into perf/core

This WIP branch is now ready to be merged.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 2015-04-02 13:22:35 +02:00
commit 223aa646d5
116 changed files with 1813 additions and 861 deletions


@ -1186,7 +1186,7 @@ M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-mvebu/
F: drivers/rtc/armada38x-rtc
F: drivers/rtc/rtc-armada38x.c
ARM/Marvell Berlin SoC support
M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
@ -1675,8 +1675,8 @@ F: drivers/misc/eeprom/at24.c
F: include/linux/platform_data/at24.h
ATA OVER ETHERNET (AOE) DRIVER
M: "Ed L. Cashin" <ecashin@coraid.com>
W: http://support.coraid.com/support/linux
M: "Ed L. Cashin" <ed.cashin@acm.org>
W: http://www.openaoe.org/
S: Supported
F: Documentation/aoe/
F: drivers/block/aoe/
@ -3252,6 +3252,13 @@ S: Maintained
F: Documentation/hwmon/dme1737
F: drivers/hwmon/dme1737.c
DMI/SMBIOS SUPPORT
M: Jean Delvare <jdelvare@suse.de>
S: Maintained
F: drivers/firmware/dmi-id.c
F: drivers/firmware/dmi_scan.c
F: include/linux/dmi.h
DOCKING STATION DRIVER
M: Shaohua Li <shaohua.li@intel.com>
L: linux-acpi@vger.kernel.org


@ -103,7 +103,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
/*
* 120000 rough estimate from the calculations in
* __clocksource_updatefreq_scale.
* __clocksource_update_freq_scale.
*/
clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
32768, NSEC_PER_SEC, 120000);


@ -246,14 +246,30 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \
})
#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
#define _protect_cmpxchg_local(pcp, o, n) \
({ \
typeof(*raw_cpu_ptr(&(pcp))) __ret; \
preempt_disable(); \
__ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \
preempt_enable(); \
__ret; \
})
#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \
o1, o2, n1, n2)
#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
({ \
int __ret; \
preempt_disable(); \
__ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \
raw_cpu_ptr(&(ptr2)), \
o1, o2, n1, n2); \
preempt_enable(); \
__ret; \
})
#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
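Editor's note, not part of the patch: the hunk above (and the percpu.h hunk further down) wrap the per-CPU cmpxchg in a preempt_disable()/preempt_enable() pair so the task cannot migrate between resolving the per-CPU address and performing the exchange. A minimal sketch of the pattern, assuming the kernel's cmpxchg_local()/raw_cpu_ptr() primitives; the macro name here is illustrative:

/*
 * Without the preempt guard the task could be moved to another CPU between
 * raw_cpu_ptr() and the exchange, so the cmpxchg would hit the wrong CPU's
 * copy of the variable.
 */
#define protected_cpu_cmpxchg(pcp, old, new)                            \
({                                                                      \
        typeof(pcp) __ret;                                              \
        preempt_disable();              /* stay on this CPU */          \
        __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), old, new);           \
        preempt_enable();                                               \
        __ret;                                                          \
})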


@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
unsigned int cpu = smp_processor_id();
/*
* init_mm.pgd does not contain any user mappings and it is always
* active for kernel addresses in TTBR1. Just set the reserved TTBR0.
*/
if (next == &init_mm) {
cpu_set_reserved_ttbr0();
return;
}
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
check_and_switch_context(next, tsk);
}


@ -204,25 +204,47 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
return ret;
}
#define _percpu_add(pcp, val) \
__percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
#define _percpu_read(pcp) \
({ \
typeof(pcp) __retval; \
preempt_disable(); \
__retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \
sizeof(pcp)); \
preempt_enable(); \
__retval; \
})
#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val))
#define _percpu_write(pcp, val) \
do { \
preempt_disable(); \
__percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \
sizeof(pcp)); \
preempt_enable(); \
} while(0) \
#define _pcp_protect(operation, pcp, val) \
({ \
typeof(pcp) __retval; \
preempt_disable(); \
__retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \
(val), sizeof(pcp)); \
preempt_enable(); \
__retval; \
})
#define _percpu_add(pcp, val) \
_pcp_protect(__percpu_add, pcp, val)
#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
#define _percpu_and(pcp, val) \
__percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
_pcp_protect(__percpu_and, pcp, val)
#define _percpu_or(pcp, val) \
__percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
#define _percpu_read(pcp) (typeof(pcp)) \
(__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp)))
#define _percpu_write(pcp, val) \
__percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))
_pcp_protect(__percpu_or, pcp, val)
#define _percpu_xchg(pcp, val) (typeof(pcp)) \
(__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)))
_pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)


@ -200,7 +200,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
void update_vsyscall(struct timekeeper *tk)
{
struct timespec xtime_coarse;
u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
++vdso_data->tb_seq_count;
smp_wmb();
@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
vdso_data->cs_cycle_last = tk->tkr.cycle_last;
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
vdso_data->cs_mult = tk->tkr.mult;
vdso_data->cs_shift = tk->tkr.shift;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mult = tk->tkr_mono.mult;
vdso_data->cs_shift = tk->tkr_mono.shift;
}
smp_wmb();
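Editor's note: the vsyscall/vDSO hunks in this merge mostly rename tk->tkr to tk->tkr_mono while copying the same clocksource parameters into the vDSO data page. As a rough sketch of how a reader combines those fields (not from the patch; function and parameter names are illustrative):

/*
 * cs_cycle_last, cs_mult, cs_shift and xtime_clock_nsec as copied above;
 * 'now_cycles' would come from the architecture's counter.
 */
static inline u64 vdso_mono_nsec(u64 now_cycles, u64 cycle_last,
                                 u32 mult, u32 shift, u64 xtime_snsec)
{
        u64 delta = now_cycles - cycle_last;            /* cycles since last update */
        return (xtime_snsec + delta * mult) >> shift;   /* shifted nsec -> nsec */
}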


@ -2,6 +2,7 @@
#define _ASM_METAG_IO_H
#include <linux/types.h>
#include <asm/pgtable-bits.h>
#define IO_SPACE_LIMIT 0


@ -0,0 +1,104 @@
/*
* Meta page table definitions.
*/
#ifndef _METAG_PGTABLE_BITS_H
#define _METAG_PGTABLE_BITS_H
#include <asm/metag_mem.h>
/*
* Definitions for MMU descriptors
*
* These are the hardware bits in the MMCU pte entries.
* Derived from the Meta toolkit headers.
*/
#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT
#define _PAGE_WRITE MMCU_ENTRY_WR_BIT
#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT
/* Write combine bit - this can cause writes to occur out of order */
#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT
/* Sys coherent bit - this bit is never used by Linux */
#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT
#define _PAGE_ALWAYS_ZERO_1 0x020
#define _PAGE_CACHE_CTRL0 0x040
#define _PAGE_CACHE_CTRL1 0x080
#define _PAGE_ALWAYS_ZERO_2 0x100
#define _PAGE_ALWAYS_ZERO_3 0x200
#define _PAGE_ALWAYS_ZERO_4 0x400
#define _PAGE_ALWAYS_ZERO_5 0x800
/* These are software bits that we stuff into the gaps in the hardware
* pte entries that are not used. Note, these DO get stored in the actual
* hardware, but the hardware just does not use them.
*/
#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1
#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2
/* Pages owned, and protected by, the kernel. */
#define _PAGE_KERNEL _PAGE_PRIV
/* No cacheing of this page */
#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
/* burst cacheing - good for data streaming */
#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
/* One cache way per thread */
#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
/* Full on cacheing */
#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
/* which bits are used for cache control ... */
#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
_PAGE_WR_COMBINE)
/* This is a mask of the bits that pte_modify is allowed to change. */
#define _PAGE_CHG_MASK (PAGE_MASK)
#define _PAGE_SZ_SHIFT 1
#define _PAGE_SZ_4K (0x0)
#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)
#if defined(CONFIG_PAGE_SIZE_4K)
#define _PAGE_SZ (_PAGE_SZ_4K)
#elif defined(CONFIG_PAGE_SIZE_8K)
#define _PAGE_SZ (_PAGE_SZ_8K)
#elif defined(CONFIG_PAGE_SIZE_16K)
#define _PAGE_SZ (_PAGE_SZ_16K)
#endif
#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)
#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
# define _PAGE_SZHUGE (_PAGE_SZ_8K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
# define _PAGE_SZHUGE (_PAGE_SZ_16K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
# define _PAGE_SZHUGE (_PAGE_SZ_32K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
# define _PAGE_SZHUGE (_PAGE_SZ_64K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
# define _PAGE_SZHUGE (_PAGE_SZ_128K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
# define _PAGE_SZHUGE (_PAGE_SZ_256K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
# define _PAGE_SZHUGE (_PAGE_SZ_512K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
# define _PAGE_SZHUGE (_PAGE_SZ_1M)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
# define _PAGE_SZHUGE (_PAGE_SZ_2M)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
# define _PAGE_SZHUGE (_PAGE_SZ_4M)
#endif
#endif /* _METAG_PGTABLE_BITS_H */


@ -5,6 +5,7 @@
#ifndef _METAG_PGTABLE_H
#define _METAG_PGTABLE_H
#include <asm/pgtable-bits.h>
#include <asm-generic/pgtable-nopmd.h>
/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
@ -20,100 +21,6 @@
#define VMALLOC_END 0x7FFFFFFF
#endif
/*
* Definitions for MMU descriptors
*
* These are the hardware bits in the MMCU pte entries.
* Derived from the Meta toolkit headers.
*/
#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT
#define _PAGE_WRITE MMCU_ENTRY_WR_BIT
#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT
/* Write combine bit - this can cause writes to occur out of order */
#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT
/* Sys coherent bit - this bit is never used by Linux */
#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT
#define _PAGE_ALWAYS_ZERO_1 0x020
#define _PAGE_CACHE_CTRL0 0x040
#define _PAGE_CACHE_CTRL1 0x080
#define _PAGE_ALWAYS_ZERO_2 0x100
#define _PAGE_ALWAYS_ZERO_3 0x200
#define _PAGE_ALWAYS_ZERO_4 0x400
#define _PAGE_ALWAYS_ZERO_5 0x800
/* These are software bits that we stuff into the gaps in the hardware
* pte entries that are not used. Note, these DO get stored in the actual
* hardware, but the hardware just does not use them.
*/
#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1
#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2
/* Pages owned, and protected by, the kernel. */
#define _PAGE_KERNEL _PAGE_PRIV
/* No cacheing of this page */
#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
/* burst cacheing - good for data streaming */
#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
/* One cache way per thread */
#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
/* Full on cacheing */
#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
/* which bits are used for cache control ... */
#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
_PAGE_WR_COMBINE)
/* This is a mask of the bits that pte_modify is allowed to change. */
#define _PAGE_CHG_MASK (PAGE_MASK)
#define _PAGE_SZ_SHIFT 1
#define _PAGE_SZ_4K (0x0)
#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)
#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)
#if defined(CONFIG_PAGE_SIZE_4K)
#define _PAGE_SZ (_PAGE_SZ_4K)
#elif defined(CONFIG_PAGE_SIZE_8K)
#define _PAGE_SZ (_PAGE_SZ_8K)
#elif defined(CONFIG_PAGE_SIZE_16K)
#define _PAGE_SZ (_PAGE_SZ_16K)
#endif
#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)
#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
# define _PAGE_SZHUGE (_PAGE_SZ_8K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
# define _PAGE_SZHUGE (_PAGE_SZ_16K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
# define _PAGE_SZHUGE (_PAGE_SZ_32K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
# define _PAGE_SZHUGE (_PAGE_SZ_64K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
# define _PAGE_SZHUGE (_PAGE_SZ_128K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
# define _PAGE_SZHUGE (_PAGE_SZ_256K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
# define _PAGE_SZHUGE (_PAGE_SZ_512K)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
# define _PAGE_SZHUGE (_PAGE_SZ_1M)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
# define _PAGE_SZHUGE (_PAGE_SZ_2M)
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
# define _PAGE_SZHUGE (_PAGE_SZ_4M)
#endif
/*
* The Linux memory management assumes a three-level page table setup. On
* Meta, we use that, but "fold" the mid level into the top-level page


@ -153,6 +153,7 @@
#define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff
#define PPC_INST_MFTMR 0x7c0002dc
#define PPC_INST_MSGSND 0x7c00019c
#define PPC_INST_MSGCLR 0x7c0001dc
#define PPC_INST_MSGSNDP 0x7c00011c
#define PPC_INST_MTTMR 0x7c0003dc
#define PPC_INST_NOP 0x60000000
@ -309,6 +310,8 @@
___PPC_RB(b) | __PPC_EH(eh))
#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \
___PPC_RB(b))
#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \
___PPC_RB(b))
#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \
___PPC_RB(b))
#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \


@ -608,13 +608,16 @@
#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
#define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */
#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 */
#define SRR1_WAKESYSERR 0x00300000 /* System error */
#define SRR1_WAKEEE 0x00200000 /* External interrupt */
#define SRR1_WAKEMT 0x00280000 /* mtctrl */
#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */
#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell on P8 */
#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
#define SRR1_WAKERESET 0x00100000 /* System reset */
#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */
#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */
#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained,
* may not be recoverable */


@ -437,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Power8NVL */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004c0000,
.cpu_name = "POWER8NVL (raw)",
.cpu_features = CPU_FTRS_POWER8,
.cpu_user_features = COMMON_USER_POWER8,
.cpu_user_features2 = COMMON_USER2_POWER8,
.mmu_features = MMU_FTRS_POWER8,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power8",
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Power8 DD1: Does not support doorbell IPIs */
.pvr_mask = 0xffffff00,
.pvr_value = 0x004d0100,


@ -17,6 +17,7 @@
#include <asm/dbell.h>
#include <asm/irq_regs.h>
#include <asm/kvm_ppc.h>
#ifdef CONFIG_SMP
void doorbell_setup_this_cpu(void)
@ -41,6 +42,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
kvmppc_set_host_ipi(smp_processor_id(), 0);
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux();


@ -1408,7 +1408,7 @@ machine_check_handle_early:
bne 9f /* continue in V mode if we are. */
5:
#ifdef CONFIG_KVM_BOOK3S_64_HV
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
* We are coming from kernel context. Check if we are coming from
* guest. if yes, then we can continue. We will fall through


@ -33,6 +33,8 @@
#include <asm/runlatch.h>
#include <asm/code-patching.h>
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#include "powernv.h"
@ -149,7 +151,7 @@ static int pnv_smp_cpu_disable(void)
static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
unsigned long srr1;
unsigned long srr1, wmask;
u32 idle_states;
/* Standard hot unplug procedure */
@ -161,6 +163,10 @@ static void pnv_smp_cpu_kill_self(void)
generic_set_cpu_dead(cpu);
smp_wmb();
wmask = SRR1_WAKEMASK;
if (cpu_has_feature(CPU_FTR_ARCH_207S))
wmask = SRR1_WAKEMASK_P8;
idle_states = pnv_get_supported_cpuidle_states();
/* We don't want to take decrementer interrupts while we are offline,
* so clear LPCR:PECE1. We keep PECE2 enabled.
@ -191,10 +197,14 @@ static void pnv_smp_cpu_kill_self(void)
* having finished executing in a KVM guest, then srr1
* contains 0.
*/
if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {
if ((srr1 & wmask) == SRR1_WAKEEE) {
icp_native_flush_interrupt();
local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
smp_mb();
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
kvmppc_set_host_ipi(cpu, 0);
}
if (cpu_core_split_required())


@ -25,10 +25,10 @@
static struct kobject *mobility_kobj;
struct update_props_workarea {
u32 phandle;
u32 state;
u64 reserved;
u32 nprops;
__be32 phandle;
__be32 state;
__be64 reserved;
__be32 nprops;
} __packed;
#define NODE_ACTION_MASK 0xff000000
@ -54,11 +54,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope)
return rc;
}
static int delete_dt_node(u32 phandle)
static int delete_dt_node(__be32 phandle)
{
struct device_node *dn;
dn = of_find_node_by_phandle(phandle);
dn = of_find_node_by_phandle(be32_to_cpu(phandle));
if (!dn)
return -ENOENT;
@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
return 0;
}
static int update_dt_node(u32 phandle, s32 scope)
static int update_dt_node(__be32 phandle, s32 scope)
{
struct update_props_workarea *upwa;
struct device_node *dn;
@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope)
char *prop_data;
char *rtas_buf;
int update_properties_token;
u32 nprops;
u32 vd;
update_properties_token = rtas_token("ibm,update-properties");
@ -146,7 +147,7 @@ static int update_dt_node(u32 phandle, s32 scope)
if (!rtas_buf)
return -ENOMEM;
dn = of_find_node_by_phandle(phandle);
dn = of_find_node_by_phandle(be32_to_cpu(phandle));
if (!dn) {
kfree(rtas_buf);
return -ENOENT;
@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope)
break;
prop_data = rtas_buf + sizeof(*upwa);
nprops = be32_to_cpu(upwa->nprops);
/* On the first call to ibm,update-properties for a node the
* the first property value descriptor contains an empty
@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope)
*/
if (*prop_data == 0) {
prop_data++;
vd = *(u32 *)prop_data;
vd = be32_to_cpu(*(__be32 *)prop_data);
prop_data += vd + sizeof(vd);
upwa->nprops--;
nprops--;
}
for (i = 0; i < upwa->nprops; i++) {
for (i = 0; i < nprops; i++) {
char *prop_name;
prop_name = prop_data;
prop_data += strlen(prop_name) + 1;
vd = *(u32 *)prop_data;
vd = be32_to_cpu(*(__be32 *)prop_data);
prop_data += sizeof(vd);
switch (vd) {
@ -212,13 +214,13 @@ static int update_dt_node(u32 phandle, s32 scope)
return 0;
}
static int add_dt_node(u32 parent_phandle, u32 drc_index)
static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
{
struct device_node *dn;
struct device_node *parent_dn;
int rc;
parent_dn = of_find_node_by_phandle(parent_phandle);
parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
if (!parent_dn)
return -ENOENT;
@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index)
int pseries_devicetree_update(s32 scope)
{
char *rtas_buf;
u32 *data;
__be32 *data;
int update_nodes_token;
int rc;
@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope)
if (rc && rc != 1)
break;
data = (u32 *)rtas_buf + 4;
while (*data & NODE_ACTION_MASK) {
data = (__be32 *)rtas_buf + 4;
while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
int i;
u32 action = *data & NODE_ACTION_MASK;
int node_count = *data & NODE_COUNT_MASK;
u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
data++;
for (i = 0; i < node_count; i++) {
u32 phandle = *data++;
u32 drc_index;
__be32 phandle = *data++;
__be32 drc_index;
switch (action) {
case DELETE_DT_NODE:

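Editor's note: the pseries mobility hunk above switches the RTAS work-area parsing from host-order u32 reads to explicit __be32 plus be32_to_cpu(), because the firmware buffer is big-endian regardless of the kernel's endianness. A minimal sketch of that access pattern, not part of the patch (the helper name is illustrative):

/* Read one big-endian 32-bit field from a firmware buffer and convert it
 * to host byte order before using it in comparisons or arithmetic. */
static u32 read_be32_field(const void *buf)
{
        const __be32 *field = buf;       /* stored big-endian in the buffer */
        return be32_to_cpu(*field);      /* host order for the caller */
}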

@ -211,7 +211,7 @@ do { \
extern unsigned long mmap_rnd_mask;
#define STACK_RND_MASK (mmap_rnd_mask)
#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
#define ARCH_DLINFO \
do { \


@ -57,6 +57,44 @@
unsigned long ftrace_plt;
static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
{
#ifdef CC_USING_HOTPATCH
/* brcl 0,0 */
insn->opc = 0xc004;
insn->disp = 0;
#else
/* stg r14,8(r15) */
insn->opc = 0xe3e0;
insn->disp = 0xf0080024;
#endif
}
static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
{
#ifdef CONFIG_KPROBES
if (insn->opc == BREAKPOINT_INSTRUCTION)
return 1;
#endif
return 0;
}
static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
{
#ifdef CONFIG_KPROBES
insn->opc = BREAKPOINT_INSTRUCTION;
insn->disp = KPROBE_ON_FTRACE_NOP;
#endif
}
static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
{
#ifdef CONFIG_KPROBES
insn->opc = BREAKPOINT_INSTRUCTION;
insn->disp = KPROBE_ON_FTRACE_CALL;
#endif
}
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return -EFAULT;
if (addr == MCOUNT_ADDR) {
/* Initial code replacement */
#ifdef CC_USING_HOTPATCH
/* We expect to see brcl 0,0 */
ftrace_generate_nop_insn(&orig);
#else
/* We expect to see stg r14,8(r15) */
orig.opc = 0xe3e0;
orig.disp = 0xf0080024;
#endif
ftrace_generate_orig_insn(&orig);
ftrace_generate_nop_insn(&new);
} else if (old.opc == BREAKPOINT_INSTRUCTION) {
} else if (is_kprobe_on_ftrace(&old)) {
/*
* If we find a breakpoint instruction, a kprobe has been
* placed at the beginning of the function. We write the
@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
* bytes of the original instruction so that the kprobes
* handler can execute a nop, if it reaches this breakpoint.
*/
new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
orig.disp = KPROBE_ON_FTRACE_CALL;
new.disp = KPROBE_ON_FTRACE_NOP;
ftrace_generate_kprobe_call_insn(&orig);
ftrace_generate_kprobe_nop_insn(&new);
} else {
/* Replace ftrace call with a nop. */
ftrace_generate_call_insn(&orig, rec->ip);
@ -111,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
if (old.opc == BREAKPOINT_INSTRUCTION) {
if (is_kprobe_on_ftrace(&old)) {
/*
* If we find a breakpoint instruction, a kprobe has been
* placed at the beginning of the function. We write the
@ -119,9 +149,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* bytes of the original instruction so that the kprobes
* handler can execute a brasl if it reaches this breakpoint.
*/
new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
orig.disp = KPROBE_ON_FTRACE_NOP;
new.disp = KPROBE_ON_FTRACE_CALL;
ftrace_generate_kprobe_nop_insn(&orig);
ftrace_generate_kprobe_call_insn(&new);
} else {
/* Replace nop with an ftrace call. */
ftrace_generate_nop_insn(&orig);


@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
static struct attribute *cpumsf_pmu_events_attr[] = {
CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
NULL,
NULL,
};
@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void)
return -EINVAL;
}
if (si.ad)
if (si.ad) {
sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
cpumsf_pmu_events_attr[1] =
CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
}
sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
if (!sfdbg)


@ -177,6 +177,17 @@ restart_entry:
lhi %r1,1
sigp %r1,%r0,SIGP_SET_ARCHITECTURE
sam64
#ifdef CONFIG_SMP
larl %r1,smp_cpu_mt_shift
icm %r1,15,0(%r1)
jz smt_done
llgfr %r1,%r1
smt_loop:
sigp %r1,%r0,SIGP_SET_MULTI_THREADING
brc 8,smt_done /* accepted */
brc 2,smt_loop /* busy, try again */
smt_done:
#endif
larl %r1,.Lnew_pgm_check_psw
lpswe 0(%r1)
pgm_check_entry:


@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
{
u64 nsecps;
if (tk->tkr.clock != &clocksource_tod)
if (tk->tkr_mono.clock != &clocksource_tod)
return;
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_wmb();
vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->wtom_clock_sec =
tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
while (vdso_data->wtom_clock_nsec >= nsecps) {
vdso_data->wtom_clock_nsec -= nsecps;
vdso_data->wtom_clock_sec++;
@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec =
(long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
vdso_data->wtom_coarse_sec =
vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_coarse_nsec =
@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtom_coarse_sec++;
}
vdso_data->tk_mult = tk->tkr.mult;
vdso_data->tk_shift = tk->tkr.shift;
vdso_data->tk_mult = tk->tkr_mono.mult;
vdso_data->tk_shift = tk->tkr_mono.shift;
smp_wmb();
++vdso_data->tb_update_count;
}
@ -283,7 +283,7 @@ void __init time_init(void)
if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
panic("Couldn't request external interrupt 0x1406");
if (clocksource_register(&clocksource_tod) != 0)
if (__clocksource_register(&clocksource_tod) != 0)
panic("Could not register TOD clock source");
/* Enable TOD clock interrupts on the boot cpu. */


@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif
#define HV_FAST_M7_GET_PERFREG 0x43
#define HV_FAST_M7_SET_PERFREG 0x44
#ifndef __ASSEMBLY__
unsigned long sun4v_m7_get_perfreg(unsigned long reg_num,
unsigned long *reg_val);
unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif
/* Function numbers for HV_CORE_TRAP. */
#define HV_CORE_SET_VER 0x00
#define HV_CORE_PUTCHAR 0x01
@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
#define HV_GRP_SDIO 0x0108
#define HV_GRP_SDIO_ERR 0x0109
#define HV_GRP_REBOOT_DATA 0x0110
#define HV_GRP_M7_PERF 0x0114
#define HV_GRP_NIAG_PERF 0x0200
#define HV_GRP_FIRE_PERF 0x0201
#define HV_GRP_N2_CPU 0x0202


@ -48,6 +48,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_VT_CPU, },
{ .group = HV_GRP_T5_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
{ .group = HV_GRP_M7_PERF, },
};
static DEFINE_SPINLOCK(hvapi_lock);


@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg)
retl
nop
ENDPROC(sun4v_t5_set_perfreg)
ENTRY(sun4v_m7_get_perfreg)
mov %o1, %o4
mov HV_FAST_M7_GET_PERFREG, %o5
ta HV_FAST_TRAP
stx %o1, [%o4]
retl
nop
ENDPROC(sun4v_m7_get_perfreg)
ENTRY(sun4v_m7_set_perfreg)
mov HV_FAST_M7_SET_PERFREG, %o5
ta HV_FAST_TRAP
retl
nop
ENDPROC(sun4v_m7_set_perfreg)


@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = {
.pcr_nmi_disable = PCR_N4_PICNPT,
};
static u64 m7_pcr_read(unsigned long reg_num)
{
unsigned long val;
(void) sun4v_m7_get_perfreg(reg_num, &val);
return val;
}
static void m7_pcr_write(unsigned long reg_num, u64 val)
{
(void) sun4v_m7_set_perfreg(reg_num, val);
}
static const struct pcr_ops m7_pcr_ops = {
.read_pcr = m7_pcr_read,
.write_pcr = m7_pcr_write,
.read_pic = n4_pic_read,
.write_pic = n4_pic_write,
.nmi_picl_value = n4_picl_value,
.pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
PCR_N4_UTRACE | PCR_N4_TOE |
(26 << PCR_N4_SL_SHIFT)),
.pcr_nmi_disable = PCR_N4_PICNPT,
};
static unsigned long perf_hsvc_group;
static unsigned long perf_hsvc_major;
@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void)
perf_hsvc_group = HV_GRP_T5_CPU;
break;
case SUN4V_CHIP_SPARC_M7:
perf_hsvc_group = HV_GRP_M7_PERF;
break;
default:
return -ENODEV;
}
@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void)
pcr_ops = &n5_pcr_ops;
break;
case SUN4V_CHIP_SPARC_M7:
pcr_ops = &m7_pcr_ops;
break;
default:
ret = -ENODEV;
break;


@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = {
.num_pic_regs = 4,
};
static void sparc_m7_write_pmc(int idx, u64 val)
{
u64 pcr;
pcr = pcr_ops->read_pcr(idx);
/* ensure ov and ntc are reset */
pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
pcr_ops->write_pic(idx, val & 0xffffffff);
pcr_ops->write_pcr(idx, pcr);
}
static const struct sparc_pmu sparc_m7_pmu = {
.event_map = niagara4_event_map,
.cache_map = &niagara4_cache_map,
.max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
.read_pmc = sparc_vt_read_pmc,
.write_pmc = sparc_m7_write_pmc,
.upper_shift = 5,
.lower_shift = 5,
.event_mask = 0x7ff,
.user_bit = PCR_N4_UTRACE,
.priv_bit = PCR_N4_STRACE,
/* We explicitly don't support hypervisor tracing. */
.hv_bit = 0,
.irq_bit = PCR_N4_TOE,
.upper_nop = 0,
.lower_nop = 0,
.flags = 0,
.max_hw_events = 4,
.num_pcrs = 4,
.num_pic_regs = 4,
};
static const struct sparc_pmu *sparc_pmu __read_mostly;
static u64 event_encoding(u64 event_id, int idx)
@ -960,6 +996,8 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc)
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
}
static void sparc_pmu_start(struct perf_event *event, int flags);
/* On this PMU each PIC has it's own PCR control register. */
static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
{
@ -972,20 +1010,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
struct perf_event *cp = cpuc->event[i];
struct hw_perf_event *hwc = &cp->hw;
int idx = hwc->idx;
u64 enc;
if (cpuc->current_idx[i] != PIC_NO_INDEX)
continue;
sparc_perf_event_set_period(cp, hwc, idx);
cpuc->current_idx[i] = idx;
enc = perf_event_get_enc(cpuc->events[i]);
cpuc->pcr[idx] &= ~mask_for_index(idx);
if (hwc->state & PERF_HES_STOPPED)
cpuc->pcr[idx] |= nop_for_index(idx);
else
cpuc->pcr[idx] |= event_encoding(enc, idx);
sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
for (i = 0; i < cpuc->n_events; i++) {
@ -1101,7 +1132,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
int i;
local_irq_save(flags);
perf_pmu_disable(event->pmu);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event[i]) {
@ -1127,7 +1157,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
}
}
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
@ -1361,7 +1390,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
unsigned long flags;
local_irq_save(flags);
perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
if (n0 >= sparc_pmu->max_hw_events)
@ -1394,7 +1422,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
ret = 0;
out:
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
}
@ -1667,6 +1694,10 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara4_pmu;
return true;
}
if (!strcmp(sparc_pmu_type, "sparc-m7")) {
sparc_pmu = &sparc_m7_pmu;
return true;
}
return false;
}


@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n",
gp->tpc, gp->o7, gp->i7, gp->rpc);
}
touch_nmi_watchdog();
}
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void)
(cpu == this_cpu ? '*' : ' '), cpu,
pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3],
pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]);
touch_nmi_watchdog();
}
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));


@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
.rating = 100,
.read = timer_cs_read,
.mask = CLOCKSOURCE_MASK(64),
.shift = 2,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static __init int setup_timer_cs(void)
{
timer_cs_enabled = 1;
timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
timer_cs.shift);
return clocksource_register(&timer_cs);
return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
}
#ifdef CONFIG_SMP


@ -8,9 +8,11 @@
.text
ENTRY(memmove) /* o0=dst o1=src o2=len */
mov %o0, %g1
brz,pn %o2, 99f
mov %o0, %g1
cmp %o0, %o1
bleu,pt %xcc, memcpy
bleu,pt %xcc, 2f
add %o1, %o2, %g7
cmp %g7, %o0
bleu,pt %xcc, memcpy
@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
stb %g7, [%o0]
bne,pt %icc, 1b
sub %o0, 1, %o0
99:
retl
mov %g1, %o0
/* We can't just call memcpy for these memmove cases. On some
* chips the memcpy uses cache initializing stores and when dst
* and src are close enough, those can clobber the source data
* before we've loaded it in.
*/
2: or %o0, %o1, %g7
or %o2, %g7, %g7
andcc %g7, 0x7, %g0
bne,pn %xcc, 4f
nop
3: ldx [%o1], %g7
add %o1, 8, %o1
subcc %o2, 8, %o2
add %o0, 8, %o0
bne,pt %icc, 3b
stx %g7, [%o0 - 0x8]
ba,a,pt %xcc, 99b
4: ldub [%o1], %g7
add %o1, 1, %o1
subcc %o2, 1, %o2
add %o0, 1, %o0
bne,pt %icc, 4b
stb %g7, [%o0 - 0x1]
ba,a,pt %xcc, 99b
ENDPROC(memmove)
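Editor's note: the sparc64 hunk above stops forwarding the dst <= src case to memcpy, because that memcpy may use cache-initializing block stores that wipe a destination cache line before all overlapping source bytes in it have been loaded. The replacement is a plain load-then-store forward loop. A C analog of that new path, not part of the patch (assumes the kernel's u8/u64 types):

/* Copy forward with an explicit load before each store: 8 bytes at a time
 * when dst, src and len are all 8-byte aligned, otherwise byte by byte. */
static void *forward_copy(void *dst, const void *src, size_t len)
{
        if ((((unsigned long)dst | (unsigned long)src | len) & 0x7) == 0) {
                u64 *d = dst;
                const u64 *s = src;

                while (len) {
                        u64 tmp = *s++;   /* load the source word first ... */
                        *d++ = tmp;       /* ... then store it */
                        len -= 8;
                }
        } else {
                u8 *d = dst;
                const u8 *s = src;

                while (len--) {
                        u8 tmp = *s++;
                        *d++ = tmp;
                }
        }
        return dst;
}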


@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
if (tk->tkr.clock != &cycle_counter_cs)
if (tk->tkr_mono.clock != &cycle_counter_cs)
return;
write_seqcount_begin(&vdso_data->tb_seq);
vdso_data->cycle_last = tk->tkr.cycle_last;
vdso_data->mask = tk->tkr.mask;
vdso_data->mult = tk->tkr.mult;
vdso_data->shift = tk->tkr.shift;
vdso_data->cycle_last = tk->tkr_mono.cycle_last;
vdso_data->mask = tk->tkr_mono.mask;
vdso_data->mult = tk->tkr_mono.mult;
vdso_data->shift = tk->tkr_mono.shift;
vdso_data->wall_time_sec = tk->xtime_sec;
vdso_data->wall_time_snsec = tk->tkr.xtime_nsec;
vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
<< tk->tkr.shift);
<< tk->tkr_mono.shift);
while (vdso_data->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdso_data->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->tkr.shift;
((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdso_data->monotonic_time_sec++;
}
vdso_data->wall_time_coarse_sec = tk->xtime_sec;
vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
tk->tkr.shift);
vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
tk->tkr_mono.shift);
vdso_data->monotonic_time_coarse_sec =
vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;


@ -1978,13 +1978,23 @@ void arch_perf_update_userpage(struct perf_event *event,
data = cyc2ns_read_begin();
/*
* Internal timekeeping for enabled/running/stopped times
* is always in the local_clock domain.
*/
userpg->cap_user_time = 1;
userpg->time_mult = data->cyc2ns_mul;
userpg->time_shift = data->cyc2ns_shift;
userpg->time_offset = data->cyc2ns_offset - now;
userpg->cap_user_time_zero = 1;
userpg->time_zero = data->cyc2ns_offset;
/*
* cap_user_time_zero doesn't make sense when we're using a different
* time base for the records.
*/
if (event->clock == &local_clock) {
userpg->cap_user_time_zero = 1;
userpg->time_zero = data->cyc2ns_offset;
}
cyc2ns_read_end(data);
}
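Editor's note: arch_perf_update_userpage() above exports mult/shift/offset so user space can turn raw cycle counts into time, and only advertises cap_user_time_zero when the event uses the local clock. A simplified sketch of the user-side conversion, not from the patch; real readers split the multiply to avoid 64-bit overflow and re-check the page's sequence counter:

/* Convert a raw cycle count using the userpage fields written above;
 * simplified, so the multiply may overflow for very large cycle values. */
static inline u64 userpage_cyc_to_ns(u64 cycles, u32 time_mult,
                                     u32 time_shift, u64 time_offset)
{
        return time_offset + ((cycles * time_mult) >> time_shift);
}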


@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
gtod_write_begin(vdata);
/* copy vsyscall data */
vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode;
vdata->cycle_last = tk->tkr.cycle_last;
vdata->mask = tk->tkr.mask;
vdata->mult = tk->tkr.mult;
vdata->shift = tk->tkr.shift;
vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr_mono.shift;
vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->tkr.xtime_nsec;
vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_snsec = tk->tkr.xtime_nsec
vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
<< tk->tkr.shift);
<< tk->tkr_mono.shift);
while (vdata->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdata->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->tkr.shift;
((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdata->monotonic_time_sec++;
}
vdata->wall_time_coarse_sec = tk->xtime_sec;
vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
tk->tkr.shift);
vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
tk->tkr_mono.shift);
vdata->monotonic_time_coarse_sec =
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;


@ -422,6 +422,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
struct kvm_ioapic *ioapic, int vector, int trigger_mode)
{
int i;
struct kvm_lapic *apic = vcpu->arch.apic;
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
@ -443,7 +444,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
spin_lock(&ioapic->lock);
if (trigger_mode != IOAPIC_LEVEL_TRIG)
if (trigger_mode != IOAPIC_LEVEL_TRIG ||
kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
continue;
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);


@ -833,8 +833,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
int trigger_mode;
if (apic_test_vector(vector, apic->regs + APIC_TMR))
trigger_mode = IOAPIC_LEVEL_TRIG;


@ -2479,8 +2479,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_UNRESTRICTED_GUEST;
SECONDARY_EXEC_ENABLE_EPT;
vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
VMX_EPT_INVEPT_BIT;
@ -2494,6 +2493,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
} else
vmx->nested.nested_vmx_ept_caps = 0;
if (enable_unrestricted_guest)
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_UNRESTRICTED_GUEST;
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC,
vmx->nested.nested_vmx_misc_low,


@ -1070,19 +1070,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
u64 boot_ns;
boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
write_seqcount_begin(&vdata->seq);
/* copy pvclock gtod data */
vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode;
vdata->clock.cycle_last = tk->tkr.cycle_last;
vdata->clock.mask = tk->tkr.mask;
vdata->clock.mult = tk->tkr.mult;
vdata->clock.shift = tk->tkr.shift;
vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
vdata->clock.mask = tk->tkr_mono.mask;
vdata->clock.mult = tk->tkr_mono.mult;
vdata->clock.shift = tk->tkr_mono.shift;
vdata->boot_ns = boot_ns;
vdata->nsec_base = tk->tkr.xtime_nsec;
vdata->nsec_base = tk->tkr_mono.xtime_nsec;
write_seqcount_end(&vdata->seq);
}


@ -592,7 +592,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) {
struct bio_vec *bprev;
bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1];
bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1];
if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset))
return false;
}


@ -278,9 +278,11 @@ static int bt_get(struct blk_mq_alloc_data *data,
/*
* We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for
* some to complete.
* some to complete. Note that hctx can be NULL here for
* reserved tag allocation.
*/
blk_mq_run_hw_queue(hctx, false);
if (hctx)
blk_mq_run_hw_queue(hctx, false);
/*
* Retry tag allocation after running the hardware queue,


@ -1938,7 +1938,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
*/
if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
goto err_map;
goto err_mq_usage;
setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
blk_queue_rq_timeout(q, 30000);
@ -1981,7 +1981,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
if (blk_mq_init_hw_queues(q, set))
goto err_hw;
goto err_mq_usage;
mutex_lock(&all_q_mutex);
list_add_tail(&q->all_q_node, &all_q_list);
@ -1993,7 +1993,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
return q;
err_hw:
err_mq_usage:
blk_cleanup_queue(q);
err_hctxs:
kfree(map);

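Editor's note: the blk_mq_init_queue() hunk above points the later failure paths at a single label that calls blk_cleanup_queue(), so a partially initialised queue is torn down completely rather than piecemeal. A minimal sketch of that goto-unwind shape, not from the patch; the two setup helpers are hypothetical:

/* Once the queue exists, every later failure funnels to one label whose
 * teardown helper undoes all prior setup. */
static struct request_queue *init_queue_sketch(struct blk_mq_tag_set *set)
{
        struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

        if (!q)
                return NULL;
        if (setup_usage_counter(q))     /* hypothetical step */
                goto err_cleanup;
        if (setup_hw_queues(q, set))    /* hypothetical step */
                goto err_cleanup;
        return q;

err_cleanup:
        blk_cleanup_queue(q);           /* complete teardown of partial state */
        return NULL;
}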

@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
return NULL;
/* libsas case */
if (!ap->scsi_host) {
if (ap->flags & ATA_FLAG_SAS_HOST) {
tag = ata_sas_allocate_tag(ap);
if (tag < 0)
return NULL;
@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
tag = qc->tag;
if (likely(ata_tag_valid(tag))) {
qc->tag = ATA_TAG_POISON;
if (!ap->scsi_host)
if (ap->flags & ATA_FLAG_SAS_HOST)
ata_sas_free_tag(tag, ap);
}
}


@ -243,4 +243,12 @@ extern struct regcache_ops regcache_rbtree_ops;
extern struct regcache_ops regcache_lzo_ops;
extern struct regcache_ops regcache_flat_ops;
static inline const char *regmap_name(const struct regmap *map)
{
if (map->dev)
return dev_name(map->dev);
return map->name;
}
#endif


@ -218,7 +218,7 @@ int regcache_read(struct regmap *map,
ret = map->cache_ops->read(map, reg, value);
if (ret == 0)
trace_regmap_reg_read_cache(map->dev, reg, *value);
trace_regmap_reg_read_cache(map, reg, *value);
return ret;
}
@ -311,7 +311,7 @@ int regcache_sync(struct regmap *map)
dev_dbg(map->dev, "Syncing %s cache\n",
map->cache_ops->name);
name = map->cache_ops->name;
trace_regcache_sync(map->dev, name, "start");
trace_regcache_sync(map, name, "start");
if (!map->cache_dirty)
goto out;
@ -346,7 +346,7 @@ int regcache_sync(struct regmap *map)
regmap_async_complete(map);
trace_regcache_sync(map->dev, name, "stop");
trace_regcache_sync(map, name, "stop");
return ret;
}
@ -381,7 +381,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min,
name = map->cache_ops->name;
dev_dbg(map->dev, "Syncing %s cache from %d-%d\n", name, min, max);
trace_regcache_sync(map->dev, name, "start region");
trace_regcache_sync(map, name, "start region");
if (!map->cache_dirty)
goto out;
@ -401,7 +401,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min,
regmap_async_complete(map);
trace_regcache_sync(map->dev, name, "stop region");
trace_regcache_sync(map, name, "stop region");
return ret;
}
@ -428,7 +428,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min,
map->lock(map->lock_arg);
trace_regcache_drop_region(map->dev, min, max);
trace_regcache_drop_region(map, min, max);
ret = map->cache_ops->drop(map, min, max);
@ -455,7 +455,7 @@ void regcache_cache_only(struct regmap *map, bool enable)
map->lock(map->lock_arg);
WARN_ON(map->cache_bypass && enable);
map->cache_only = enable;
trace_regmap_cache_only(map->dev, enable);
trace_regmap_cache_only(map, enable);
map->unlock(map->lock_arg);
}
EXPORT_SYMBOL_GPL(regcache_cache_only);
@ -493,7 +493,7 @@ void regcache_cache_bypass(struct regmap *map, bool enable)
map->lock(map->lock_arg);
WARN_ON(map->cache_only && enable);
map->cache_bypass = enable;
trace_regmap_cache_bypass(map->dev, enable);
trace_regmap_cache_bypass(map, enable);
map->unlock(map->lock_arg);
}
EXPORT_SYMBOL_GPL(regcache_cache_bypass);


@ -1281,7 +1281,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
if (map->async && map->bus->async_write) {
struct regmap_async *async;
trace_regmap_async_write_start(map->dev, reg, val_len);
trace_regmap_async_write_start(map, reg, val_len);
spin_lock_irqsave(&map->async_lock, flags);
async = list_first_entry_or_null(&map->async_free,
@ -1339,8 +1339,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
return ret;
}
trace_regmap_hw_write_start(map->dev, reg,
val_len / map->format.val_bytes);
trace_regmap_hw_write_start(map, reg, val_len / map->format.val_bytes);
/* If we're doing a single register write we can probably just
* send the work_buf directly, otherwise try to do a gather
@ -1372,8 +1371,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
kfree(buf);
}
trace_regmap_hw_write_done(map->dev, reg,
val_len / map->format.val_bytes);
trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes);
return ret;
}
@ -1407,12 +1405,12 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
map->format.format_write(map, reg, val);
trace_regmap_hw_write_start(map->dev, reg, 1);
trace_regmap_hw_write_start(map, reg, 1);
ret = map->bus->write(map->bus_context, map->work_buf,
map->format.buf_size);
trace_regmap_hw_write_done(map->dev, reg, 1);
trace_regmap_hw_write_done(map, reg, 1);
return ret;
}
@ -1470,7 +1468,7 @@ int _regmap_write(struct regmap *map, unsigned int reg,
dev_info(map->dev, "%x <= %x\n", reg, val);
#endif
trace_regmap_reg_write(map->dev, reg, val);
trace_regmap_reg_write(map, reg, val);
return map->reg_write(context, reg, val);
}
@ -1773,7 +1771,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
for (i = 0; i < num_regs; i++) {
int reg = regs[i].reg;
int val = regs[i].def;
trace_regmap_hw_write_start(map->dev, reg, 1);
trace_regmap_hw_write_start(map, reg, 1);
map->format.format_reg(u8, reg, map->reg_shift);
u8 += reg_bytes + pad_bytes;
map->format.format_val(u8, val, 0);
@ -1788,7 +1786,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
for (i = 0; i < num_regs; i++) {
int reg = regs[i].reg;
trace_regmap_hw_write_done(map->dev, reg, 1);
trace_regmap_hw_write_done(map, reg, 1);
}
return ret;
}
@ -2059,15 +2057,13 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
*/
u8[0] |= map->read_flag_mask;
trace_regmap_hw_read_start(map->dev, reg,
val_len / map->format.val_bytes);
trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes);
ret = map->bus->read(map->bus_context, map->work_buf,
map->format.reg_bytes + map->format.pad_bytes,
val, val_len);
trace_regmap_hw_read_done(map->dev, reg,
val_len / map->format.val_bytes);
trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes);
return ret;
}
@ -2123,7 +2119,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
dev_info(map->dev, "%x => %x\n", reg, *val);
#endif
trace_regmap_reg_read(map->dev, reg, *val);
trace_regmap_reg_read(map, reg, *val);
if (!map->cache_bypass)
regcache_write(map, reg, *val);
@ -2480,7 +2476,7 @@ void regmap_async_complete_cb(struct regmap_async *async, int ret)
struct regmap *map = async->map;
bool wake;
trace_regmap_async_io_complete(map->dev);
trace_regmap_async_io_complete(map);
spin_lock(&map->async_lock);
list_move(&async->list, &map->async_free);
@ -2525,7 +2521,7 @@ int regmap_async_complete(struct regmap *map)
if (!map->bus || !map->bus->async_write)
return 0;
trace_regmap_async_complete_start(map->dev);
trace_regmap_async_complete_start(map);
wait_event(map->async_waitq, regmap_async_is_done(map));
@ -2534,7 +2530,7 @@ int regmap_async_complete(struct regmap *map)
map->async_ret = 0;
spin_unlock_irqrestore(&map->async_lock, flags);
trace_regmap_async_complete_done(map->dev);
trace_regmap_async_complete_done(map);
return ret;
}


@ -803,10 +803,6 @@ static int __init nbd_init(void)
return -EINVAL;
}
nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
if (!nbd_dev)
return -ENOMEM;
part_shift = 0;
if (max_part > 0) {
part_shift = fls(max_part);
@ -828,6 +824,10 @@ static int __init nbd_init(void)
if (nbds_max > 1UL << (MINORBITS - part_shift))
return -EINVAL;
nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
if (!nbd_dev)
return -ENOMEM;
for (i = 0; i < nbds_max; i++) {
struct gendisk *disk = alloc_disk(1 << part_shift);
if (!disk)


@ -3003,6 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
get_device(dev->device);
INIT_LIST_HEAD(&dev->node);
INIT_WORK(&dev->probe_work, nvme_async_probe);
schedule_work(&dev->probe_work);
return 0;


@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
ret = em_sti_start(p, USER_CLOCKSOURCE);
if (!ret)
__clocksource_updatefreq_hz(cs, p->rate);
__clocksource_update_freq_hz(cs, p->rate);
return ret;
}


@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
if (!ret) {
__clocksource_updatefreq_hz(cs, ch->rate);
__clocksource_update_freq_hz(cs, ch->rate);
ch->cs_enabled = true;
}
return ret;


@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
ret = sh_tmu_enable(ch);
if (!ret) {
__clocksource_updatefreq_hz(cs, ch->rate);
__clocksource_update_freq_hz(cs, ch->rate);
ch->cs_enabled = true;
}


@ -525,17 +525,6 @@ void drm_framebuffer_reference(struct drm_framebuffer *fb)
}
EXPORT_SYMBOL(drm_framebuffer_reference);
static void drm_framebuffer_free_bug(struct kref *kref)
{
BUG();
}
static void __drm_framebuffer_unreference(struct drm_framebuffer *fb)
{
DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount));
kref_put(&fb->refcount, drm_framebuffer_free_bug);
}
/**
* drm_framebuffer_unregister_private - unregister a private fb from the lookup idr
* @fb: fb to unregister
@ -1320,7 +1309,7 @@ void drm_plane_force_disable(struct drm_plane *plane)
return;
}
/* disconnect the plane from the fb and crtc: */
__drm_framebuffer_unreference(plane->old_fb);
drm_framebuffer_unreference(plane->old_fb);
plane->old_fb = NULL;
plane->fb = NULL;
plane->crtc = NULL;


@ -2737,24 +2737,11 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
WARN_ON(i915_verify_lists(ring->dev));
/* Move any buffers on the active list that are no longer referenced
* by the ringbuffer to the flushing/inactive lists as appropriate,
* before we free the context associated with the requests.
/* Retire requests first as we use it above for the early return.
* If we retire requests last, we may use a later seqno and so clear
* the requests lists without clearing the active list, leading to
* confusion.
*/
while (!list_empty(&ring->active_list)) {
struct drm_i915_gem_object *obj;
obj = list_first_entry(&ring->active_list,
struct drm_i915_gem_object,
ring_list);
if (!i915_gem_request_completed(obj->last_read_req, true))
break;
i915_gem_object_move_to_inactive(obj);
}
while (!list_empty(&ring->request_list)) {
struct drm_i915_gem_request *request;
struct intel_ringbuffer *ringbuf;
@ -2789,6 +2776,23 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
i915_gem_free_request(request);
}
/* Move any buffers on the active list that are no longer referenced
* by the ringbuffer to the flushing/inactive lists as appropriate,
* before we free the context associated with the requests.
*/
while (!list_empty(&ring->active_list)) {
struct drm_i915_gem_object *obj;
obj = list_first_entry(&ring->active_list,
struct drm_i915_gem_object,
ring_list);
if (!i915_gem_request_completed(obj->last_read_req, true))
break;
i915_gem_object_move_to_inactive(obj);
}
if (unlikely(ring->trace_irq_req &&
i915_gem_request_completed(ring->trace_irq_req, true))) {
ring->irq_put(ring);


@ -2438,8 +2438,15 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc,
if (!intel_crtc->base.primary->fb)
return;
if (intel_alloc_plane_obj(intel_crtc, plane_config))
if (intel_alloc_plane_obj(intel_crtc, plane_config)) {
struct drm_plane *primary = intel_crtc->base.primary;
primary->state->crtc = &intel_crtc->base;
primary->crtc = &intel_crtc->base;
update_state_fb(primary);
return;
}
kfree(intel_crtc->base.primary->fb);
intel_crtc->base.primary->fb = NULL;
@ -2462,11 +2469,15 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc,
continue;
if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
struct drm_plane *primary = intel_crtc->base.primary;
if (obj->tiling_mode != I915_TILING_NONE)
dev_priv->preserve_bios_swizzle = true;
drm_framebuffer_reference(c->primary->fb);
intel_crtc->base.primary->fb = c->primary->fb;
primary->fb = c->primary->fb;
primary->state->crtc = &intel_crtc->base;
primary->crtc = &intel_crtc->base;
obj->frontbuffer_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe);
break;
}
@ -6663,7 +6674,6 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
plane_config->size);
crtc->base.primary->fb = fb;
update_state_fb(crtc->base.primary);
}
static void chv_crtc_clock_get(struct intel_crtc *crtc,
@ -7704,7 +7714,6 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
plane_config->size);
crtc->base.primary->fb = fb;
update_state_fb(crtc->base.primary);
return;
error:
@ -7798,7 +7807,6 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc,
plane_config->size);
crtc->base.primary->fb = fb;
update_state_fb(crtc->base.primary);
}
static bool ironlake_get_pipe_config(struct intel_crtc *crtc,


@ -433,7 +433,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
dm_get(md);
atomic_inc(&md->open_count);
out:
spin_unlock(&_minor_lock);
@ -442,16 +441,20 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
struct mapped_device *md = disk->private_data;
struct mapped_device *md;
spin_lock(&_minor_lock);
md = disk->private_data;
if (WARN_ON(!md))
goto out;
if (atomic_dec_and_test(&md->open_count) &&
(test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
queue_work(deferred_remove_workqueue, &deferred_remove_work);
dm_put(md);
out:
spin_unlock(&_minor_lock);
}
@ -2241,7 +2244,6 @@ static void free_dev(struct mapped_device *md)
int minor = MINOR(disk_devt(md->disk));
unlock_fs(md);
bdput(md->bdev);
destroy_workqueue(md->wq);
if (md->kworker_task)
@ -2252,19 +2254,22 @@ static void free_dev(struct mapped_device *md)
mempool_destroy(md->rq_pool);
if (md->bs)
bioset_free(md->bs);
blk_integrity_unregister(md->disk);
del_gendisk(md->disk);
cleanup_srcu_struct(&md->io_barrier);
free_table_devices(&md->table_devices);
free_minor(minor);
dm_stats_cleanup(&md->stats);
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
if (blk_get_integrity(md->disk))
blk_integrity_unregister(md->disk);
del_gendisk(md->disk);
put_disk(md->disk);
blk_cleanup_queue(md->queue);
dm_stats_cleanup(&md->stats);
bdput(md->bdev);
free_minor(minor);
module_put(THIS_MODULE);
kfree(md);
}
@ -2642,8 +2647,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
might_sleep();
spin_lock(&_minor_lock);
map = dm_get_live_table(md, &srcu_idx);
spin_lock(&_minor_lock);
idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);


@ -739,7 +739,7 @@ static int __init kempld_init(void)
for (id = kempld_dmi_table;
id->matches[0].slot != DMI_NONE; id++)
if (strstr(id->ident, force_device_id))
if (id->callback && id->callback(id))
if (id->callback && !id->callback(id))
break;
if (id->matches[0].slot == DMI_NONE)
return -ENODEV;

View file

@ -196,18 +196,27 @@ EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register);
int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data)
{
u16 value;
u8 *buf;
int ret;
if (!data)
return -EINVAL;
*data = 0;
buf = kzalloc(sizeof(u8), GFP_KERNEL);
if (!buf)
return -ENOMEM;
addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT;
value = swab16(addr);
return usb_control_msg(ucr->pusb_dev,
ret = usb_control_msg(ucr->pusb_dev,
usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP,
USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
value, 0, data, 1, 100);
value, 0, buf, 1, 100);
*data = *buf;
kfree(buf);
return ret;
}
EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register);
@ -288,18 +297,27 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status)
int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status)
{
int ret;
u16 *buf;
if (!status)
return -EINVAL;
if (polling_pipe == 0)
if (polling_pipe == 0) {
buf = kzalloc(sizeof(u16), GFP_KERNEL);
if (!buf)
return -ENOMEM;
ret = usb_control_msg(ucr->pusb_dev,
usb_rcvctrlpipe(ucr->pusb_dev, 0),
RTSX_USB_REQ_POLL,
USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
0, 0, status, 2, 100);
else
0, 0, buf, 2, 100);
*status = *buf;
kfree(buf);
} else {
ret = rtsx_usb_get_status_with_bulk(ucr, status);
}
/* usb_control_msg may return positive on success */
if (ret < 0)

View file

@ -1543,7 +1543,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
{
struct pcnet32_private *lp;
int i, media;
int fdx, mii, fset, dxsuflo;
int fdx, mii, fset, dxsuflo, sram;
int chip_version;
char *chipname;
struct net_device *dev;
@ -1580,7 +1580,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
}
/* initialize variables */
fdx = mii = fset = dxsuflo = 0;
fdx = mii = fset = dxsuflo = sram = 0;
chip_version = (chip_version >> 12) & 0xffff;
switch (chip_version) {
@ -1613,6 +1613,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
chipname = "PCnet/FAST III 79C973"; /* PCI */
fdx = 1;
mii = 1;
sram = 1;
break;
case 0x2626:
chipname = "PCnet/Home 79C978"; /* PCI */
@ -1636,6 +1637,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
chipname = "PCnet/FAST III 79C975"; /* PCI */
fdx = 1;
mii = 1;
sram = 1;
break;
case 0x2628:
chipname = "PCnet/PRO 79C976";
@ -1664,6 +1666,31 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
dxsuflo = 1;
}
/*
* The Am79C973/Am79C975 controllers come with 12K of SRAM
* which we can use for the Tx/Rx buffers but most importantly,
* the use of SRAM allows us to use the BCR18:NOUFLO bit to avoid
* Tx fifo underflows.
*/
if (sram) {
/*
* The SRAM is configured in two steps. First we
* set the SRAM size in the BCR25:SRAM_SIZE bits. According
* to the datasheet, each bit corresponds to a 512-byte
* page so we can have at most 24 pages. The SRAM_SIZE
* holds the value of the upper 8 bits of the 16-bit SRAM size.
* The low 8-bits start at 0x00 and end at 0xff. So the
* address range is from 0x0000 up to 0x17ff. Therefore,
* the SRAM_SIZE is set to 0x17. The next step is to set
* the BCR26:SRAM_BND midway through so the Tx and Rx
* buffers can share the SRAM equally.
*/
a->write_bcr(ioaddr, 25, 0x17);
a->write_bcr(ioaddr, 26, 0xc);
/* And finally enable the NOUFLO bit */
a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11));
}
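The BCR25/BCR26 values above follow directly from the 512-byte page geometry described in the comment. As a hedged, stand-alone illustration (host-side C, not driver code; the 0x17/0xc values and page size are simply taken from the comment and code above):

#include <stdio.h>

/* Geometry quoted from the driver comment: 24 pages of 512 bytes = 12K SRAM. */
#define SRAM_PAGE_SIZE  512
#define SRAM_PAGES      24

int main(void)
{
        unsigned int sram_bytes = SRAM_PAGES * SRAM_PAGE_SIZE;   /* 12288 */
        unsigned int sram_size = 0x17;   /* upper 8 bits of the 16-bit size */
        unsigned int top_addr = (sram_size << 8) | 0xff;         /* 0x17ff */
        unsigned int sram_bnd = (sram_size + 1) / 2;             /* 0xc, midway */

        printf("SRAM: %u bytes, BCR25:SRAM_SIZE=0x%02x, top address 0x%04x\n",
               sram_bytes, sram_size, top_addr);
        printf("BCR26:SRAM_BND=0x%x splits the range evenly between Tx and Rx\n",
               sram_bnd);
        return 0;
}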
dev = alloc_etherdev(sizeof(*lp));
if (!dev) {
ret = -ENOMEM;

View file

@ -354,6 +354,7 @@ struct be_vf_cfg {
u16 vlan_tag;
u32 tx_rate;
u32 plink_tracking;
u32 privileges;
};
enum vf_state {
@ -423,6 +424,7 @@ struct be_adapter {
u8 __iomem *csr; /* CSR BAR used only for BE2/3 */
u8 __iomem *db; /* Door Bell */
u8 __iomem *pcicfg; /* On SH,BEx only. Shadow of PCI config space */
struct mutex mbox_lock; /* For serializing mbox cmds to BE card */
struct be_dma_mem mbox_mem;

View file

@ -1902,15 +1902,11 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd,
{
int num_eqs, i = 0;
if (lancer_chip(adapter) && num > 8) {
while (num) {
num_eqs = min(num, 8);
__be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs);
i += num_eqs;
num -= num_eqs;
}
} else {
__be_cmd_modify_eqd(adapter, set_eqd, num);
while (num) {
num_eqs = min(num, 8);
__be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs);
i += num_eqs;
num -= num_eqs;
}
return 0;
@ -1918,7 +1914,7 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd,
/* Uses synchronous mcc */
int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
u32 num)
u32 num, u32 domain)
{
struct be_mcc_wrb *wrb;
struct be_cmd_req_vlan_config *req;
@ -1936,6 +1932,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req),
wrb, NULL);
req->hdr.domain = domain;
req->interface_id = if_id;
req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0;

View file

@ -2256,7 +2256,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter,
int be_cmd_get_fw_ver(struct be_adapter *adapter);
int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *, int num);
int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
u32 num);
u32 num, u32 domain);
int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 status);
int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc);
int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc);

View file

@ -1171,7 +1171,7 @@ static int be_vid_config(struct be_adapter *adapter)
for_each_set_bit(i, adapter->vids, VLAN_N_VID)
vids[num++] = cpu_to_le16(i);
status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num);
status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
if (status) {
dev_err(dev, "Setting HW VLAN filtering failed\n");
/* Set to VLAN promisc mode as setting VLAN filter failed */
@ -1380,11 +1380,67 @@ static int be_get_vf_config(struct net_device *netdev, int vf,
return 0;
}
static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
{
struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
u16 vids[BE_NUM_VLANS_SUPPORTED];
int vf_if_id = vf_cfg->if_handle;
int status;
/* Enable Transparent VLAN Tagging */
status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0);
if (status)
return status;
/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
vids[0] = 0;
status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
if (!status)
dev_info(&adapter->pdev->dev,
"Cleared guest VLANs on VF%d", vf);
/* After TVT is enabled, disallow VFs to program VLAN filters */
if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
~BE_PRIV_FILTMGMT, vf + 1);
if (!status)
vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
}
return 0;
}
static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
{
struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
struct device *dev = &adapter->pdev->dev;
int status;
/* Reset Transparent VLAN Tagging. */
status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
vf_cfg->if_handle, 0);
if (status)
return status;
/* Allow VFs to program VLAN filtering */
if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
BE_PRIV_FILTMGMT, vf + 1);
if (!status) {
vf_cfg->privileges |= BE_PRIV_FILTMGMT;
dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
}
}
dev_info(dev,
"Disable/re-enable i/f in VM to clear Transparent VLAN tag");
return 0;
}
static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
{
struct be_adapter *adapter = netdev_priv(netdev);
struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
int status = 0;
int status;
if (!sriov_enabled(adapter))
return -EPERM;
@ -1394,24 +1450,19 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
if (vlan || qos) {
vlan |= qos << VLAN_PRIO_SHIFT;
if (vf_cfg->vlan_tag != vlan)
status = be_cmd_set_hsw_config(adapter, vlan, vf + 1,
vf_cfg->if_handle, 0);
status = be_set_vf_tvt(adapter, vf, vlan);
} else {
/* Reset Transparent Vlan Tagging. */
status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID,
vf + 1, vf_cfg->if_handle, 0);
status = be_clear_vf_tvt(adapter, vf);
}
if (status) {
dev_err(&adapter->pdev->dev,
"VLAN %d config on VF %d failed : %#x\n", vlan,
vf, status);
"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
status);
return be_cmd_status(status);
}
vf_cfg->vlan_tag = vlan;
return 0;
}
@ -2772,14 +2823,12 @@ void be_detect_error(struct be_adapter *adapter)
}
}
} else {
pci_read_config_dword(adapter->pdev,
PCICFG_UE_STATUS_LOW, &ue_lo);
pci_read_config_dword(adapter->pdev,
PCICFG_UE_STATUS_HIGH, &ue_hi);
pci_read_config_dword(adapter->pdev,
PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask);
pci_read_config_dword(adapter->pdev,
PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask);
ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
ue_lo_mask = ioread32(adapter->pcicfg +
PCICFG_UE_STATUS_LOW_MASK);
ue_hi_mask = ioread32(adapter->pcicfg +
PCICFG_UE_STATUS_HI_MASK);
ue_lo = (ue_lo & ~ue_lo_mask);
ue_hi = (ue_hi & ~ue_hi_mask);
@ -3339,7 +3388,6 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle,
u32 cap_flags, u32 vf)
{
u32 en_flags;
int status;
en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS |
@ -3347,10 +3395,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle,
en_flags &= cap_flags;
status = be_cmd_if_create(adapter, cap_flags, en_flags,
if_handle, vf);
return status;
return be_cmd_if_create(adapter, cap_flags, en_flags, if_handle, vf);
}
static int be_vfs_if_create(struct be_adapter *adapter)
@ -3368,8 +3413,13 @@ static int be_vfs_if_create(struct be_adapter *adapter)
if (!BE3_chip(adapter)) {
status = be_cmd_get_profile_config(adapter, &res,
vf + 1);
if (!status)
if (!status) {
cap_flags = res.if_cap_flags;
/* Prevent VFs from enabling VLAN promiscuous
* mode
*/
cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
}
}
status = be_if_create(adapter, &vf_cfg->if_handle,
@ -3403,7 +3453,6 @@ static int be_vf_setup(struct be_adapter *adapter)
struct device *dev = &adapter->pdev->dev;
struct be_vf_cfg *vf_cfg;
int status, old_vfs, vf;
u32 privileges;
old_vfs = pci_num_vf(adapter->pdev);
@ -3433,15 +3482,18 @@ static int be_vf_setup(struct be_adapter *adapter)
for_all_vfs(adapter, vf_cfg, vf) {
/* Allow VFs to program MAC/VLAN filters */
status = be_cmd_get_fn_privileges(adapter, &privileges, vf + 1);
if (!status && !(privileges & BE_PRIV_FILTMGMT)) {
status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
vf + 1);
if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
status = be_cmd_set_fn_privileges(adapter,
privileges |
vf_cfg->privileges |
BE_PRIV_FILTMGMT,
vf + 1);
if (!status)
if (!status) {
vf_cfg->privileges |= BE_PRIV_FILTMGMT;
dev_info(dev, "VF%d has FILTMGMT privilege\n",
vf);
}
}
/* Allow full available bandwidth */
@ -4820,24 +4872,37 @@ static int be_roce_map_pci_bars(struct be_adapter *adapter)
static int be_map_pci_bars(struct be_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
u8 __iomem *addr;
if (BEx_chip(adapter) && be_physfn(adapter)) {
adapter->csr = pci_iomap(adapter->pdev, 2, 0);
adapter->csr = pci_iomap(pdev, 2, 0);
if (!adapter->csr)
return -ENOMEM;
}
addr = pci_iomap(adapter->pdev, db_bar(adapter), 0);
addr = pci_iomap(pdev, db_bar(adapter), 0);
if (!addr)
goto pci_map_err;
adapter->db = addr;
if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
if (be_physfn(adapter)) {
/* PCICFG is the 2nd BAR in BE2 */
addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
if (!addr)
goto pci_map_err;
adapter->pcicfg = addr;
} else {
adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
}
}
be_roce_map_pci_bars(adapter);
return 0;
pci_map_err:
dev_err(&adapter->pdev->dev, "Error in mapping PCI BARs\n");
dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
be_unmap_pci_bars(adapter);
return -ENOMEM;
}

View file

@ -46,8 +46,7 @@ enum cx82310_status {
};
#define CMD_PACKET_SIZE 64
/* first command after power on can take around 8 seconds */
#define CMD_TIMEOUT 15000
#define CMD_TIMEOUT 100
#define CMD_REPLY_RETRY 5
#define CX82310_MTU 1514
@ -78,8 +77,9 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply,
ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, CMD_EP), buf,
CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT);
if (ret < 0) {
dev_err(&dev->udev->dev, "send command %#x: error %d\n",
cmd, ret);
if (cmd != CMD_GET_LINK_STATUS)
dev_err(&dev->udev->dev, "send command %#x: error %d\n",
cmd, ret);
goto end;
}
@ -90,8 +90,10 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply,
buf, CMD_PACKET_SIZE, &actual_len,
CMD_TIMEOUT);
if (ret < 0) {
dev_err(&dev->udev->dev,
"reply receive error %d\n", ret);
if (cmd != CMD_GET_LINK_STATUS)
dev_err(&dev->udev->dev,
"reply receive error %d\n",
ret);
goto end;
}
if (actual_len > 0)
@ -134,6 +136,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
int ret;
char buf[15];
struct usb_device *udev = dev->udev;
u8 link[3];
int timeout = 50;
/* avoid ADSL modems - continue only if iProduct is "USB NET CARD" */
if (usb_string(udev, udev->descriptor.iProduct, buf, sizeof(buf)) > 0
@ -160,6 +164,20 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
if (!dev->partial_data)
return -ENOMEM;
/* wait for firmware to become ready (indicated by the link being up) */
while (--timeout) {
ret = cx82310_cmd(dev, CMD_GET_LINK_STATUS, true, NULL, 0,
link, sizeof(link));
/* the command can time out during boot - it's not an error */
if (!ret && link[0] == 1 && link[2] == 1)
break;
msleep(500);
};
if (!timeout) {
dev_err(&udev->dev, "firmware not ready in time\n");
return -ETIMEDOUT;
}
/* enable ethernet mode (?) */
ret = cx82310_cmd(dev, CMD_ETHERNET_MODE, true, "\x01", 1, NULL, 0);
if (ret) {

View file

@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev)
if (!pmic)
return -ENOMEM;
if (of_device_is_compatible(node, "ti,tps659038-pmic"))
palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr =
TPS659038_REGEN2_CTRL;
pmic->dev = &pdev->dev;
pmic->palmas = palmas;
palmas->pmic = pmic;

View file

@ -413,8 +413,8 @@ static void rtc_mrst_do_remove(struct device *dev)
mrst->dev = NULL;
}
#ifdef CONFIG_PM
static int mrst_suspend(struct device *dev, pm_message_t mesg)
#ifdef CONFIG_PM_SLEEP
static int mrst_suspend(struct device *dev)
{
struct mrst_rtc *mrst = dev_get_drvdata(dev);
unsigned char tmp;
@ -453,7 +453,7 @@ static int mrst_suspend(struct device *dev, pm_message_t mesg)
*/
static inline int mrst_poweroff(struct device *dev)
{
return mrst_suspend(dev, PMSG_HIBERNATE);
return mrst_suspend(dev);
}
static int mrst_resume(struct device *dev)
@ -490,9 +490,11 @@ static int mrst_resume(struct device *dev)
return 0;
}
static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume);
#define MRST_PM_OPS (&mrst_pm_ops)
#else
#define mrst_suspend NULL
#define mrst_resume NULL
#define MRST_PM_OPS NULL
static inline int mrst_poweroff(struct device *dev)
{
@ -529,9 +531,8 @@ static struct platform_driver vrtc_mrst_platform_driver = {
.remove = vrtc_mrst_platform_remove,
.shutdown = vrtc_mrst_platform_shutdown,
.driver = {
.name = (char *) driver_name,
.suspend = mrst_suspend,
.resume = mrst_resume,
.name = driver_name,
.pm = MRST_PM_OPS,
}
};

View file

@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
};
static struct ata_port_info sata_port_info = {
.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
ATA_FLAG_SAS_HOST,
.pio_mask = ATA_PIO4_ONLY,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,

View file

@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
};
static struct ata_port_info sata_port_info = {
.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
ATA_FLAG_SAS_HOST,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,

View file

@ -108,7 +108,8 @@ static void dw_spi_dma_tx_done(void *arg)
{
struct dw_spi *dws = arg;
if (test_and_clear_bit(TX_BUSY, &dws->dma_chan_busy) & BIT(RX_BUSY))
clear_bit(TX_BUSY, &dws->dma_chan_busy);
if (test_bit(RX_BUSY, &dws->dma_chan_busy))
return;
dw_spi_xfer_done(dws);
}
@ -156,7 +157,8 @@ static void dw_spi_dma_rx_done(void *arg)
{
struct dw_spi *dws = arg;
if (test_and_clear_bit(RX_BUSY, &dws->dma_chan_busy) & BIT(TX_BUSY))
clear_bit(RX_BUSY, &dws->dma_chan_busy);
if (test_bit(TX_BUSY, &dws->dma_chan_busy))
return;
dw_spi_xfer_done(dws);
}

View file

@ -498,7 +498,7 @@ static int spi_qup_probe(struct platform_device *pdev)
struct resource *res;
struct device *dev;
void __iomem *base;
u32 max_freq, iomode;
u32 max_freq, iomode, num_cs;
int ret, irq, size;
dev = &pdev->dev;
@ -550,10 +550,11 @@ static int spi_qup_probe(struct platform_device *pdev)
}
/* use num-cs unless it is not present or is out of range */
if (of_property_read_u16(dev->of_node, "num-cs",
&master->num_chipselect) ||
(master->num_chipselect > SPI_NUM_CHIPSELECTS))
if (of_property_read_u32(dev->of_node, "num-cs", &num_cs) ||
num_cs > SPI_NUM_CHIPSELECTS)
master->num_chipselect = SPI_NUM_CHIPSELECTS;
else
master->num_chipselect = num_cs;
master->bus_num = pdev->id;
master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;

View file

@ -1105,13 +1105,14 @@ void spi_finalize_current_message(struct spi_master *master)
"failed to unprepare message: %d\n", ret);
}
}
trace_spi_message_done(mesg);
master->cur_msg_prepared = false;
mesg->state = NULL;
if (mesg->complete)
mesg->complete(mesg->context);
trace_spi_message_done(mesg);
}
EXPORT_SYMBOL_GPL(spi_finalize_current_message);

View file

@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
boff = tmp % bsize;
if (boff) {
bh = affs_bread_ino(inode, bidx, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
if (IS_ERR(bh)) {
written = PTR_ERR(bh);
goto err_first_bh;
}
tmp = min(bsize - boff, to - from);
BUG_ON(boff + tmp > bsize || tmp > bsize);
memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
bidx++;
} else if (bidx) {
bh = affs_bread_ino(inode, bidx - 1, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
if (IS_ERR(bh)) {
written = PTR_ERR(bh);
goto err_first_bh;
}
}
while (from + bsize <= to) {
prev_bh = bh;
bh = affs_getemptyblk_ino(inode, bidx);
if (IS_ERR(bh))
goto out;
goto err_bh;
memcpy(AFFS_DATA(bh), data + from, bsize);
if (buffer_new(bh)) {
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
prev_bh = bh;
bh = affs_bread_ino(inode, bidx, 1);
if (IS_ERR(bh))
goto out;
goto err_bh;
tmp = min(bsize, to - from);
BUG_ON(tmp > bsize);
memcpy(AFFS_DATA(bh), data + from, tmp);
@ -790,12 +794,13 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
if (tmp > inode->i_size)
inode->i_size = AFFS_I(inode)->mmu_private = tmp;
err_first_bh:
unlock_page(page);
page_cache_release(page);
return written;
out:
err_bh:
bh = prev_bh;
if (!written)
written = PTR_ERR(bh);

View file

@ -131,13 +131,16 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len)
hfs_bnode_write(node, entry, data_off + key_len, entry_len);
hfs_bnode_dump(node);
if (new_node) {
/* update parent key if we inserted a key
* at the start of the first node
*/
if (!rec && new_node != node)
hfs_brec_update_parent(fd);
/*
* update parent key if we inserted a key
* at the start of the node and it is not the new node
*/
if (!rec && new_node != node) {
hfs_bnode_read_key(node, fd->search_key, data_off + size);
hfs_brec_update_parent(fd);
}
if (new_node) {
hfs_bnode_put(fd->bnode);
if (!new_node->parent) {
hfs_btree_inc_height(tree);
@ -168,9 +171,6 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len)
goto again;
}
if (!rec)
hfs_brec_update_parent(fd);
return 0;
}
@ -370,6 +370,8 @@ static int hfs_brec_update_parent(struct hfs_find_data *fd)
if (IS_ERR(parent))
return PTR_ERR(parent);
__hfs_brec_find(parent, fd, hfs_find_rec_by_key);
if (fd->record < 0)
return -ENOENT;
hfs_bnode_dump(parent);
rec = fd->record;

View file

@ -39,6 +39,8 @@ enum clock_event_mode {
CLOCK_EVT_MODE_PERIODIC,
CLOCK_EVT_MODE_ONESHOT,
CLOCK_EVT_MODE_RESUME,
/* Legacy ->set_mode() callback doesn't support the modes below */
};
/*
@ -81,7 +83,11 @@ enum clock_event_mode {
* @mode: operating mode assigned by the management code
* @features: features
* @retries: number of forced programming retries
* @set_mode: set mode function
* @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
* @set_mode_periodic: switch mode to periodic, if !set_mode
* @set_mode_oneshot: switch mode to oneshot, if !set_mode
* @set_mode_shutdown: switch mode to shutdown, if !set_mode
* @set_mode_resume: resume clkevt device, if !set_mode
* @broadcast: function to broadcast events
* @min_delta_ticks: minimum delta value in ticks stored for reconfiguration
* @max_delta_ticks: maximum delta value in ticks stored for reconfiguration
@ -108,9 +114,20 @@ struct clock_event_device {
unsigned int features;
unsigned long retries;
void (*broadcast)(const struct cpumask *mask);
/*
* Mode transition callback(s): Only one of the two groups should be
* defined:
* - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
* - set_mode_{shutdown|periodic|oneshot|resume}().
*/
void (*set_mode)(enum clock_event_mode mode,
struct clock_event_device *);
int (*set_mode_periodic)(struct clock_event_device *);
int (*set_mode_oneshot)(struct clock_event_device *);
int (*set_mode_shutdown)(struct clock_event_device *);
int (*set_mode_resume)(struct clock_event_device *);
void (*broadcast)(const struct cpumask *mask);
void (*suspend)(struct clock_event_device *);
void (*resume)(struct clock_event_device *);
unsigned long min_delta_ticks;

View file

@ -56,6 +56,7 @@ struct module;
* @shift: cycle to nanosecond divisor (power of two)
* @max_idle_ns: max idle time permitted by the clocksource (nsecs)
* @maxadj: maximum adjustment value to mult (~11%)
* @max_cycles: maximum safe cycle value which won't overflow on multiplication
* @flags: flags describing special properties
* @archdata: arch-specific data
* @suspend: suspend function for the clocksource, if necessary
@ -76,7 +77,7 @@ struct clocksource {
#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
struct arch_clocksource_data archdata;
#endif
u64 max_cycles;
const char *name;
struct list_head list;
int rating;
@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
}
extern int clocksource_register(struct clocksource*);
extern int clocksource_unregister(struct clocksource*);
extern void clocksource_touch_watchdog(void);
extern struct clocksource* clocksource_get_next(void);
@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
extern void clocksource_mark_unstable(struct clocksource *cs);
extern u64
clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
extern void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
extern int
__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
extern void
__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
/*
* Don't call this unless you are a default clocksource
* (AKA: jiffies) and absolutely have to.
*/
static inline int __clocksource_register(struct clocksource *cs)
{
return __clocksource_register_scale(cs, 1, 0);
}
static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
{
@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
return __clocksource_register_scale(cs, 1000, khz);
}
static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
{
__clocksource_updatefreq_scale(cs, 1, hz);
__clocksource_update_freq_scale(cs, 1, hz);
}
static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
{
__clocksource_updatefreq_scale(cs, 1000, khz);
__clocksource_update_freq_scale(cs, 1000, khz);
}

View file

@ -232,6 +232,7 @@ enum {
* led */
ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */
ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */
ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */
/* bits 24:31 of ap->flags are reserved for LLD specific flags */

View file

@ -2999,6 +2999,9 @@ enum usb_irq_events {
#define PALMAS_GPADC_TRIM15 0x0E
#define PALMAS_GPADC_TRIM16 0x0F
/* TPS659038 regen2_ctrl offset is different from palmas */
#define TPS659038_REGEN2_CTRL 0x12
/* TPS65917 Interrupt registers */
/* Registers for function INTERRUPT */

View file

@ -173,6 +173,7 @@ struct perf_event;
* pmu::capabilities flags
*/
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
#define PERF_PMU_CAP_NO_NMI 0x02
/**
* struct pmu - generic performance monitoring unit
@ -457,6 +458,7 @@ struct perf_event {
struct pid_namespace *ns;
u64 id;
u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;

View file

@ -316,7 +316,7 @@ struct regulator_desc {
* @driver_data: private regulator data
* @of_node: OpenFirmware node to parse for device tree bindings (may be
* NULL).
* @regmap: regmap to use for core regmap helpers if dev_get_regulator() is
* @regmap: regmap to use for core regmap helpers if dev_get_regmap() is
* insufficient.
* @ena_gpio_initialized: GPIO controlling regulator enable was properly
* initialized, meaning that >= 0 is a valid gpio

View file

@ -1625,11 +1625,11 @@ struct task_struct {
/*
* numa_faults_locality tracks if faults recorded during the last
* scan window were remote/local. The task scan period is adapted
* based on the locality of the faults with different weights
* depending on whether they were shared or private faults
* scan window were remote/local or failed to migrate. The task scan
* period is adapted based on the locality of the faults with different
* weights depending on whether they were shared or private faults
*/
unsigned long numa_faults_locality[2];
unsigned long numa_faults_locality[3];
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
@ -1719,6 +1719,7 @@ struct task_struct {
#define TNF_NO_GROUP 0x02
#define TNF_SHARED 0x04
#define TNF_FAULT_LOCAL 0x08
#define TNF_MIGRATE_FAIL 0x10
#ifdef CONFIG_NUMA_BALANCING
extern void task_numa_fault(int last_node, int node, int pages, int flags);

View file

@ -16,16 +16,16 @@
* @read: Read function of @clock
* @mask: Bitmask for two's complement subtraction of non 64bit clocks
* @cycle_last: @clock cycle value at last update
* @mult: NTP adjusted multiplier for scaled math conversion
* @mult: (NTP adjusted) multiplier for scaled math conversion
* @shift: Shift value for scaled math conversion
* @xtime_nsec: Shifted (fractional) nano seconds offset for readout
* @base_mono: ktime_t (nanoseconds) base time for readout
* @base: ktime_t (nanoseconds) base time for readout
*
* This struct has a size of 56 bytes on 64 bit. Together with a seqcount it
* occupies a single 64-byte cache line.
*
* The struct is separate from struct timekeeper as it is also used
* for a fast NMI safe accessor to clock monotonic.
* for fast NMI safe accessors.
*/
struct tk_read_base {
struct clocksource *clock;
@ -35,12 +35,13 @@ struct tk_read_base {
u32 mult;
u32 shift;
u64 xtime_nsec;
ktime_t base_mono;
ktime_t base;
};
/**
* struct timekeeper - Structure holding internal timekeeping values.
* @tkr: The readout base structure
* @tkr_mono: The readout base structure for CLOCK_MONOTONIC
* @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
* @xtime_sec: Current CLOCK_REALTIME time in seconds
* @ktime_sec: Current CLOCK_MONOTONIC time in seconds
* @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
@ -48,7 +49,6 @@ struct tk_read_base {
* @offs_boot: Offset clock monotonic -> clock boottime
* @offs_tai: Offset clock monotonic -> clock tai
* @tai_offset: The current UTC to TAI offset in seconds
* @base_raw: Monotonic raw base time in ktime_t format
* @raw_time: Monotonic raw base time in timespec64 format
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
@ -76,7 +76,8 @@ struct tk_read_base {
* used instead.
*/
struct timekeeper {
struct tk_read_base tkr;
struct tk_read_base tkr_mono;
struct tk_read_base tkr_raw;
u64 xtime_sec;
unsigned long ktime_sec;
struct timespec64 wall_to_monotonic;
@ -84,7 +85,6 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
ktime_t base_raw;
struct timespec64 raw_time;
/* The following members are for timekeeping internal use */

View file

@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
return ktime_to_ns(ktime_get_boottime());
}
static inline u64 ktime_get_tai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
}
extern u64 ktime_get_mono_fast_ns(void);
extern u64 ktime_get_raw_fast_ns(void);
/*
* Timespec interfaces utilizing the ktime based ones

View file

@ -79,6 +79,16 @@ void nf_log_packet(struct net *net,
const struct nf_loginfo *li,
const char *fmt, ...);
__printf(8, 9)
void nf_log_trace(struct net *net,
u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const struct nf_loginfo *li,
const char *fmt, ...);
struct nf_log_buf;
struct nf_log_buf *nf_log_buf_open(void);

View file

@ -7,27 +7,26 @@
#include <linux/ktime.h>
#include <linux/tracepoint.h>
struct device;
struct regmap;
#include "../../../drivers/base/regmap/internal.h"
/*
* Log register events
*/
DECLARE_EVENT_CLASS(regmap_reg,
TP_PROTO(struct device *dev, unsigned int reg,
TP_PROTO(struct regmap *map, unsigned int reg,
unsigned int val),
TP_ARGS(dev, reg, val),
TP_ARGS(map, reg, val),
TP_STRUCT__entry(
__string( name, dev_name(dev) )
__field( unsigned int, reg )
__field( unsigned int, val )
__string( name, regmap_name(map) )
__field( unsigned int, reg )
__field( unsigned int, val )
),
TP_fast_assign(
__assign_str(name, dev_name(dev));
__assign_str(name, regmap_name(map));
__entry->reg = reg;
__entry->val = val;
),
@ -39,45 +38,45 @@ DECLARE_EVENT_CLASS(regmap_reg,
DEFINE_EVENT(regmap_reg, regmap_reg_write,
TP_PROTO(struct device *dev, unsigned int reg,
TP_PROTO(struct regmap *map, unsigned int reg,
unsigned int val),
TP_ARGS(dev, reg, val)
TP_ARGS(map, reg, val)
);
DEFINE_EVENT(regmap_reg, regmap_reg_read,
TP_PROTO(struct device *dev, unsigned int reg,
TP_PROTO(struct regmap *map, unsigned int reg,
unsigned int val),
TP_ARGS(dev, reg, val)
TP_ARGS(map, reg, val)
);
DEFINE_EVENT(regmap_reg, regmap_reg_read_cache,
TP_PROTO(struct device *dev, unsigned int reg,
TP_PROTO(struct regmap *map, unsigned int reg,
unsigned int val),
TP_ARGS(dev, reg, val)
TP_ARGS(map, reg, val)
);
DECLARE_EVENT_CLASS(regmap_block,
TP_PROTO(struct device *dev, unsigned int reg, int count),
TP_PROTO(struct regmap *map, unsigned int reg, int count),
TP_ARGS(dev, reg, count),
TP_ARGS(map, reg, count),
TP_STRUCT__entry(
__string( name, dev_name(dev) )
__field( unsigned int, reg )
__field( int, count )
__string( name, regmap_name(map) )
__field( unsigned int, reg )
__field( int, count )
),
TP_fast_assign(
__assign_str(name, dev_name(dev));
__assign_str(name, regmap_name(map));
__entry->reg = reg;
__entry->count = count;
),
@ -89,48 +88,48 @@ DECLARE_EVENT_CLASS(regmap_block,
DEFINE_EVENT(regmap_block, regmap_hw_read_start,
TP_PROTO(struct device *dev, unsigned int reg, int count),
TP_PROTO(struct regmap *map, unsigned int reg, int count),
TP_ARGS(dev, reg, count)
TP_ARGS(map, reg, count)
);
DEFINE_EVENT(regmap_block, regmap_hw_read_done,
TP_PROTO(struct device *dev, unsigned int reg, int count),
TP_PROTO(struct regmap *map, unsigned int reg, int count),
TP_ARGS(dev, reg, count)
TP_ARGS(map, reg, count)
);
DEFINE_EVENT(regmap_block, regmap_hw_write_start,
TP_PROTO(struct device *dev, unsigned int reg, int count),
TP_PROTO(struct regmap *map, unsigned int reg, int count),
TP_ARGS(dev, reg, count)
TP_ARGS(map, reg, count)
);
DEFINE_EVENT(regmap_block, regmap_hw_write_done,
TP_PROTO(struct device *dev, unsigned int reg, int count),
TP_PROTO(struct regmap *map, unsigned int reg, int count),
TP_ARGS(dev, reg, count)
TP_ARGS(map, reg, count)
);
TRACE_EVENT(regcache_sync,
TP_PROTO(struct device *dev, const char *type,
TP_PROTO(struct regmap *map, const char *type,
const char *status),
TP_ARGS(dev, type, status),
TP_ARGS(map, type, status),
TP_STRUCT__entry(
__string( name, dev_name(dev) )
__string( status, status )
__string( type, type )
__field( int, type )
__string( name, regmap_name(map) )
__string( status, status )
__string( type, type )
__field( int, type )
),
TP_fast_assign(
__assign_str(name, dev_name(dev));
__assign_str(name, regmap_name(map));
__assign_str(status, status);
__assign_str(type, type);
),
@ -141,17 +140,17 @@ TRACE_EVENT(regcache_sync,
DECLARE_EVENT_CLASS(regmap_bool,
TP_PROTO(struct device *dev, bool flag),
TP_PROTO(struct regmap *map, bool flag),
TP_ARGS(dev, flag),
TP_ARGS(map, flag),
TP_STRUCT__entry(
__string( name, dev_name(dev) )
__field( int, flag )
__string( name, regmap_name(map) )
__field( int, flag )
),
TP_fast_assign(
__assign_str(name, dev_name(dev));
__assign_str(name, regmap_name(map));
__entry->flag = flag;
),
@ -161,32 +160,32 @@ DECLARE_EVENT_CLASS(regmap_bool,
DEFINE_EVENT(regmap_bool, regmap_cache_only,
TP_PROTO(struct device *dev, bool flag),
TP_PROTO(struct regmap *map, bool flag),
TP_ARGS(dev, flag)
TP_ARGS(map, flag)
);
DEFINE_EVENT(regmap_bool, regmap_cache_bypass,
TP_PROTO(struct device *dev, bool flag),
TP_PROTO(struct regmap *map, bool flag),
TP_ARGS(dev, flag)
TP_ARGS(map, flag)
);
DECLARE_EVENT_CLASS(regmap_async,
TP_PROTO(struct device *dev),
TP_PROTO(struct regmap *map),
TP_ARGS(dev),
TP_ARGS(map),
TP_STRUCT__entry(
__string( name, dev_name(dev) )
__string( name, regmap_name(map) )
),
TP_fast_assign(
__assign_str(name, dev_name(dev));
__assign_str(name, regmap_name(map));
),
TP_printk("%s", __get_str(name))
@ -194,50 +193,50 @@ DECLARE_EVENT_CLASS(regmap_async,
DEFINE_EVENT(regmap_block, regmap_async_write_start,
TP_PROTO(struct device *dev, unsigned int reg, int count),
TP_PROTO(struct regmap *map, unsigned int reg, int count),
TP_ARGS(dev, reg, count)
TP_ARGS(map, reg, count)
);
DEFINE_EVENT(regmap_async, regmap_async_io_complete,
TP_PROTO(struct device *dev),
TP_PROTO(struct regmap *map),
TP_ARGS(dev)
TP_ARGS(map)
);
DEFINE_EVENT(regmap_async, regmap_async_complete_start,
TP_PROTO(struct device *dev),
TP_PROTO(struct regmap *map),
TP_ARGS(dev)
TP_ARGS(map)
);
DEFINE_EVENT(regmap_async, regmap_async_complete_done,
TP_PROTO(struct device *dev),
TP_PROTO(struct regmap *map),
TP_ARGS(dev)
TP_ARGS(map)
);
TRACE_EVENT(regcache_drop_region,
TP_PROTO(struct device *dev, unsigned int from,
TP_PROTO(struct regmap *map, unsigned int from,
unsigned int to),
TP_ARGS(dev, from, to),
TP_ARGS(map, from, to),
TP_STRUCT__entry(
__string( name, dev_name(dev) )
__field( unsigned int, from )
__field( unsigned int, to )
__string( name, regmap_name(map) )
__field( unsigned int, from )
__field( unsigned int, to )
),
TP_fast_assign(
__assign_str(name, dev_name(dev));
__assign_str(name, regmap_name(map));
__entry->from = from;
__entry->to = to;
),

View file

@ -326,7 +326,8 @@ struct perf_event_attr {
exclude_callchain_user : 1, /* exclude user callchains */
mmap2 : 1, /* include mmap with inode data */
comm_exec : 1, /* flag comm events that are due to an exec */
__reserved_1 : 39;
use_clockid : 1, /* use @clockid for time fields */
__reserved_1 : 38;
union {
__u32 wakeup_events; /* wakeup every n events */
@ -355,8 +356,7 @@ struct perf_event_attr {
*/
__u32 sample_stack_user;
/* Align to u64. */
__u32 __reserved_2;
__s32 clockid;
/*
* Defines set of regs to dump for each sample
* state captured on:

View file

@ -327,6 +327,11 @@ static inline u64 perf_clock(void)
return local_clock();
}
static inline u64 perf_event_clock(struct perf_event *event)
{
return event->clock();
}
static inline struct perf_cpu_context *
__get_cpu_context(struct perf_event_context *ctx)
{
@ -4762,7 +4767,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
}
if (sample_type & PERF_SAMPLE_TIME)
data->time = perf_clock();
data->time = perf_event_clock(event);
if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
data->id = primary_event_id(event);
@ -5340,6 +5345,8 @@ static void perf_event_task_output(struct perf_event *event,
task_event->event_id.tid = perf_event_tid(event, task);
task_event->event_id.ptid = perf_event_tid(event, current);
task_event->event_id.time = perf_event_clock(event);
perf_output_put(&handle, task_event->event_id);
perf_event__output_id_sample(event, &handle, &sample);
@ -5373,7 +5380,7 @@ static void perf_event_task(struct task_struct *task,
/* .ppid */
/* .tid */
/* .ptid */
.time = perf_clock(),
/* .time */
},
};
@ -5749,7 +5756,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
.misc = 0,
.size = sizeof(throttle_event),
},
.time = perf_clock(),
.time = perf_event_clock(event),
.id = primary_event_id(event),
.stream_id = event->id,
};
@ -6293,6 +6300,8 @@ static int perf_swevent_init(struct perf_event *event)
static struct pmu perf_swevent = {
.task_ctx_nr = perf_sw_context,
.capabilities = PERF_PMU_CAP_NO_NMI,
.event_init = perf_swevent_init,
.add = perf_swevent_add,
.del = perf_swevent_del,
@ -6636,6 +6645,8 @@ static int cpu_clock_event_init(struct perf_event *event)
static struct pmu perf_cpu_clock = {
.task_ctx_nr = perf_sw_context,
.capabilities = PERF_PMU_CAP_NO_NMI,
.event_init = cpu_clock_event_init,
.add = cpu_clock_event_add,
.del = cpu_clock_event_del,
@ -6715,6 +6726,8 @@ static int task_clock_event_init(struct perf_event *event)
static struct pmu perf_task_clock = {
.task_ctx_nr = perf_sw_context,
.capabilities = PERF_PMU_CAP_NO_NMI,
.event_init = task_clock_event_init,
.add = task_clock_event_add,
.del = task_clock_event_del,
@ -7200,6 +7213,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->hw.target = task;
}
event->clock = &local_clock;
if (parent_event)
event->clock = parent_event->clock;
if (!overflow_handler && parent_event) {
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
@ -7422,6 +7439,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
if (output_event->cpu == -1 && output_event->ctx != event->ctx)
goto out;
/*
* Mixing clocks in the same buffer is trouble you don't need.
*/
if (output_event->clock != event->clock)
goto out;
set:
mutex_lock(&event->mmap_mutex);
/* Can't redirect output if we've got an active mmap() */
@ -7454,6 +7477,43 @@ static void mutex_lock_double(struct mutex *a, struct mutex *b)
mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
}
static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
{
bool nmi_safe = false;
switch (clk_id) {
case CLOCK_MONOTONIC:
event->clock = &ktime_get_mono_fast_ns;
nmi_safe = true;
break;
case CLOCK_MONOTONIC_RAW:
event->clock = &ktime_get_raw_fast_ns;
nmi_safe = true;
break;
case CLOCK_REALTIME:
event->clock = &ktime_get_real_ns;
break;
case CLOCK_BOOTTIME:
event->clock = &ktime_get_boot_ns;
break;
case CLOCK_TAI:
event->clock = &ktime_get_tai_ns;
break;
default:
return -EINVAL;
}
if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
return -EINVAL;
return 0;
}
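For context, perf_event_set_clock() is what backs the new use_clockid/clockid attr fields added in the uapi hunk above. A hedged userspace sketch of how a tool might request CLOCK_MONOTONIC_RAW timestamps (assumes headers from this series; error handling kept minimal):

#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
                               int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_TASK_CLOCK;
        attr.sample_period = 100000;
        attr.sample_type = PERF_SAMPLE_TIME;
        /* New in this series: sample/record times come from the chosen clock. */
        attr.use_clockid = 1;
        attr.clockid = CLOCK_MONOTONIC_RAW;

        fd = sys_perf_event_open(&attr, 0 /* this task */, -1 /* any cpu */, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        close(fd);
        return 0;
}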
/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
@ -7569,6 +7629,12 @@ SYSCALL_DEFINE5(perf_event_open,
*/
pmu = event->pmu;
if (attr.use_clockid) {
err = perf_event_set_clock(event, attr.clockid);
if (err)
goto err_alloc;
}
if (group_leader &&
(is_software_event(event) != is_software_event(group_leader))) {
if (is_software_event(event)) {
@ -7618,6 +7684,11 @@ SYSCALL_DEFINE5(perf_event_open,
*/
if (group_leader->group_leader != group_leader)
goto err_context;
/* All events in a group should have the same clock */
if (group_leader->clock != event->clock)
goto err_context;
/*
* Do not allow to attach to a group in a different
* task or CPU context:

View file

@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p,
/*
* If there were no record hinting faults then either the task is
* completely idle or all activity is in areas that are not of interest
* to automatic numa balancing. Scan slower
* to automatic numa balancing. Related to that, if there were failed
* migrations then it implies we are migrating too quickly or the local
* node is overloaded. In either case, scan slower
*/
if (local + shared == 0) {
if (local + shared == 0 || p->numa_faults_locality[2]) {
p->numa_scan_period = min(p->numa_scan_period_max,
p->numa_scan_period << 1);
@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
if (migrated)
p->numa_pages_migrated += pages;
if (flags & TNF_MIGRATE_FAIL)
p->numa_faults_locality[2] += pages;
p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages;
p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;

View file

@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
static int __clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode)
{
/* Transition with legacy set_mode() callback */
if (dev->set_mode) {
/* Legacy callback doesn't support new modes */
if (mode > CLOCK_EVT_MODE_RESUME)
return -ENOSYS;
dev->set_mode(mode, dev);
return 0;
}
if (dev->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
/* Transition with new mode-specific callbacks */
switch (mode) {
case CLOCK_EVT_MODE_UNUSED:
/*
* This is an internal state, which is guaranteed to go from
* SHUTDOWN to UNUSED. No driver interaction required.
*/
return 0;
case CLOCK_EVT_MODE_SHUTDOWN:
return dev->set_mode_shutdown(dev);
case CLOCK_EVT_MODE_PERIODIC:
/* Core internal bug */
if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
return -ENOSYS;
return dev->set_mode_periodic(dev);
case CLOCK_EVT_MODE_ONESHOT:
/* Core internal bug */
if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
return -ENOSYS;
return dev->set_mode_oneshot(dev);
case CLOCK_EVT_MODE_RESUME:
/* Optional callback */
if (dev->set_mode_resume)
return dev->set_mode_resume(dev);
else
return 0;
default:
return -ENOSYS;
}
}
/**
* clockevents_set_mode - set the operating mode of a clock event device
* @dev: device to modify
@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode)
{
if (dev->mode != mode) {
dev->set_mode(mode, dev);
if (__clockevents_set_mode(dev, mode))
return;
dev->mode = mode;
/*
@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
}
EXPORT_SYMBOL_GPL(clockevents_unbind);
/* Sanity check of mode transition callbacks */
static int clockevents_sanity_check(struct clock_event_device *dev)
{
/* Legacy set_mode() callback */
if (dev->set_mode) {
/* We shouldn't be supporting new modes now */
WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot ||
dev->set_mode_shutdown || dev->set_mode_resume);
return 0;
}
if (dev->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
/* New mode-specific callbacks */
if (!dev->set_mode_shutdown)
return -EINVAL;
if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!dev->set_mode_periodic)
return -EINVAL;
if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
!dev->set_mode_oneshot)
return -EINVAL;
return 0;
}
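The sanity check above spells out the contract for the per-mode callbacks. Purely as a hedged illustration, an out-of-tree driver written against this series might look roughly like the sketch below (the foo_* names, register programming and rating are invented; this only builds inside a kernel tree carrying these patches):

#include <linux/types.h>
#include <linux/clockchips.h>

#define FOO_TICKS_PER_JIFFY     10000   /* made-up hardware constant */

static void foo_hw_program(unsigned long ticks, bool periodic)
{
        /* Hypothetical register writes would go here. */
}

static int foo_set_mode_shutdown(struct clock_event_device *ced)
{
        foo_hw_program(0, false);                       /* stop the timer */
        return 0;
}

static int foo_set_mode_periodic(struct clock_event_device *ced)
{
        foo_hw_program(FOO_TICKS_PER_JIFFY, true);
        return 0;
}

static int foo_set_mode_oneshot(struct clock_event_device *ced)
{
        foo_hw_program(0, false);                       /* wait for set_next_event() */
        return 0;
}

static int foo_set_next_event(unsigned long ticks, struct clock_event_device *ced)
{
        foo_hw_program(ticks, false);
        return 0;
}

static struct clock_event_device foo_clockevent = {
        .name                   = "foo-timer",
        .features               = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
        .rating                 = 300,
        /* New-style callbacks only; no legacy ->set_mode(). */
        .set_mode_shutdown      = foo_set_mode_shutdown,
        .set_mode_periodic      = foo_set_mode_periodic,
        .set_mode_oneshot       = foo_set_mode_oneshot,
        .set_next_event         = foo_set_next_event,
};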
/**
* clockevents_register_device - register a clock event device
* @dev: device to register
@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev)
unsigned long flags;
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(clockevents_sanity_check(dev));
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id());
@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
return clockevents_program_event(dev, dev->next_event, false);
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
return 0;
}

View file

@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
schedule_work(&watchdog_work);
}
static void clocksource_unstable(struct clocksource *cs, int64_t delta)
{
printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
cs->name, delta);
__clocksource_unstable(cs);
}
/**
* clocksource_mark_unstable - mark clocksource unstable via watchdog
* @cs: clocksource to be marked unstable
@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
static void clocksource_watchdog(unsigned long data)
{
struct clocksource *cs;
cycle_t csnow, wdnow, delta;
cycle_t csnow, wdnow, cslast, wdlast, delta;
int64_t wd_nsec, cs_nsec;
int next_cpu, reset_pending;
@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
wdlast = cs->wd_last; /* save these in case we print them */
cslast = cs->cs_last;
cs->cs_last = csnow;
cs->wd_last = wdnow;
@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
/* Check the deviation from the watchdog clocksource. */
if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
clocksource_unstable(cs, cs_nsec - wd_nsec);
pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
__clocksource_unstable(cs);
continue;
}
@ -469,26 +469,22 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
* @shift: cycle to nanosecond divisor (power of two)
* @maxadj: maximum adjustment value to mult (~11%)
* @mask: bitmask for two's complement subtraction of non 64 bit counters
* @max_cyc: maximum cycle value before potential overflow (does not include
* any safety margin)
*
* NOTE: This function includes a safety margin of 50%, so that bad clock values
* can be detected.
*/
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{
u64 max_nsecs, max_cycles;
/*
* Calculate the maximum number of cycles that we can pass to the
* cyc2ns function without overflowing a 64-bit signed result. The
* maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
* which is equivalent to the below.
* max_cycles < (2^63)/(mult + maxadj)
* max_cycles < 2^(log2((2^63)/(mult + maxadj)))
* max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
* max_cycles < 2^(63 - log2(mult + maxadj))
* max_cycles < 1 << (63 - log2(mult + maxadj))
* Please note that we add 1 to the result of the log2 to account for
* any rounding errors, ensure the above inequality is satisfied and
* no overflow will occur.
* cyc2ns() function without overflowing a 64-bit result.
*/
max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
max_cycles = ULLONG_MAX;
do_div(max_cycles, mult+maxadj);
/*
* The actual maximum number of cycles we can defer the clocksource is
@ -499,27 +495,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
max_cycles = min(max_cycles, mask);
max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
/* return the max_cycles value as well if requested */
if (max_cyc)
*max_cyc = max_cycles;
/* Return 50% of the actual maximum, so we can detect bad values */
max_nsecs >>= 1;
return max_nsecs;
}
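Replacing the log2 estimate with an exact division is easiest to see with concrete numbers. A hedged user-space re-derivation of the same bound for a hypothetical 1 GHz, 32-bit counter (this mirrors, but is not, the kernel routine above):

#include <stdio.h>
#include <stdint.h>

/* Mirror of clocksource_cyc2ns(): ns = (cycles * mult) >> shift. */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
        return (cycles * mult) >> shift;
}

int main(void)
{
        /* Hypothetical clocksource: 1 GHz counter, 32-bit mask, 1 cycle == 1 ns. */
        uint32_t mult = 1, shift = 0, maxadj = 0;
        uint64_t mask = 0xffffffffULL;
        uint64_t max_cycles, max_nsecs;

        /* Exact bound: largest cycle count whose ns conversion fits in 64 bits. */
        max_cycles = UINT64_MAX / (mult + maxadj);

        /* Never defer past the point where the counter itself wraps. */
        if (max_cycles > mask)
                max_cycles = mask;

        /* Convert with the smallest plausible mult, then keep a 50% margin. */
        max_nsecs = cyc2ns(max_cycles, mult - maxadj, shift) >> 1;

        printf("max_cycles=0x%llx max_idle_ns=%llu\n",
               (unsigned long long)max_cycles, (unsigned long long)max_nsecs);
        return 0;
}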
/**
* clocksource_max_deferment - Returns max time the clocksource can be deferred
* @cs: Pointer to clocksource
* clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
* @cs: Pointer to clocksource to be updated
*
*/
static u64 clocksource_max_deferment(struct clocksource *cs)
static inline void clocksource_update_max_deferment(struct clocksource *cs)
{
u64 max_nsecs;
max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
cs->mask);
/*
* To ensure that the clocksource does not wrap whilst we are idle,
* limit the time the clocksource can be deferred by 12.5%. Please
* note a margin of 12.5% is used because this can be computed with
* a shift, versus say 10% which would require division.
*/
return max_nsecs - (max_nsecs >> 3);
cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
cs->maxadj, cs->mask,
&cs->max_cycles);
}
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@ -648,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs)
}
/**
* __clocksource_updatefreq_scale - Used update clocksource with new freq
* __clocksource_update_freq_scale - Used to update a clocksource with a new freq
* @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale
@ -656,48 +651,64 @@ static void clocksource_enqueue(struct clocksource *cs)
* This should only be called from the clocksource->enable() method.
*
* This *SHOULD NOT* be called directly! Please use the
* clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
* __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
* functions.
*/
void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
u64 sec;
/*
* Calc the maximum number of seconds which we can run before
* wrapping around. For clocksources which have a mask > 32bit
* we need to limit the max sleep time to have a good
* conversion precision. 10 minutes is still a reasonable
* amount. That results in a shift value of 24 for a
* clocksource with mask >= 40bit and f >= 4GHz. That maps to
* ~ 0.06ppm granularity for NTP. We apply the same 12.5%
* margin as we do in clocksource_max_deferment()
* Default clocksources are *special* and self-define their mult/shift.
* But, you're not special, so you should specify a freq value.
*/
sec = (cs->mask - (cs->mask >> 3));
do_div(sec, freq);
do_div(sec, scale);
if (!sec)
sec = 1;
else if (sec > 600 && cs->mask > UINT_MAX)
sec = 600;
clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
NSEC_PER_SEC / scale, sec * scale);
if (freq) {
/*
* Calc the maximum number of seconds which we can run before
* wrapping around. For clocksources which have a mask > 32-bit
* we need to limit the max sleep time to have a good
* conversion precision. 10 minutes is still a reasonable
* amount. That results in a shift value of 24 for a
* clocksource with mask >= 40-bit and f >= 4GHz. That maps to
* ~ 0.06ppm granularity for NTP.
*/
sec = cs->mask;
do_div(sec, freq);
do_div(sec, scale);
if (!sec)
sec = 1;
else if (sec > 600 && cs->mask > UINT_MAX)
sec = 600;
clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
NSEC_PER_SEC / scale, sec * scale);
}
/*
* for clocksources that have large mults, to avoid overflow.
* Since mult may be adjusted by ntp, add an safety extra margin
*
* Ensure clocksources that have large 'mult' values don't overflow
* when adjusted.
*/
cs->maxadj = clocksource_max_adjustment(cs);
while ((cs->mult + cs->maxadj < cs->mult)
|| (cs->mult - cs->maxadj > cs->mult)) {
while (freq && ((cs->mult + cs->maxadj < cs->mult)
|| (cs->mult - cs->maxadj > cs->mult))) {
cs->mult >>= 1;
cs->shift--;
cs->maxadj = clocksource_max_adjustment(cs);
}
cs->max_idle_ns = clocksource_max_deferment(cs);
/*
* Only warn for *special* clocksources that self-define
* their mult/shift values and don't specify a freq.
*/
WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
cs->name);
clocksource_update_max_deferment(cs);
pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
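The freq handling above ultimately feeds clocks_calc_mult_shift(), which picks mult/shift so that ns = (cycles * mult) >> shift is accurate over the chosen range. A minimal sketch of just that relationship, assuming a hypothetical 24 MHz clock and an arbitrarily chosen shift of 24 (the kernel derives the shift far more carefully):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC    1000000000ULL

int main(void)
{
        /* Hypothetical 24 MHz clocksource; shift value chosen for illustration. */
        uint64_t freq = 24000000;
        uint32_t shift = 24;
        uint32_t mult = (uint32_t)((NSEC_PER_SEC << shift) / freq);

        /* Round trip: 24000 cycles at 24 MHz should read back as ~1 ms. */
        uint64_t cycles = 24000;
        uint64_t ns = (cycles * mult) >> shift;

        printf("mult=%u shift=%u -> %llu cycles = %llu ns\n",
               mult, shift, (unsigned long long)cycles, (unsigned long long)ns);
        return 0;
}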
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
/**
* __clocksource_register_scale - Used to install new clocksources
@ -714,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
/* Initialize mult/shift and max_idle_ns */
__clocksource_updatefreq_scale(cs, scale, freq);
__clocksource_update_freq_scale(cs, scale, freq);
/* Add clocksource to the clocksource list */
mutex_lock(&clocksource_mutex);
@ -726,33 +737,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
/**
* clocksource_register - Used to install new clocksources
* @cs: clocksource to be registered
*
* Returns -EBUSY if registration fails, zero otherwise.
*/
int clocksource_register(struct clocksource *cs)
{
/* calculate max adjustment for given mult/shift */
cs->maxadj = clocksource_max_adjustment(cs);
WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
"Clocksource %s might overflow on 11%% adjustment\n",
cs->name);
/* calculate max idle time permitted for this clocksource */
cs->max_idle_ns = clocksource_max_deferment(cs);
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
}
EXPORT_SYMBOL(clocksource_register);
static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
list_del(&cs->list);

View file

@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
.max_cycles = 10,
};
__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
static int __init init_jiffies_clocksource(void)
{
return clocksource_register(&clocksource_jiffies);
return __clocksource_register(&clocksource_jiffies);
}
core_initcall(init_jiffies_clocksource);
@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
clocksource_register(&refined_jiffies);
__clocksource_register(&refined_jiffies);
return 0;
}

View file

@ -1,5 +1,6 @@
/*
* sched_clock.c: support for extending counters to full 64-bit ns counter
* sched_clock.c: Generic sched_clock() support, to extend low level
* hardware time counters to full 64-bit ns values.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@ -18,15 +19,53 @@
#include <linux/seqlock.h>
#include <linux/bitops.h>
struct clock_data {
ktime_t wrap_kt;
/**
* struct clock_read_data - data required to read from sched_clock()
*
* @epoch_ns: sched_clock() value at last update
* @epoch_cyc: Clock cycle value at last update.
* @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit
* clocks.
* @read_sched_clock: Current clock source (or dummy source when suspended).
* @mult: Multiplier for scaled math conversion.
* @shift: Shift value for scaled math conversion.
*
* Care must be taken when updating this structure; it is read by
* some very hot code paths. It occupies <=40 bytes and, when combined
* with the seqcount used to synchronize access, comfortably fits into
* a 64 byte cache line.
*/
struct clock_read_data {
u64 epoch_ns;
u64 epoch_cyc;
seqcount_t seq;
unsigned long rate;
u64 sched_clock_mask;
u64 (*read_sched_clock)(void);
u32 mult;
u32 shift;
bool suspended;
};
/**
* struct clock_data - all data needed for sched_clock() (including
* registration of a new clock source)
*
* @seq: Sequence counter for protecting updates. The lowest
* bit is the index for @read_data.
* @read_data: Data required to read from sched_clock.
* @wrap_kt: Duration for which clock can run before wrapping.
* @rate: Tick rate of the registered clock.
* @actual_read_sched_clock: Registered hardware level clock read function.
*
* The ordering of this structure has been chosen to optimize cache
* performance. In particular 'seq' and 'read_data[0]' (combined) should fit
* into a single 64-byte cache line.
*/
struct clock_data {
seqcount_t seq;
struct clock_read_data read_data[2];
ktime_t wrap_kt;
unsigned long rate;
u64 (*actual_read_sched_clock)(void);
};
static struct hrtimer sched_clock_timer;
@ -34,12 +73,6 @@ static int irqtime = -1;
core_param(irqtime, irqtime, int, 0400);
static struct clock_data cd = {
.mult = NSEC_PER_SEC / HZ,
};
static u64 __read_mostly sched_clock_mask;
static u64 notrace jiffy_sched_clock_read(void)
{
/*
@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
return (u64)(jiffies - INITIAL_JIFFIES);
}
static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
static struct clock_data cd ____cacheline_aligned = {
.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
.read_sched_clock = jiffy_sched_clock_read, },
.actual_read_sched_clock = jiffy_sched_clock_read,
};
static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
unsigned long long notrace sched_clock(void)
{
u64 epoch_ns;
u64 epoch_cyc;
u64 cyc;
u64 cyc, res;
unsigned long seq;
if (cd.suspended)
return cd.epoch_ns;
struct clock_read_data *rd;
do {
seq = raw_read_seqcount_begin(&cd.seq);
epoch_cyc = cd.epoch_cyc;
epoch_ns = cd.epoch_ns;
seq = raw_read_seqcount(&cd.seq);
rd = cd.read_data + (seq & 1);
cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
rd->sched_clock_mask;
res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
} while (read_seqcount_retry(&cd.seq, seq));
cyc = read_sched_clock();
cyc = (cyc - epoch_cyc) & sched_clock_mask;
return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
return res;
}
/*
* Atomically update the sched_clock epoch.
* Updating the data required to read the clock.
*
* sched_clock() will never observe mis-matched data even if called from
* an NMI. We do this by maintaining an odd/even copy of the data and
* steering sched_clock() to one or the other using a sequence counter.
* In order to preserve the data cache profile of sched_clock() as much
* as possible the system reverts back to the even copy when the update
* completes; the odd copy is used *only* during an update.
*/
static void notrace update_sched_clock(void)
static void update_clock_read_data(struct clock_read_data *rd)
{
/* update the backup (odd) copy with the new data */
cd.read_data[1] = *rd;
/* steer readers towards the odd copy */
raw_write_seqcount_latch(&cd.seq);
/* now it's safe for us to update the normal (even) copy */
cd.read_data[0] = *rd;
/* switch readers back to the even copy */
raw_write_seqcount_latch(&cd.seq);
}
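The two raw_write_seqcount_latch() calls above implement the odd/even scheme described in the comment: readers are steered to the backup copy while the normal copy is rewritten, and a changed sequence count makes them retry. A rough user-space analogue of that protocol, using C11 atomics and conservative seq_cst fences in place of the kernel's seqcount primitives:

/*
 * User-space analogue of the odd/even latch used by update_clock_read_data().
 * The torn read that can occur while a copy is being rewritten is discarded
 * by the retry loop, as in the kernel.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct snapshot {                       /* stands in for clock_read_data */
	uint64_t epoch_ns;
	uint64_t epoch_cyc;
};

static _Atomic unsigned int seq;        /* lowest bit selects the copy */
static struct snapshot copies[2];       /* [0] = normal, [1] = backup  */

static void publish(const struct snapshot *src)
{
	copies[1] = *src;                       /* update the backup copy     */
	atomic_thread_fence(memory_order_seq_cst);
	atomic_fetch_add(&seq, 1);              /* odd: readers -> copies[1]  */
	atomic_thread_fence(memory_order_seq_cst);
	copies[0] = *src;                       /* update the normal copy     */
	atomic_thread_fence(memory_order_seq_cst);
	atomic_fetch_add(&seq, 1);              /* even: readers -> copies[0] */
}

static struct snapshot read_snapshot(void)
{
	struct snapshot s;
	unsigned int start;

	do {
		start = atomic_load(&seq);
		s = copies[start & 1];
		atomic_thread_fence(memory_order_seq_cst);
	} while (atomic_load(&seq) != start);   /* retry on concurrent update */

	return s;
}

int main(void)
{
	struct snapshot s = { .epoch_ns = 123456789, .epoch_cyc = 42 };

	publish(&s);
	s = read_snapshot();
	printf("epoch_ns=%llu epoch_cyc=%llu\n",
	       (unsigned long long)s.epoch_ns, (unsigned long long)s.epoch_cyc);
	return 0;
}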
/*
* Atomically update the sched_clock() epoch.
*/
static void update_sched_clock(void)
{
unsigned long flags;
u64 cyc;
u64 ns;
struct clock_read_data rd;
cyc = read_sched_clock();
ns = cd.epoch_ns +
cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
cd.mult, cd.shift);
rd = cd.read_data[0];
raw_local_irq_save(flags);
raw_write_seqcount_begin(&cd.seq);
cd.epoch_ns = ns;
cd.epoch_cyc = cyc;
raw_write_seqcount_end(&cd.seq);
raw_local_irq_restore(flags);
cyc = cd.actual_read_sched_clock();
ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
rd.epoch_ns = ns;
rd.epoch_cyc = cyc;
update_clock_read_data(&rd);
}
static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
{
update_sched_clock();
hrtimer_forward_now(hrt, cd.wrap_kt);
return HRTIMER_RESTART;
}
void __init sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate)
void __init
sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
u64 res, wrap, new_mask, new_epoch, cyc, ns;
u32 new_mult, new_shift;
ktime_t new_wrap_kt;
unsigned long r;
char r_unit;
struct clock_read_data rd;
if (cd.rate > rate)
return;
WARN_ON(!irqs_disabled());
/* calculate the mult/shift to convert counter ticks to ns. */
/* Calculate the mult/shift to convert counter ticks to ns. */
clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
new_mask = CLOCKSOURCE_MASK(bits);
/* calculate how many ns until we wrap */
wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
/* update epoch for new counter and update epoch_ns from old counter*/
new_epoch = read();
cyc = read_sched_clock();
ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
cd.mult, cd.shift);
raw_write_seqcount_begin(&cd.seq);
read_sched_clock = read;
sched_clock_mask = new_mask;
cd.rate = rate;
cd.wrap_kt = new_wrap_kt;
cd.mult = new_mult;
cd.shift = new_shift;
cd.epoch_cyc = new_epoch;
cd.epoch_ns = ns;
raw_write_seqcount_end(&cd.seq);
/* Calculate how many nanosecs until we risk wrapping */
wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
cd.wrap_kt = ns_to_ktime(wrap);
rd = cd.read_data[0];
/* Update epoch for new counter and update 'epoch_ns' from old counter */
new_epoch = read();
cyc = cd.actual_read_sched_clock();
ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
cd.actual_read_sched_clock = read;
rd.read_sched_clock = read;
rd.sched_clock_mask = new_mask;
rd.mult = new_mult;
rd.shift = new_shift;
rd.epoch_cyc = new_epoch;
rd.epoch_ns = ns;
update_clock_read_data(&rd);
r = rate;
if (r >= 4000000) {
r /= 1000000;
r_unit = 'M';
} else if (r >= 1000) {
r /= 1000;
r_unit = 'k';
} else
r_unit = ' ';
} else {
if (r >= 1000) {
r /= 1000;
r_unit = 'k';
} else {
r_unit = ' ';
}
}
/* calculate the ns resolution of this counter */
/* Calculate the ns resolution of this counter */
res = cyc_to_ns(1ULL, new_mult, new_shift);
pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
bits, r, r_unit, res, wrap);
/* Enable IRQ time accounting if we have a fast enough sched_clock */
/* Enable IRQ time accounting if we have a fast enough sched_clock() */
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
enable_sched_clock_irqtime();
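The registration path above boils down to picking a (mult, shift) pair so that ns = (cyc * mult) >> shift holds for the counter's rate, then deriving the resolution and wrap interval from it. A self-contained sketch of that scaled-math conversion for an assumed 24 MHz counter; the kernel derives the pair with clocks_calc_mult_shift(), the value below is hand-computed:

/*
 * Sketch of the (cyc * mult) >> shift conversion used by cyc_to_ns().
 * The 24 MHz rate and the shift of 24 are illustrative assumptions;
 * clocks_calc_mult_shift() additionally bounds the shift so the
 * multiplication cannot overflow 64 bits over the requested interval.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t cyc_to_ns(uint64_t cyc, uint32_t mult, uint32_t shift)
{
	return (cyc * mult) >> shift;
}

int main(void)
{
	uint32_t shift = 24;
	/* mult ~= (1e9 << shift) / rate, i.e. 1e9 * 2^24 / 24e6, rounded */
	uint32_t mult = 699050667;

	printf("resolution: %llu ns/tick\n",
	       (unsigned long long)cyc_to_ns(1, mult, shift));        /* ~41   */
	printf("one second of ticks: %llu ns\n",
	       (unsigned long long)cyc_to_ns(24000000, mult, shift)); /* ~1e9  */
	return 0;
}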
@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
void __init sched_clock_postinit(void)
{
/*
* If no sched_clock function has been provided at that point,
* If no sched_clock() function has been provided at that point,
* make it the final one.
*/
if (read_sched_clock == jiffy_sched_clock_read)
if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
update_sched_clock();
@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
}
/*
* Clock read function for use when the clock is suspended.
*
* This function makes it appear to sched_clock() as if the clock
* stopped counting at its last update.
*
* This function must only be called from the critical
* section in sched_clock(). It relies on the read_seqcount_retry()
* at the end of the critical section to be sure we observe the
* correct copy of 'epoch_cyc'.
*/
static u64 notrace suspended_sched_clock_read(void)
{
unsigned long seq = raw_read_seqcount(&cd.seq);
return cd.read_data[seq & 1].epoch_cyc;
}
static int sched_clock_suspend(void)
{
struct clock_read_data *rd = &cd.read_data[0];
update_sched_clock();
hrtimer_cancel(&sched_clock_timer);
cd.suspended = true;
rd->read_sched_clock = suspended_sched_clock_read;
return 0;
}
static void sched_clock_resume(void)
{
cd.epoch_cyc = read_sched_clock();
struct clock_read_data *rd = &cd.read_data[0];
rd->epoch_cyc = cd.actual_read_sched_clock();
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
cd.suspended = false;
rd->read_sched_clock = cd.actual_read_sched_clock;
}
static struct syscore_ops sched_clock_ops = {
.suspend = sched_clock_suspend,
.resume = sched_clock_resume,
.suspend = sched_clock_suspend,
.resume = sched_clock_resume,
};
static int __init sched_clock_syscore_init(void)
{
register_syscore_ops(&sched_clock_ops);
return 0;
}
device_initcall(sched_clock_syscore_init);

View file

@ -59,6 +59,7 @@ struct tk_fast {
};
static struct tk_fast tk_fast_mono ____cacheline_aligned;
static struct tk_fast tk_fast_raw ____cacheline_aligned;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
@ -68,8 +69,8 @@ bool __read_mostly persistent_clock_exist = false;
static inline void tk_normalize_xtime(struct timekeeper *tk)
{
while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
tk->xtime_sec++;
}
}
@ -79,20 +80,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
struct timespec64 ts;
ts.tv_sec = tk->xtime_sec;
ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
return ts;
}
static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
{
tk->xtime_sec = ts->tv_sec;
tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
}
static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
{
tk->xtime_sec += ts->tv_sec;
tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
tk_normalize_xtime(tk);
}
@ -118,6 +119,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
tk->offs_boot = ktime_add(tk->offs_boot, delta);
}
#ifdef CONFIG_DEBUG_TIMEKEEPING
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
/*
* These simple flag variables are managed
* without locks, which is racy, but ok since
* we don't really care about being super
* precise about how many events were seen,
* just that a problem was observed.
*/
static int timekeeping_underflow_seen;
static int timekeeping_overflow_seen;
/* last_warning is only modified under the timekeeping lock */
static long timekeeping_last_warning;
static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
{
cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
const char *name = tk->tkr_mono.clock->name;
if (offset > max_cycles) {
printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
offset, name, max_cycles);
printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
} else {
if (offset > (max_cycles >> 1)) {
printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
offset, name, max_cycles >> 1);
printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
}
}
if (timekeeping_underflow_seen) {
if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
printk_deferred(" Your kernel is probably still fine.\n");
timekeeping_last_warning = jiffies;
}
timekeeping_underflow_seen = 0;
}
if (timekeeping_overflow_seen) {
if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
printk_deferred(" Your kernel is probably still fine.\n");
timekeeping_last_warning = jiffies;
}
timekeeping_overflow_seen = 0;
}
}
static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
{
cycle_t now, last, mask, max, delta;
unsigned int seq;
/*
* Since we're called holding a seqlock, the data may shift
* under us while we're doing the calculation. This can cause
* false positives, since we'd note a problem but throw the
* results away. So nest another seqlock here to atomically
* grab the points we are checking with.
*/
do {
seq = read_seqcount_begin(&tk_core.seq);
now = tkr->read(tkr->clock);
last = tkr->cycle_last;
mask = tkr->mask;
max = tkr->clock->max_cycles;
} while (read_seqcount_retry(&tk_core.seq, seq));
delta = clocksource_delta(now, last, mask);
/*
* Try to catch underflows by checking if we are seeing small
* mask-relative negative values.
*/
if (unlikely((~delta & mask) < (mask >> 3))) {
timekeeping_underflow_seen = 1;
delta = 0;
}
/* Cap delta value to the max_cycles values to avoid mult overflows */
if (unlikely(delta > max)) {
timekeeping_overflow_seen = 1;
delta = tkr->clock->max_cycles;
}
return delta;
}
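The masked subtraction used by timekeeping_get_delta() treats the counter as a modular value: a legitimate wrap-around still produces a small positive delta, while a counter that appears to have run backwards yields a value just below the mask, which is what the (~delta & mask) < (mask >> 3) test catches. A small stand-alone sketch of both cases for an assumed 32-bit clocksource (clocksource_delta() is essentially a masked subtraction):

/*
 * Sketch of the masked-delta arithmetic and underflow heuristic above,
 * for a hypothetical 32-bit counter.
 */
#include <stdint.h>
#include <stdio.h>

#define MASK 0xffffffffULL              /* 32-bit clocksource */

static uint64_t clocksource_delta(uint64_t now, uint64_t last, uint64_t mask)
{
	return (now - last) & mask;
}

static void check(uint64_t now, uint64_t last)
{
	uint64_t delta = clocksource_delta(now, last, MASK);

	if ((~delta & MASK) < (MASK >> 3))
		printf("now=%#llx last=%#llx -> underflow suspected (delta %#llx)\n",
		       (unsigned long long)now, (unsigned long long)last,
		       (unsigned long long)delta);
	else
		printf("now=%#llx last=%#llx -> delta %llu cycles\n",
		       (unsigned long long)now, (unsigned long long)last,
		       (unsigned long long)delta);
}

int main(void)
{
	check(0x00000010, 0xfffffff0);   /* counter wrapped: delta = 0x20 */
	check(0xfffffff0, 0x00000010);   /* counter "ran backwards"       */
	return 0;
}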
#else
static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
{
}
static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
{
cycle_t cycle_now, delta;
/* read clocksource */
cycle_now = tkr->read(tkr->clock);
/* calculate the delta since the last update_wall_time */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
return delta;
}
#endif
/**
* tk_setup_internals - Set up internals to use clocksource clock.
*
@ -135,11 +247,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
u64 tmp, ntpinterval;
struct clocksource *old_clock;
old_clock = tk->tkr.clock;
tk->tkr.clock = clock;
tk->tkr.read = clock->read;
tk->tkr.mask = clock->mask;
tk->tkr.cycle_last = tk->tkr.read(clock);
old_clock = tk->tkr_mono.clock;
tk->tkr_mono.clock = clock;
tk->tkr_mono.read = clock->read;
tk->tkr_mono.mask = clock->mask;
tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
tk->tkr_raw.clock = clock;
tk->tkr_raw.read = clock->read;
tk->tkr_raw.mask = clock->mask;
tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
/* Do the ns -> cycle conversion first, using original mult */
tmp = NTP_INTERVAL_LENGTH;
@ -163,11 +280,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
if (old_clock) {
int shift_change = clock->shift - old_clock->shift;
if (shift_change < 0)
tk->tkr.xtime_nsec >>= -shift_change;
tk->tkr_mono.xtime_nsec >>= -shift_change;
else
tk->tkr.xtime_nsec <<= shift_change;
tk->tkr_mono.xtime_nsec <<= shift_change;
}
tk->tkr.shift = clock->shift;
tk->tkr_raw.xtime_nsec = 0;
tk->tkr_mono.shift = clock->shift;
tk->tkr_raw.shift = clock->shift;
tk->ntp_error = 0;
tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@ -178,7 +298,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
* active clocksource. These values will be adjusted via NTP
* to counteract clock drifting.
*/
tk->tkr.mult = clock->mult;
tk->tkr_mono.mult = clock->mult;
tk->tkr_raw.mult = clock->mult;
tk->ntp_err_mult = 0;
}
@ -193,14 +314,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
{
cycle_t cycle_now, delta;
cycle_t delta;
s64 nsec;
/* read clocksource: */
cycle_now = tkr->read(tkr->clock);
/* calculate the delta since the last update_wall_time: */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
delta = timekeeping_get_delta(tkr);
nsec = delta * tkr->mult + tkr->xtime_nsec;
nsec >>= tkr->shift;
@ -209,25 +326,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
return nsec + arch_gettimeoffset();
}
static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
{
struct clocksource *clock = tk->tkr.clock;
cycle_t cycle_now, delta;
s64 nsec;
/* read clocksource: */
cycle_now = tk->tkr.read(clock);
/* calculate the delta since the last update_wall_time: */
delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
/* convert delta to nanoseconds. */
nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
/* If arch requires, add in get_arch_timeoffset() */
return nsec + arch_gettimeoffset();
}
/**
* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
* @tkr: Timekeeping readout base from which we take the update
@ -267,18 +365,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
* slightly wrong timestamp (a few nanoseconds). See
* @ktime_get_mono_fast_ns.
*/
static void update_fast_timekeeper(struct tk_read_base *tkr)
static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
{
struct tk_read_base *base = tk_fast_mono.base;
struct tk_read_base *base = tkf->base;
/* Force readers off to base[1] */
raw_write_seqcount_latch(&tk_fast_mono.seq);
raw_write_seqcount_latch(&tkf->seq);
/* Update base[0] */
memcpy(base, tkr, sizeof(*base));
/* Force readers back to base[0] */
raw_write_seqcount_latch(&tk_fast_mono.seq);
raw_write_seqcount_latch(&tkf->seq);
/* Update base[1] */
memcpy(base + 1, base, sizeof(*base));
@ -316,22 +414,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
* of the following timestamps. Callers need to be aware of that and
* deal with it.
*/
u64 notrace ktime_get_mono_fast_ns(void)
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
struct tk_read_base *tkr;
unsigned int seq;
u64 now;
do {
seq = raw_read_seqcount(&tk_fast_mono.seq);
tkr = tk_fast_mono.base + (seq & 0x01);
now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
seq = raw_read_seqcount(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tkf->seq, seq));
} while (read_seqcount_retry(&tk_fast_mono.seq, seq));
return now;
}
u64 ktime_get_mono_fast_ns(void)
{
return __ktime_get_fast_ns(&tk_fast_mono);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
u64 ktime_get_raw_fast_ns(void)
{
return __ktime_get_fast_ns(&tk_fast_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
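Because the fast accessors never spin on the main timekeeping seqcount, they can be sampled from contexts where ktime_get() is unsafe (NMI and some tracing paths), at the cost of a possibly slightly stale value. A hypothetical module-style sketch of calling them; the module name and message below are made up for illustration:

/*
 * Illustrative (hypothetical) module sketch: sample the NMI-safe fast
 * accessors once at load time.
 */
#include <linux/module.h>
#include <linux/timekeeping.h>

static int __init fastclk_demo_init(void)
{
	u64 mono = ktime_get_mono_fast_ns();
	u64 raw  = ktime_get_raw_fast_ns();

	pr_info("fastclk_demo: mono=%llu ns raw=%llu ns\n", mono, raw);
	return 0;
}

static void __exit fastclk_demo_exit(void)
{
}

module_init(fastclk_demo_init);
module_exit(fastclk_demo_exit);
MODULE_LICENSE("GPL");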
/* Suspend-time cycles value for halted fast timekeeper. */
static cycle_t cycles_at_suspend;
@ -353,12 +462,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
static void halt_fast_timekeeper(struct timekeeper *tk)
{
static struct tk_read_base tkr_dummy;
struct tk_read_base *tkr = &tk->tkr;
struct tk_read_base *tkr = &tk->tkr_mono;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
cycles_at_suspend = tkr->read(tkr->clock);
tkr_dummy.read = dummy_clock_read;
update_fast_timekeeper(&tkr_dummy);
update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
tkr = &tk->tkr_raw;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
tkr_dummy.read = dummy_clock_read;
update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}
#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@ -369,8 +483,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
xt = timespec64_to_timespec(tk_xtime(tk));
wm = timespec64_to_timespec(tk->wall_to_monotonic);
update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
tk->tkr.cycle_last);
update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
tk->tkr_mono.cycle_last);
}
static inline void old_vsyscall_fixup(struct timekeeper *tk)
@ -387,11 +501,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
* (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
* users are removed, this can be killed.
*/
remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
tk->tkr.xtime_nsec -= remainder;
tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
tk->tkr_mono.xtime_nsec -= remainder;
tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
tk->ntp_error += remainder << tk->ntp_error_shift;
tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
}
#else
#define old_vsyscall_fixup(tk)
@ -456,17 +570,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
*/
seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
nsec = (u32) tk->wall_to_monotonic.tv_nsec;
tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
/* Update the monotonic raw base */
tk->base_raw = timespec64_to_ktime(tk->raw_time);
tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
/*
* The sum of the nanoseconds portions of xtime and
* wall_to_monotonic can be greater/equal one second. Take
* this into account before updating tk->ktime_sec.
*/
nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift);
nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
if (nsec >= NSEC_PER_SEC)
seconds++;
tk->ktime_sec = seconds;
@ -489,7 +603,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
memcpy(&shadow_timekeeper, &tk_core.timekeeper,
sizeof(tk_core.timekeeper));
update_fast_timekeeper(&tk->tkr);
update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
}
/**
@ -501,22 +616,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
*/
static void timekeeping_forward_now(struct timekeeper *tk)
{
struct clocksource *clock = tk->tkr.clock;
struct clocksource *clock = tk->tkr_mono.clock;
cycle_t cycle_now, delta;
s64 nsec;
cycle_now = tk->tkr.read(clock);
delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
tk->tkr.cycle_last = cycle_now;
cycle_now = tk->tkr_mono.read(clock);
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
tk->tkr.xtime_nsec += delta * tk->tkr.mult;
tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
/* If arch requires, add in get_arch_timeoffset() */
tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
tk_normalize_xtime(tk);
nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
timespec64_add_ns(&tk->raw_time, nsec);
}
@ -537,7 +653,7 @@ int __getnstimeofday64(struct timespec64 *ts)
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
nsecs = timekeeping_get_ns(&tk->tkr);
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -577,8 +693,8 @@ ktime_t ktime_get(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono;
nsecs = timekeeping_get_ns(&tk->tkr);
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -603,8 +719,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = ktime_add(tk->tkr.base_mono, *offset);
nsecs = timekeeping_get_ns(&tk->tkr);
base = ktime_add(tk->tkr_mono.base, *offset);
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -645,8 +761,8 @@ ktime_t ktime_get_raw(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->base_raw;
nsecs = timekeeping_get_ns_raw(tk);
base = tk->tkr_raw.base;
nsecs = timekeeping_get_ns(&tk->tkr_raw);
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -674,7 +790,7 @@ void ktime_get_ts64(struct timespec64 *ts)
do {
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
nsec = timekeeping_get_ns(&tk->tkr);
nsec = timekeeping_get_ns(&tk->tkr_mono);
tomono = tk->wall_to_monotonic;
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -759,8 +875,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
ts_real->tv_sec = tk->xtime_sec;
ts_real->tv_nsec = 0;
nsecs_raw = timekeeping_get_ns_raw(tk);
nsecs_real = timekeeping_get_ns(&tk->tkr);
nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -943,7 +1059,7 @@ static int change_clocksource(void *data)
*/
if (try_module_get(new->owner)) {
if (!new->enable || new->enable(new) == 0) {
old = tk->tkr.clock;
old = tk->tkr_mono.clock;
tk_setup_internals(tk, new);
if (old->disable)
old->disable(old);
@ -971,11 +1087,11 @@ int timekeeping_notify(struct clocksource *clock)
{
struct timekeeper *tk = &tk_core.timekeeper;
if (tk->tkr.clock == clock)
if (tk->tkr_mono.clock == clock)
return 0;
stop_machine(change_clocksource, clock, NULL);
tick_clock_notify();
return tk->tkr.clock == clock ? 0 : -1;
return tk->tkr_mono.clock == clock ? 0 : -1;
}
/**
@ -993,7 +1109,7 @@ void getrawmonotonic64(struct timespec64 *ts)
do {
seq = read_seqcount_begin(&tk_core.seq);
nsecs = timekeeping_get_ns_raw(tk);
nsecs = timekeeping_get_ns(&tk->tkr_raw);
ts64 = tk->raw_time;
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -1016,7 +1132,7 @@ int timekeeping_valid_for_hres(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -1035,7 +1151,7 @@ u64 timekeeping_max_deferment(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
ret = tk->tkr.clock->max_idle_ns;
ret = tk->tkr_mono.clock->max_idle_ns;
} while (read_seqcount_retry(&tk_core.seq, seq));
@ -1114,7 +1230,6 @@ void __init timekeeping_init(void)
tk_set_xtime(tk, &now);
tk->raw_time.tv_sec = 0;
tk->raw_time.tv_nsec = 0;
tk->base_raw.tv64 = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk);
@ -1200,7 +1315,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
void timekeeping_resume(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct clocksource *clock = tk->tkr.clock;
struct clocksource *clock = tk->tkr_mono.clock;
unsigned long flags;
struct timespec64 ts_new, ts_delta;
struct timespec tmp;
@ -1228,16 +1343,16 @@ void timekeeping_resume(void)
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
cycle_now = tk->tkr.read(clock);
cycle_now = tk->tkr_mono.read(clock);
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
cycle_now > tk->tkr.cycle_last) {
cycle_now > tk->tkr_mono.cycle_last) {
u64 num, max = ULLONG_MAX;
u32 mult = clock->mult;
u32 shift = clock->shift;
s64 nsec = 0;
cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
tk->tkr.mask);
cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
tk->tkr_mono.mask);
/*
* "cycle_delta * mutl" may cause 64 bits overflow, if the
@ -1263,7 +1378,9 @@ void timekeeping_resume(void)
__timekeeping_inject_sleeptime(tk, &ts_delta);
/* Re-base the last cycle value */
tk->tkr.cycle_last = cycle_now;
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
tk->ntp_error = 0;
timekeeping_suspended = 0;
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@ -1416,15 +1533,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
*
* XXX - TODO: Doc ntp_error calculation.
*/
if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) {
if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
/* NTP adjustment caused clocksource mult overflow */
WARN_ON_ONCE(1);
return;
}
tk->tkr.mult += mult_adj;
tk->tkr_mono.mult += mult_adj;
tk->xtime_interval += interval;
tk->tkr.xtime_nsec -= offset;
tk->tkr_mono.xtime_nsec -= offset;
tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
}
@ -1486,13 +1603,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
tk->ntp_err_mult = 0;
}
if (unlikely(tk->tkr.clock->maxadj &&
(abs(tk->tkr.mult - tk->tkr.clock->mult)
> tk->tkr.clock->maxadj))) {
if (unlikely(tk->tkr_mono.clock->maxadj &&
(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
> tk->tkr_mono.clock->maxadj))) {
printk_once(KERN_WARNING
"Adjusting %s more than 11%% (%ld vs %ld)\n",
tk->tkr.clock->name, (long)tk->tkr.mult,
(long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
}
/*
@ -1509,9 +1626,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
* We'll correct this error next time through this function, when
* xtime_nsec is not as small.
*/
if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
s64 neg = -(s64)tk->tkr.xtime_nsec;
tk->tkr.xtime_nsec = 0;
if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
tk->tkr_mono.xtime_nsec = 0;
tk->ntp_error += neg << tk->ntp_error_shift;
}
}
@ -1526,13 +1643,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
*/
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{
u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
unsigned int clock_set = 0;
while (tk->tkr.xtime_nsec >= nsecps) {
while (tk->tkr_mono.xtime_nsec >= nsecps) {
int leap;
tk->tkr.xtime_nsec -= nsecps;
tk->tkr_mono.xtime_nsec -= nsecps;
tk->xtime_sec++;
/* Figure out if it's a leap sec and apply if needed */
@ -1577,9 +1694,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
/* Accumulate one shifted interval */
offset -= interval;
tk->tkr.cycle_last += interval;
tk->tkr_mono.cycle_last += interval;
tk->tkr_raw.cycle_last += interval;
tk->tkr.xtime_nsec += tk->xtime_interval << shift;
tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
@ -1622,14 +1740,17 @@ void update_wall_time(void)
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
offset = real_tk->cycle_interval;
#else
offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
tk->tkr.cycle_last, tk->tkr.mask);
offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
#endif
/* Check if there's really nothing to do */
if (offset < real_tk->cycle_interval)
goto out;
/* Do some additional sanity checking */
timekeeping_check_update(real_tk, offset);
/*
* With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently,
@ -1784,8 +1905,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono;
nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
base = tk->tkr_mono.base;
nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;
@ -1816,8 +1937,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono;
nsecs = timekeeping_get_ns(&tk->tkr);
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;

View file

@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
print_name_offset(m, dev->set_next_event);
SEQ_printf(m, "\n");
SEQ_printf(m, " set_mode: ");
print_name_offset(m, dev->set_mode);
SEQ_printf(m, "\n");
if (dev->set_mode) {
SEQ_printf(m, " set_mode: ");
print_name_offset(m, dev->set_mode);
SEQ_printf(m, "\n");
} else {
if (dev->set_mode_shutdown) {
SEQ_printf(m, " shutdown: ");
print_name_offset(m, dev->set_mode_shutdown);
SEQ_printf(m, "\n");
}
if (dev->set_mode_periodic) {
SEQ_printf(m, " periodic: ");
print_name_offset(m, dev->set_mode_periodic);
SEQ_printf(m, "\n");
}
if (dev->set_mode_oneshot) {
SEQ_printf(m, " oneshot: ");
print_name_offset(m, dev->set_mode_oneshot);
SEQ_printf(m, "\n");
}
if (dev->set_mode_resume) {
SEQ_printf(m, " resume: ");
print_name_offset(m, dev->set_mode_resume);
SEQ_printf(m, "\n");
}
}
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);

View file

@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
data corruption or a sporadic crash at a later stage once the region
is examined. The runtime overhead introduced is minimal.
config DEBUG_TIMEKEEPING
bool "Enable extra timekeeping sanity checking"
help
This option will enable additional timekeeping sanity checks
which may be helpful when diagnosing issues where timekeeping
problems are suspected.
This may include checks in the timekeeping hotpaths, so this
option may have a (very small) performance impact on some
workloads.
If unsure, say N.
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS

View file

@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int target_nid, last_cpupid = -1;
bool page_locked;
bool migrated = false;
bool was_writable;
int flags = 0;
/* A PROT_NONE fault should not end up here */
@ -1291,17 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
flags |= TNF_FAULT_LOCAL;
}
/*
* Avoid grouping on DSO/COW pages in specific and RO pages
* in general, RO pages shouldn't hurt as much anyway since
* they can be in shared cache state.
*
* FIXME! This checks "pmd_dirty()" as an approximation of
* "is this a read-only page", since checking "pmd_write()"
* is even more broken. We haven't actually turned this into
* a writable page, so pmd_write() will always be false.
*/
if (!pmd_dirty(pmd))
/* See similar comment in do_numa_page for explanation */
if (!(vma->vm_flags & VM_WRITE))
flags |= TNF_NO_GROUP;
/*
@ -1358,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (migrated) {
flags |= TNF_MIGRATED;
page_nid = target_nid;
}
} else
flags |= TNF_MIGRATE_FAIL;
goto out;
clear_pmdnuma:
BUG_ON(!PageLocked(page));
was_writable = pmd_write(pmd);
pmd = pmd_modify(pmd, vma->vm_page_prot);
pmd = pmd_mkyoung(pmd);
if (was_writable)
pmd = pmd_mkwrite(pmd);
set_pmd_at(mm, haddr, pmdp, pmd);
update_mmu_cache_pmd(vma, addr, pmdp);
unlock_page(page);
@ -1487,6 +1484,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
pmd_t entry;
bool preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
/*
@ -1502,9 +1500,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (!prot_numa || !pmd_protnone(*pmd)) {
entry = pmdp_get_and_clear_notify(mm, addr, pmd);
entry = pmd_modify(entry, newprot);
if (preserve_write)
entry = pmd_mkwrite(entry);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
BUG_ON(pmd_write(entry));
BUG_ON(!preserve_write && pmd_write(entry));
}
spin_unlock(ptl);
}

View file

@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int last_cpupid;
int target_nid;
bool migrated = false;
bool was_writable = pte_write(pte);
int flags = 0;
/* A PROT_NONE fault should not end up here */
@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Make it present again */
pte = pte_modify(pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (was_writable)
pte = pte_mkwrite(pte);
set_pte_at(mm, addr, ptep, pte);
update_mmu_cache(vma, addr, ptep);
@ -3069,16 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
* Avoid grouping on DSO/COW pages in specific and RO pages
* in general, RO pages shouldn't hurt as much anyway since
* they can be in shared cache state.
*
* FIXME! This checks "pmd_dirty()" as an approximation of
* "is this a read-only page", since checking "pmd_write()"
* is even more broken. We haven't actually turned this into
* a writable page, so pmd_write() will always be false.
* Avoid grouping on RO pages in general. RO pages shouldn't hurt as
* much anyway since they can be in shared cache state. This misses
* the case where a mapping is writable but the process never writes
* to it but pte_write gets cleared during protection updates and
* pte_dirty has unpredictable behaviour between PTE scan updates,
* background writeback, dirty balancing and application behaviour.
*/
if (!pte_dirty(pte))
if (!(vma->vm_flags & VM_WRITE))
flags |= TNF_NO_GROUP;
/*
@ -3102,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (migrated) {
page_nid = target_nid;
flags |= TNF_MIGRATED;
}
} else
flags |= TNF_MIGRATE_FAIL;
out:
if (page_nid != -1)

View file

@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
return NULL;
arch_refresh_nodedata(nid, pgdat);
} else {
/* Reset the nr_zones and classzone_idx to 0 before reuse */
pgdat->nr_zones = 0;
pgdat->classzone_idx = 0;
}
/* we can use NODE_DATA(nid) from here */
@ -1977,15 +1981,6 @@ void try_offline_node(int nid)
if (is_vmalloc_addr(zone->wait_table))
vfree(zone->wait_table);
}
/*
* Since there is no way to guarentee the address of pgdat/zone is not
* on stack of any kernel threads or used by other kernel objects
* without reference counting or other symchronizing method, do not
* reset node_data and free pgdat here. Just reset it to 0 and reuse
* the memory when the node is online again.
*/
memset(pgdat, 0, sizeof(*pgdat));
}
EXPORT_SYMBOL(try_offline_node);

View file

@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end);
importer->anon_vma = exporter->anon_vma;
error = anon_vma_clone(importer, exporter);
if (error) {
importer->anon_vma = NULL;
if (error)
return error;
}
}
}

View file

@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
oldpte = *pte;
if (pte_present(oldpte)) {
pte_t ptent;
bool preserve_write = prot_numa && pte_write(oldpte);
/*
* Avoid trapping faults against the zero or KSM
@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptent = ptep_modify_prot_start(mm, addr, pte);
ptent = pte_modify(ptent, newprot);
if (preserve_write)
ptent = pte_mkwrite(ptent);
/* Avoid taking write faults for known dirty pages */
if (dirty_accountable && pte_dirty(ptent) &&

View file

@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
* write_bandwidth = (bw * elapsed + write_bandwidth * (period - elapsed)) / period
*
* @written may have decreased due to account_page_redirty().
* Avoid underflowing @bw calculation.
*/
bw = written - bdi->written_stamp;
bw = written - min(written, bdi->written_stamp);
bw *= HZ;
if (unlikely(elapsed > period)) {
do_div(bw, elapsed);
@ -922,7 +925,7 @@ static void global_update_bandwidth(unsigned long thresh,
unsigned long now)
{
static DEFINE_SPINLOCK(dirty_lock);
static unsigned long update_time;
static unsigned long update_time = INITIAL_JIFFIES;
/*
* check locklessly first to optimize away locking for the most time

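Numerically, the bandwidth update above blends the new throughput sample with the previous estimate, weighted by how much of the averaging period has elapsed, and the min() clamp prevents a bogus huge sample when @written has moved backwards. A user-space sketch of the arithmetic, with HZ and the averaging period chosen purely for illustration (the kernel also handles elapsed > period separately):

/*
 * Sketch of the weighted bandwidth blend described in the comment above.
 * Units follow the kernel code (pages and jiffies); HZ and PERIOD are
 * assumptions for this example, and elapsed is assumed non-zero and
 * no larger than PERIOD.
 */
#include <stdio.h>

#define HZ     100UL
#define PERIOD (3 * HZ)                 /* averaging period, assumed 3 s */

static unsigned long update_write_bandwidth(unsigned long old_bw,
					    unsigned long written,
					    unsigned long written_stamp,
					    unsigned long elapsed)
{
	/* clamp as in the hunk above: written - min(written, written_stamp) */
	unsigned long stamp = written_stamp < written ? written_stamp : written;
	unsigned long bw = (written - stamp) * HZ / elapsed;  /* pages/second */

	/* blend: bw * elapsed + old_bw * (PERIOD - elapsed), all over PERIOD */
	return (bw * elapsed + old_bw * (PERIOD - elapsed)) / PERIOD;
}

int main(void)
{
	/* old estimate 1000 pages/s; 220 pages written over 20 jiffies (0.2 s) */
	printf("new estimate: %lu pages/s\n",
	       update_write_bandwidth(1000, 1220, 1000, 20));   /* -> 1006 */
	return 0;
}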
View file

@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
if (!is_migrate_isolate_page(buddy)) {
__isolate_free_page(page, order);
kernel_map_pages(page, (1 << order), 1);
set_page_refcounted(page);
isolated_page = page;
}

View file

@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end,
vma = vma->vm_next;
err = walk_page_test(start, next, walk);
if (err > 0)
if (err > 0) {
/*
* positive return values are purely for
* controlling the pagewalk, so should never
* be passed to the callers.
*/
err = 0;
continue;
}
if (err < 0)
break;
}

View file

@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
return 0;
enomem_failure:
/*
* dst->anon_vma is dropped here otherwise its degree can be incorrectly
* decremented in unlink_anon_vmas().
* We can safely do this because callers of anon_vma_clone() don't care
* about dst->anon_vma if anon_vma_clone() failed.
*/
dst->anon_vma = NULL;
unlink_anon_vmas(dst);
return -ENOMEM;
}

View file

@ -2449,7 +2449,8 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
} while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
} while (IS_ENABLED(CONFIG_PREEMPT) &&
unlikely(tid != READ_ONCE(c->tid)));
/*
* Irqless object alloc/free algorithm used here depends on sequence
@ -2718,7 +2719,8 @@ static __always_inline void slab_free(struct kmem_cache *s,
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
} while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
} while (IS_ENABLED(CONFIG_PREEMPT) &&
unlikely(tid != READ_ONCE(c->tid)));
/* Same with comment on barrier() in slab_alloc_node() */
barrier();

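The READ_ONCE() added in these two hunks forces the compiler to emit exactly one load of c->tid for the comparison, so it cannot fuse or re-read the value in a way that defeats the check for preemption between reading the tid and fetching the per-CPU pointer. A toy user-space rendering of the pattern; the macro below mirrors the kernel's definition only in spirit, and the per-CPU machinery is reduced to a single static slot:

/*
 * Toy sketch of the tid/per-CPU consistency check above. In the kernel,
 * this_cpu_read() and raw_cpu_ptr() may run on different CPUs if the task
 * is preempted in between; the retry loop detects that via the tid.
 */
#include <stdio.h>

#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct kmem_cache_cpu {
	unsigned long tid;      /* transaction id, bumped on every update */
	void *freelist;
};

static struct kmem_cache_cpu cpu_slab;   /* stand-in for the per-CPU slot */

int main(void)
{
	unsigned long tid;
	struct kmem_cache_cpu *c;

	cpu_slab.tid = 42;

	do {
		tid = READ_ONCE(cpu_slab.tid);   /* would be this_cpu_read() */
		c = &cpu_slab;                   /* would be raw_cpu_ptr()   */
	} while (tid != READ_ONCE(c->tid));      /* retry if inconsistent    */

	printf("consistent snapshot: tid=%lu\n", tid);
	return 0;
}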