diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 18d8f17f755c..0904f5676e4d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -73,10 +73,8 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, unsigned int nr) { if (likely(nr < IA32_NR_syscalls)) { - instrumentation_begin(); nr = array_index_nospec(nr, IA32_NR_syscalls); regs->ax = ia32_sys_call_table[nr](regs); - instrumentation_end(); } } @@ -91,8 +89,11 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) * or may not be necessary, but it matches the old asm behavior. */ nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + instrumentation_begin(); do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); syscall_exit_to_user_mode(regs); } @@ -121,11 +122,12 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) res = get_user(*(u32 *)®s->bp, (u32 __user __force *)(unsigned long)(u32)regs->sp); } - instrumentation_end(); if (res) { /* User code screwed up. */ regs->ax = -EFAULT; + + instrumentation_end(); syscall_exit_to_user_mode(regs); return false; } @@ -135,6 +137,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); syscall_exit_to_user_mode(regs); return true; } diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a5aba4ab0224..67a4f1cb2aac 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -16,14 +16,25 @@ * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It * disables preemption so be careful if you intend to use it for long periods * of time. - * If you intend to use the FPU in softirq you need to check first with + * If you intend to use the FPU in irq/softirq you need to check first with * irq_fpu_usable() if it is possible. */ -extern void kernel_fpu_begin(void); + +/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */ +#define KFPU_387 _BITUL(0) /* 387 state will be initialized */ +#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */ + +extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); extern void fpregs_mark_activate(void); +/* Code that is unaware of kernel_fpu_begin_mask() can use this */ +static inline void kernel_fpu_begin(void) +{ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +} + /* * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. * A context switch will (and softirq might) save CPU's FPU registers to diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 5e658ba2654a..9abe842dbd84 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -97,6 +97,7 @@ #define INTEL_FAM6_LAKEFIELD 0x8A #define INTEL_FAM6_ALDERLAKE 0x97 +#define INTEL_FAM6_ALDERLAKE_L 0x9A /* "Small Core" Processors (Atom) */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 0b4920a7238e..e16cccdd0420 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -86,7 +86,7 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} * think of extending them - you will be slapped with a stinking trout or a frozen * shark will reach you, wherever you are! You've been warned. */ -static inline unsigned long long notrace __rdmsr(unsigned int msr) +static __always_inline unsigned long long __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); @@ -98,7 +98,7 @@ static inline unsigned long long notrace __rdmsr(unsigned int msr) return EAX_EDX_VAL(val, low, high); } -static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) +static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { asm volatile("1: wrmsr\n" "2:\n" diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 488a8e848754..9239399e5491 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -110,6 +110,8 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +extern unsigned int __max_die_per_package; + #ifdef CONFIG_SMP #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) @@ -118,8 +120,6 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) -extern unsigned int __max_die_per_package; - static inline int topology_max_die_per_package(void) { return __max_die_per_package; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f8ca66f3d861..347a956f71ca 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -542,12 +542,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) u32 ecx; ecx = cpuid_ecx(0x8000001e); - nodes_per_socket = ((ecx >> 8) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes_per_socket = ((value >> 3) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; } if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 13d3f1cbda17..e133ce1e562b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1992,10 +1992,9 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) * that out because it's an indirect call. Annotate it. */ instrumentation_begin(); - trace_hardirqs_off_finish(); + machine_check_vector(regs); - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); + instrumentation_end(); irqentry_nmi_exit(regs, irq_state); } @@ -2004,7 +2003,9 @@ static __always_inline void exc_machine_check_user(struct pt_regs *regs) { irqentry_enter_from_user_mode(regs); instrumentation_begin(); + machine_check_vector(regs); + instrumentation_end(); irqentry_exit_to_user_mode(regs); } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 1068002c8532..8678864ce712 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -25,10 +25,10 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -#ifdef CONFIG_SMP unsigned int __max_die_per_package __read_mostly = 1; EXPORT_SYMBOL(__max_die_per_package); +#ifdef CONFIG_SMP /* * Check if given CPUID extended toplogy "leaf" is implemented */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index eb86a2b831b1..571220ac8bea 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -121,7 +121,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu) } EXPORT_SYMBOL(copy_fpregs_to_fpstate); -void kernel_fpu_begin(void) +void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); @@ -141,13 +141,14 @@ void kernel_fpu_begin(void) } __cpu_invalidate_fpregs_state(); - if (boot_cpu_has(X86_FEATURE_XMM)) + /* Put sane initial values into the control registers. */ + if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) ldmxcsr(MXCSR_DEFAULT); - if (boot_cpu_has(X86_FEATURE_FPU)) + if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); } -EXPORT_SYMBOL_GPL(kernel_fpu_begin); +EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 0bd1a0fc587e..84c1821819af 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -225,7 +225,7 @@ static inline u64 sev_es_rd_ghcb_msr(void) return __rdmsr(MSR_AMD64_SEV_ES_GHCB); } -static inline void sev_es_wr_ghcb_msr(u64 val) +static __always_inline void sev_es_wr_ghcb_msr(u64 val) { u32 low, high; @@ -286,6 +286,12 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(target, size)) { + memcpy(dst, buf, size); + return ES_OK; + } + switch (size) { case 1: memcpy(&d1, buf, 1); @@ -335,6 +341,12 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(s, size)) { + memcpy(buf, src, size); + return ES_OK; + } + switch (size) { case 1: if (get_user(d1, s)) diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index 4321fa02e18d..419365c48b2a 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -26,6 +26,16 @@ #include #include +/* + * Use KFPU_387. MMX instructions are not affected by MXCSR, + * but both AMD and Intel documentation states that even integer MMX + * operations will result in #MF if an exception is pending in FCW. + * + * EMMS is not needed afterwards because, after calling kernel_fpu_end(), + * any subsequent user of the 387 stack will reinitialize it using + * KFPU_387. + */ + void *_mmx_memcpy(void *to, const void *from, size_t len) { void *p; @@ -37,7 +47,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) p = to; i = len >> 6; /* len/64 */ - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( "1: prefetch (%0)\n" /* This set is 28 bytes */ @@ -127,7 +137,7 @@ static void fast_clear_page(void *page) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -160,7 +170,7 @@ static void fast_copy_page(void *to, void *from) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); /* * maybe the prefetch stuff can go before the expensive fnsave... @@ -247,7 +257,7 @@ static void fast_clear_page(void *page) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -282,7 +292,7 @@ static void fast_copy_page(void *to, void *from) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( "1: prefetch (%0)\n" diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c1418b47f625..bdaf4829098c 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -79,7 +79,7 @@ module_param(lock_stat, int, 0644); DEFINE_PER_CPU(unsigned int, lockdep_recursion); EXPORT_PER_CPU_SYMBOL_GPL(lockdep_recursion); -static inline bool lockdep_enabled(void) +static __always_inline bool lockdep_enabled(void) { if (!debug_locks) return false; @@ -5271,12 +5271,15 @@ static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie /* * Check whether we follow the irq-flags state precisely: */ -static void check_flags(unsigned long flags) +static noinstr void check_flags(unsigned long flags) { #if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) if (!debug_locks) return; + /* Get the warning out.. */ + instrumentation_begin(); + if (irqs_disabled_flags(flags)) { if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())) { printk("possible reason: unannotated irqs-off.\n"); @@ -5304,6 +5307,8 @@ static void check_flags(unsigned long flags) if (!debug_locks) print_irqtrace_events(current); + + instrumentation_end(); #endif }