mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-01 22:54:01 +00:00
Misc fixes:
- Remove the broken vsyscall emulation code from the page fault code. - Fix kexec crash triggered by certain SEV RMP table layouts. - Fix unchecked MSR access error when disabling the x2APIC via iommu=off. Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmY3TNURHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1gocA//TLVBMhvtanCiPOkN5UnfnFyKnkKOIb6w nEe7UhtD/0ASXlCRYr6KtubQjBVXVf850paa59gaHPXJ2JxD0iFe1YuiHVSoV6dL Xp2O0NsL8xunUbW2qXquzoerPsWlCmxJtWofDfZvk3unTPr9bMWObpXo2DBoLK9z yJRr9y0h0ceNWBdA7vQJVs4kgbhe93SPagvI5KAmtk0aHzb2qW4aVN/zNbhFhxDb UkL3hoi7TocPdZE2v1vJ+78yh+My309U/yNIZDpQw4MAiBDPAes+PyViTMB78Jl/ aWWdd9of0US4avgVqp6Z9afLbI46v5C4NJhpJFV/2AOzFoilZ3xIMF8V4fklk6sZ rizv6AR0gIJRzOtVxHayCetI95gLgmu0ctn+xcl0qKIHdxbacBxcPjWFHzfJUjG1 jMESa/bCYSbjsoQCUr84Haymky/sUPRSiI+CQ8vg1ZCDze+gqrsNRndi2TFpeNGY iAMw3YkW40pNhKOpWgWu1+Tm58c7O0HAk69GRvVJidciNEZ2wbPfsGasC8/eEJrD 8vbGqMnFtbWvlGIZIphaS9FdaFnPOs2ACBiwGZdpvHmhcJHzO7uyRB6NE7inoCem h/AJyClnjUXr1WgtHPwd8nc8pjNm/pw3x8XZqH82NkQclg4QhQ3qxuTb9YRZKPoF 6itlrfcqgw8= =o9E8 -----END PGP SIGNATURE----- Merge tag 'x86-urgent-2024-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull misc x86 fixes from Ingo Molnar: - Remove the broken vsyscall emulation code from the page fault code - Fix kexec crash triggered by certain SEV RMP table layouts - Fix unchecked MSR access error when disabling the x2APIC via iommu=off * tag 'x86-urgent-2024-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mm: Remove broken vsyscall emulation code from the page fault code x86/apic: Don't access the APIC when disabling x2APIC x86/sev: Add callback to apply RMP table fixups for kexec x86/e820: Add a new e820 table update helper
This commit is contained in:
commit
d099637d07
9 changed files with 64 additions and 67 deletions
|
@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
|
|||
|
||||
static bool write_ok_or_segv(unsigned long ptr, size_t size)
|
||||
{
|
||||
/*
|
||||
* XXX: if access_ok, get_user, and put_user handled
|
||||
* sig_on_uaccess_err, this could go away.
|
||||
*/
|
||||
|
||||
if (!access_ok((void __user *)ptr, size)) {
|
||||
struct thread_struct *thread = &current->thread;
|
||||
|
||||
|
@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
|
|||
bool emulate_vsyscall(unsigned long error_code,
|
||||
struct pt_regs *regs, unsigned long address)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
unsigned long caller;
|
||||
int vsyscall_nr, syscall_nr, tmp;
|
||||
int prev_sig_on_uaccess_err;
|
||||
long ret;
|
||||
unsigned long orig_dx;
|
||||
|
||||
|
@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code,
|
|||
goto sigsegv;
|
||||
}
|
||||
|
||||
tsk = current;
|
||||
|
||||
/*
|
||||
* Check for access_ok violations and find the syscall nr.
|
||||
*
|
||||
|
@ -234,12 +225,8 @@ bool emulate_vsyscall(unsigned long error_code,
|
|||
goto do_ret; /* skip requested */
|
||||
|
||||
/*
|
||||
* With a real vsyscall, page faults cause SIGSEGV. We want to
|
||||
* preserve that behavior to make writing exploits harder.
|
||||
* With a real vsyscall, page faults cause SIGSEGV.
|
||||
*/
|
||||
prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
|
||||
current->thread.sig_on_uaccess_err = 1;
|
||||
|
||||
ret = -EFAULT;
|
||||
switch (vsyscall_nr) {
|
||||
case 0:
|
||||
|
@ -262,23 +249,12 @@ bool emulate_vsyscall(unsigned long error_code,
|
|||
break;
|
||||
}
|
||||
|
||||
current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
|
||||
|
||||
check_fault:
|
||||
if (ret == -EFAULT) {
|
||||
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
|
||||
warn_bad_vsyscall(KERN_INFO, regs,
|
||||
"vsyscall fault (exploit attempt?)");
|
||||
|
||||
/*
|
||||
* If we failed to generate a signal for any reason,
|
||||
* generate one here. (This should be impossible.)
|
||||
*/
|
||||
if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
|
||||
!sigismember(&tsk->pending.signal, SIGSEGV)))
|
||||
goto sigsegv;
|
||||
|
||||
return true; /* Don't emulate the ret. */
|
||||
goto sigsegv;
|
||||
}
|
||||
|
||||
regs->ax = ret;
|
||||
|
|
|
@ -17,6 +17,7 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
|
|||
extern void e820__range_add (u64 start, u64 size, enum e820_type type);
|
||||
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
|
||||
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
|
||||
extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
|
||||
|
||||
extern void e820__print_table(char *who);
|
||||
extern int e820__update_table(struct e820_table *table);
|
||||
|
|
|
@ -472,7 +472,6 @@ struct thread_struct {
|
|||
unsigned long iopl_emul;
|
||||
|
||||
unsigned int iopl_warn:1;
|
||||
unsigned int sig_on_uaccess_err:1;
|
||||
|
||||
/*
|
||||
* Protection Keys Register for Userspace. Loaded immediately on
|
||||
|
|
|
@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
|
|||
int rmp_make_shared(u64 pfn, enum pg_level level);
|
||||
void snp_leak_pages(u64 pfn, unsigned int npages);
|
||||
void kdump_sev_callback(void);
|
||||
void snp_fixup_e820_tables(void);
|
||||
#else
|
||||
static inline bool snp_probe_rmptable_info(void) { return false; }
|
||||
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
|
||||
|
@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
|
|||
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
|
||||
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
|
||||
static inline void kdump_sev_callback(void) { }
|
||||
static inline void snp_fixup_e820_tables(void) {}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1771,7 +1771,7 @@ void x2apic_setup(void)
|
|||
__x2apic_enable();
|
||||
}
|
||||
|
||||
static __init void apic_set_fixmap(void);
|
||||
static __init void apic_set_fixmap(bool read_apic);
|
||||
|
||||
static __init void x2apic_disable(void)
|
||||
{
|
||||
|
@ -1793,7 +1793,12 @@ static __init void x2apic_disable(void)
|
|||
}
|
||||
|
||||
__x2apic_disable();
|
||||
apic_set_fixmap();
|
||||
/*
|
||||
* Don't reread the APIC ID as it was already done from
|
||||
* check_x2apic() and the APIC driver still is a x2APIC variant,
|
||||
* which fails to do the read after x2APIC was disabled.
|
||||
*/
|
||||
apic_set_fixmap(false);
|
||||
}
|
||||
|
||||
static __init void x2apic_enable(void)
|
||||
|
@ -2057,13 +2062,14 @@ void __init init_apic_mappings(void)
|
|||
}
|
||||
}
|
||||
|
||||
static __init void apic_set_fixmap(void)
|
||||
static __init void apic_set_fixmap(bool read_apic)
|
||||
{
|
||||
set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
|
||||
apic_mmio_base = APIC_BASE;
|
||||
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
|
||||
apic_mmio_base, mp_lapic_addr);
|
||||
apic_read_boot_cpu_id(false);
|
||||
if (read_apic)
|
||||
apic_read_boot_cpu_id(false);
|
||||
}
|
||||
|
||||
void __init register_lapic_address(unsigned long address)
|
||||
|
@ -2073,7 +2079,7 @@ void __init register_lapic_address(unsigned long address)
|
|||
mp_lapic_addr = address;
|
||||
|
||||
if (!x2apic_mode)
|
||||
apic_set_fixmap();
|
||||
apic_set_fixmap(true);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -532,9 +532,10 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum
|
|||
return __e820__range_update(e820_table, start, size, old_type, new_type);
|
||||
}
|
||||
|
||||
static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
|
||||
u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size,
|
||||
enum e820_type old_type, enum e820_type new_type)
|
||||
{
|
||||
return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
|
||||
return __e820__range_update(t, start, size, old_type, new_type);
|
||||
}
|
||||
|
||||
/* Remove a range of memory from the E820 table: */
|
||||
|
@ -806,7 +807,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
|
|||
|
||||
addr = memblock_phys_alloc(size, align);
|
||||
if (addr) {
|
||||
e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
|
||||
e820__range_update_table(e820_table_kexec, addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
|
||||
pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
|
||||
e820__update_table_kexec();
|
||||
}
|
||||
|
|
|
@ -723,39 +723,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
|
|||
WARN_ON_ONCE(user_mode(regs));
|
||||
|
||||
/* Are we prepared to handle this kernel fault? */
|
||||
if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
|
||||
/*
|
||||
* Any interrupt that takes a fault gets the fixup. This makes
|
||||
* the below recursive fault logic only apply to a faults from
|
||||
* task context.
|
||||
*/
|
||||
if (in_interrupt())
|
||||
return;
|
||||
|
||||
/*
|
||||
* Per the above we're !in_interrupt(), aka. task context.
|
||||
*
|
||||
* In this case we need to make sure we're not recursively
|
||||
* faulting through the emulate_vsyscall() logic.
|
||||
*/
|
||||
if (current->thread.sig_on_uaccess_err && signal) {
|
||||
sanitize_error_code(address, &error_code);
|
||||
|
||||
set_signal_archinfo(address, error_code);
|
||||
|
||||
if (si_code == SEGV_PKUERR) {
|
||||
force_sig_pkuerr((void __user *)address, pkey);
|
||||
} else {
|
||||
/* XXX: hwpoison faults will set the wrong code. */
|
||||
force_sig_fault(signal, si_code, (void __user *)address);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Barring that, we can do the fixup and be happy.
|
||||
*/
|
||||
if (fixup_exception(regs, X86_TRAP_PF, error_code, address))
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD erratum #91 manifests as a spurious page fault on a PREFETCH
|
||||
|
|
|
@ -102,6 +102,13 @@ void __init mem_encrypt_setup_arch(void)
|
|||
phys_addr_t total_mem = memblock_phys_mem_size();
|
||||
unsigned long size;
|
||||
|
||||
/*
|
||||
* Do RMP table fixups after the e820 tables have been setup by
|
||||
* e820__memory_setup().
|
||||
*/
|
||||
if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
|
||||
snp_fixup_e820_tables();
|
||||
|
||||
if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
|
||||
return;
|
||||
|
||||
|
|
|
@ -163,6 +163,42 @@ bool snp_probe_rmptable_info(void)
|
|||
return true;
|
||||
}
|
||||
|
||||
static void __init __snp_fixup_e820_tables(u64 pa)
|
||||
{
|
||||
if (IS_ALIGNED(pa, PMD_SIZE))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Handle cases where the RMP table placement by the BIOS is not
|
||||
* 2M aligned and the kexec kernel could try to allocate
|
||||
* from within that chunk which then causes a fatal RMP fault.
|
||||
*
|
||||
* The e820_table needs to be updated as it is converted to
|
||||
* kernel memory resources and used by KEXEC_FILE_LOAD syscall
|
||||
* to load kexec segments.
|
||||
*
|
||||
* The e820_table_firmware needs to be updated as it is exposed
|
||||
* to sysfs and used by the KEXEC_LOAD syscall to load kexec
|
||||
* segments.
|
||||
*
|
||||
* The e820_table_kexec needs to be updated as it passed to
|
||||
* the kexec-ed kernel.
|
||||
*/
|
||||
pa = ALIGN_DOWN(pa, PMD_SIZE);
|
||||
if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) {
|
||||
pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
|
||||
e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
|
||||
e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
|
||||
e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
|
||||
}
|
||||
}
|
||||
|
||||
void __init snp_fixup_e820_tables(void)
|
||||
{
|
||||
__snp_fixup_e820_tables(probed_rmp_base);
|
||||
__snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do the necessary preparations which are verified by the firmware as
|
||||
* described in the SNP_INIT_EX firmware command description in the SNP
|
||||
|
|
Loading…
Reference in a new issue