linux-stable/arch/x86/kernel/doublefault_32.c
Andy Lutomirski 3fb0fdb3bb x86/stackprotector/32: Make the canary into a regular percpu variable
On 32-bit kernels, the stackprotector canary is quite nasty -- it is
stored at %gs:(20), which is nasty because 32-bit kernels use %fs for
percpu storage.  It's even nastier because it means that whether %gs
contains userspace state or kernel state while running kernel code
depends on whether stackprotector is enabled (this is
CONFIG_X86_32_LAZY_GS), and this setting radically changes the way
that segment selectors work.  Supporting both variants is a
maintenance and testing mess.

Merely rearranging so that percpu and the stack canary
share the same segment would be messy as the 32-bit percpu address
layout isn't currently compatible with putting a variable at a fixed
offset.

Fortunately, GCC 8.1 added options that allow the stack canary to be
accessed as %fs:__stack_chk_guard, effectively turning it into an ordinary
percpu variable.  This lets us get rid of all of the code to manage the
stack canary GDT descriptor and the CONFIG_X86_32_LAZY_GS mess.

(That name is special.  We could use any symbol we want for the
 %fs-relative mode, but for CONFIG_SMP=n, gcc refuses to let us use any
 name other than __stack_chk_guard.)

Forcibly disable stackprotector on older compilers that don't support
the new options and turn the stack canary into a percpu variable. The
"lazy GS" approach is now used for all 32-bit configurations.

Also makes load_gs_index() work on 32-bit kernels. On 64-bit kernels,
it loads the GS selector and updates the user GSBASE accordingly. (This
is unchanged.) On 32-bit kernels, it loads the GS selector and updates
GSBASE, which is now always the user base. This means that the overall
effect is the same on 32-bit and 64-bit, which avoids some ifdeffery.

 [ bp: Massage commit message. ]

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/c0ff7dba14041c7e5d1cae5d4df052f03759bef3.1613243844.git.luto@kernel.org
2021-03-08 13:19:05 +01:00

131 lines
3.4 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/init_task.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/traps.h>
#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)
static void set_df_gdt_entry(unsigned int cpu);
/*
* Called by double_fault with CR0.TS and EFLAGS.NT cleared. The CPU thinks
* we're running the doublefault task. Cannot return.
*/
asmlinkage noinstr void __noreturn doublefault_shim(void)
{
unsigned long cr2;
struct pt_regs regs;
BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE);
cr2 = native_read_cr2();
/* Reset back to the normal kernel task. */
force_reload_TR();
set_df_gdt_entry(smp_processor_id());
trace_hardirqs_off();
/*
* Fill in pt_regs. A downside of doing this in C is that the unwinder
* won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump
* won't successfully unwind to the source of the double fault.
* The main dump from exc_double_fault() is fine, though, since it
* uses these regs directly.
*
* If anyone ever cares, this could be moved to asm.
*/
regs.ss = TSS(ss);
regs.__ssh = 0;
regs.sp = TSS(sp);
regs.flags = TSS(flags);
regs.cs = TSS(cs);
/* We won't go through the entry asm, so we can leave __csh as 0. */
regs.__csh = 0;
regs.ip = TSS(ip);
regs.orig_ax = 0;
regs.gs = TSS(gs);
regs.__gsh = 0;
regs.fs = TSS(fs);
regs.__fsh = 0;
regs.es = TSS(es);
regs.__esh = 0;
regs.ds = TSS(ds);
regs.__dsh = 0;
regs.ax = TSS(ax);
regs.bp = TSS(bp);
regs.di = TSS(di);
regs.si = TSS(si);
regs.dx = TSS(dx);
regs.cx = TSS(cx);
regs.bx = TSS(bx);
exc_double_fault(&regs, 0, cr2);
/*
* x86_32 does not save the original CR3 anywhere on a task switch.
* This means that, even if we wanted to return, we would need to find
* some way to reconstruct CR3. We could make a credible guess based
* on cpu_tlbstate, but that would be racy and would not account for
* PTI.
*
* Instead, don't bother. We can return through
* rewind_stack_do_exit() instead.
*/
panic("cannot return from double fault\n");
}
DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
.tss = {
/*
* No sp0 or ss0 -- we never run CPL != 0 with this TSS
* active. sp is filled in later.
*/
.ldt = 0,
.io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
.ip = (unsigned long) asm_exc_double_fault,
.flags = X86_EFLAGS_FIXED,
.es = __USER_DS,
.cs = __KERNEL_CS,
.ss = __KERNEL_DS,
.ds = __USER_DS,
.fs = __KERNEL_PERCPU,
.gs = 0,
.__cr3 = __pa_nodebug(swapper_pg_dir),
},
};
static void set_df_gdt_entry(unsigned int cpu)
{
/* Set up doublefault TSS pointer in the GDT */
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
&get_cpu_entry_area(cpu)->doublefault_stack.tss);
}
void doublefault_init_cpu_tss(void)
{
unsigned int cpu = smp_processor_id();
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
/*
* The linker isn't smart enough to initialize percpu variables that
* point to other places in percpu space.
*/
this_cpu_write(doublefault_stack.tss.sp,
(unsigned long)&cea->doublefault_stack.stack +
sizeof(doublefault_stack.stack));
set_df_gdt_entry(cpu);
}