From d974ffcfb7447db5f29a4b662a3eaf99a4e1109e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Jun 2019 21:45:02 -0700 Subject: [PATCH 1/8] Documentation/admin: Remove the vsyscall=native documentation The vsyscall=native feature is gone -- remove the docs. Fixes: 076ca272a14c ("x86/vsyscall/64: Drop "native" vsyscalls") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Kees Cook Cc: Florian Weimer Cc: Jann Horn Cc: stable@vger.kernel.org Cc: Borislav Petkov Cc: Kernel Hardening Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/d77c7105eb4c57c1a95a95b6a5b8ba194a18e764.1561610354.git.luto@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 138f6664b2e2..0082d1e56999 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5102,12 +5102,6 @@ emulate [default] Vsyscalls turn into traps and are emulated reasonably safely. - native Vsyscalls are native syscall instructions. - This is a little bit faster than trapping - and makes a few dynamic recompilers work - better than they would in emulation mode. - It also makes exploits much easier to write. - none Vsyscalls don't work at all. This makes them quite hard to use for exploits but might break your system. From bd49e16e3339f052fae05fb3e955c5db0c9c6445 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Jun 2019 21:45:03 -0700 Subject: [PATCH 2/8] x86/vsyscall: Add a new vsyscall=xonly mode With vsyscall emulation on, a readable vsyscall page is still exposed that contains syscall instructions that validly implement the vsyscalls. This is required because certain dynamic binary instrumentation tools attempt to read the call targets of call instructions in the instrumented code. If the instrumented code uses vsyscalls, then the vsyscall page needs to contain readable code. Unfortunately, leaving readable memory at a deterministic address can be used to help various ASLR bypasses, so some hardening value can be gained by disallowing vsyscall reads. Given how rarely the vsyscall page needs to be readable, add a mechanism to make the vsyscall page be execute only. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: Florian Weimer Cc: Jann Horn Cc: Borislav Petkov Cc: Kernel Hardening Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/d17655777c21bc09a7af1bbcf74e6f2b69a51152.1561610354.git.luto@kernel.org --- .../admin-guide/kernel-parameters.txt | 7 +++- arch/x86/Kconfig | 33 ++++++++++++++----- arch/x86/entry/vsyscall/vsyscall_64.c | 16 +++++++-- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0082d1e56999..be8c3a680afa 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5100,7 +5100,12 @@ targets for exploits that can control RIP. emulate [default] Vsyscalls turn into traps and are - emulated reasonably safely. + emulated reasonably safely. The vsyscall + page is readable. + + xonly Vsyscalls turn into traps and are + emulated reasonably safely. The vsyscall + page is not readable. none Vsyscalls don't work at all. This makes them quite hard to use for exploits but diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2bbbd4d1ba31..0182d2c67590 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2293,23 +2293,38 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[emulate|none]. + line parameter vsyscall=[emulate|xonly|none]. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty to improve security. - If unsure, select "Emulate". + If unsure, select "Emulate execution only". config LEGACY_VSYSCALL_EMULATE - bool "Emulate" + bool "Full emulation" help - The kernel traps and emulates calls into the fixed - vsyscall address mapping. This makes the mapping - non-executable, but it still contains known contents, - which could be used in certain rare security vulnerability - exploits. This configuration is recommended when userspace - still uses the vsyscall area. + The kernel traps and emulates calls into the fixed vsyscall + address mapping. This makes the mapping non-executable, but + it still contains readable known contents, which could be + used in certain rare security vulnerability exploits. This + configuration is recommended when using legacy userspace + that still uses vsyscalls along with legacy binary + instrumentation tools that require code to be readable. + + An example of this type of legacy userspace is running + Pin on an old binary that still uses vsyscalls. + + config LEGACY_VSYSCALL_XONLY + bool "Emulate execution only" + help + The kernel traps and emulates calls into the fixed vsyscall + address mapping and does not allow reads. This + configuration is recommended when userspace might use the + legacy vsyscall area but support for legacy binary + instrumentation of legacy code is not needed. It mitigates + certain uses of the vsyscall area as an ASLR-bypassing + buffer. config LEGACY_VSYSCALL_NONE bool "None" diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index d9d81ad7a400..fedd7628f3a6 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,9 +42,11 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NONE } vsyscall_mode = +static enum { EMULATE, XONLY, NONE } vsyscall_mode = #ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; +#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) + XONLY; #else EMULATE; #endif @@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str) if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; + else if (!strcmp("xonly", str)) + vsyscall_mode = XONLY; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -357,12 +361,20 @@ void __init map_vsyscall(void) extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) { + /* + * For full emulation, the page needs to exist for real. In + * execute-only mode, there is no PTE at all backing the vsyscall + * page. + */ + if (vsyscall_mode == EMULATE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } + if (vsyscall_mode == XONLY) + gate_vma.vm_flags = VM_EXEC; + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); } From 918ce325098a4eef99daad7b6796da33cebaf03a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Jun 2019 21:45:04 -0700 Subject: [PATCH 3/8] x86/vsyscall: Show something useful on a read fault Just segfaulting the application when it tries to read the vsyscall page in xonly mode is not helpful for those who need to debug it. Emit a hint. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: Florian Weimer Cc: Jann Horn Link: https://lkml.kernel.org/r/8016afffe0eab497be32017ad7f6f7030dc3ba66.1561610354.git.luto@kernel.org --- arch/x86/entry/vsyscall/vsyscall_64.c | 19 ++++++++++++++++++- arch/x86/include/asm/vsyscall.h | 6 ++++-- arch/x86/mm/fault.c | 11 +++++------ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index fedd7628f3a6..9c58ab807aeb 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -117,7 +117,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) } } -bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; @@ -126,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) long ret; unsigned long orig_dx; + /* Write faults or kernel-privilege faults never get fixed up. */ + if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) + return false; + + if (!(error_code & X86_PF_INSTR)) { + /* Failed vsyscall read */ + if (vsyscall_mode == EMULATE) + return false; + + /* + * User code tried and failed to read the vsyscall page. + */ + warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround"); + return false; + } + /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index b986b2ca688a..ab60a71a8dcb 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root); * Called on instruction fetch fault in vsyscall page. * Returns true if handled. */ -extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); +extern bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address); #else static inline void map_vsyscall(void) {} -static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +static inline bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) { return false; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 46df4c6aae46..288a5462076f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1369,16 +1369,15 @@ void do_user_addr_fault(struct pt_regs *regs, #ifdef CONFIG_X86_64 /* - * Instruction fetch faults in the vsyscall page might need - * emulation. The vsyscall page is at a high address - * (>PAGE_OFFSET), but is considered to be part of the user - * address space. + * Faults in the vsyscall page might need emulation. The + * vsyscall page is at a high address (>PAGE_OFFSET), but is + * considered to be part of the user address space. * * The vsyscall page does not have a "real" VMA, so do this * emulation before we go searching for VMAs. */ - if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) { - if (emulate_vsyscall(regs, address)) + if (is_vsyscall_vaddr(address)) { + if (emulate_vsyscall(hw_error_code, regs, address)) return; } #endif From e0a446ce394a7915f2ffc03f9bb610c5ac4dbbf1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Jun 2019 21:45:05 -0700 Subject: [PATCH 4/8] x86/vsyscall: Document odd SIGSEGV error code for vsyscalls Even if vsyscall=none, user page faults on the vsyscall page are reported as though the PROT bit in the error code was set. Add a comment explaining why this is probably okay and display the value in the test case. While at it, explain why the behavior is correct with respect to PKRU. Modify also the selftest to print the odd error code so that there is a way to demonstrate the odd behaviour. If anyone really cares about more accurate emulation, the behaviour could be changed. But that needs a real good justification. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: Florian Weimer Cc: Jann Horn Cc: Borislav Petkov Cc: Kernel Hardening Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/75c91855fd850649ace162eec5495a1354221aaa.1561610354.git.luto@kernel.org --- arch/x86/mm/fault.c | 7 +++++++ tools/testing/selftests/x86/test_vsyscall.c | 9 ++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 288a5462076f..58e4f1f00bbc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -710,6 +710,10 @@ static void set_signal_archinfo(unsigned long address, * To avoid leaking information about the kernel page * table layout, pretend that user-mode accesses to * kernel addresses are always protection faults. + * + * NB: This means that failed vsyscalls with vsyscall=none + * will have the PROT bit. This doesn't leak any + * information and does not appear to cause any problems. */ if (address >= TASK_SIZE_MAX) error_code |= X86_PF_PROT; @@ -1375,6 +1379,9 @@ void do_user_addr_fault(struct pt_regs *regs, * * The vsyscall page does not have a "real" VMA, so do this * emulation before we go searching for VMAs. + * + * PKRU never rejects instruction fetches, so we don't need + * to consider the PF_PK bit. */ if (is_vsyscall_vaddr(address)) { if (emulate_vsyscall(hw_error_code, regs, address)) diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 0b4f1cc2291c..4c9a8d76dba0 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -183,9 +183,13 @@ static inline long sys_getcpu(unsigned * cpu, unsigned * node, } static jmp_buf jmpbuf; +static volatile unsigned long segv_err; static void sigsegv(int sig, siginfo_t *info, void *ctx_void) { + ucontext_t *ctx = (ucontext_t *)ctx_void; + + segv_err = ctx->uc_mcontext.gregs[REG_ERR]; siglongjmp(jmpbuf, 1); } @@ -416,8 +420,11 @@ static int test_vsys_r(void) } else if (!can_read && should_read_vsyscall) { printf("[FAIL]\tWe don't have read access, but we should\n"); return 1; + } else if (can_read) { + printf("[OK]\tWe have read access\n"); } else { - printf("[OK]\tgot expected result\n"); + printf("[OK]\tWe do not have read access: #PF(0x%lx)\n", + segv_err); } #endif From b0386979867168575118501104f3d135067eab4f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Jun 2019 21:45:06 -0700 Subject: [PATCH 5/8] selftests/x86/vsyscall: Verify that vsyscall=none blocks execution If vsyscall=none accidentally still allowed vsyscalls, the test wouldn't fail. Fix it. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: Florian Weimer Cc: Jann Horn Cc: Borislav Petkov Cc: Kernel Hardening Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/b413397c804265f8865f3e70b14b09485ea7c314.1561610354.git.luto@kernel.org --- tools/testing/selftests/x86/test_vsyscall.c | 76 ++++++++++++++------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 4c9a8d76dba0..34a1d35995ef 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -49,21 +49,21 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), } /* vsyscalls and vDSO */ -bool should_read_vsyscall = false; +bool vsyscall_map_r = false, vsyscall_map_x = false; typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); -gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); +const gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); gtod_t vdso_gtod; typedef int (*vgettime_t)(clockid_t, struct timespec *); vgettime_t vdso_gettime; typedef long (*time_func_t)(time_t *t); -time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); +const time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); time_func_t vdso_time; typedef long (*getcpu_t)(unsigned *, unsigned *, void *); -getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); +const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); getcpu_t vdso_getcpu; static void init_vdso(void) @@ -107,7 +107,7 @@ static int init_vsys(void) maps = fopen("/proc/self/maps", "r"); if (!maps) { printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); - should_read_vsyscall = true; + vsyscall_map_r = true; return 0; } @@ -133,12 +133,8 @@ static int init_vsys(void) } printf("\tvsyscall permissions are %c-%c\n", r, x); - should_read_vsyscall = (r == 'r'); - if (x != 'x') { - vgtod = NULL; - vtime = NULL; - vgetcpu = NULL; - } + vsyscall_map_r = (r == 'r'); + vsyscall_map_x = (x == 'x'); found = true; break; @@ -148,10 +144,8 @@ static int init_vsys(void) if (!found) { printf("\tno vsyscall map in /proc/self/maps\n"); - should_read_vsyscall = false; - vgtod = NULL; - vtime = NULL; - vgetcpu = NULL; + vsyscall_map_r = false; + vsyscall_map_x = false; } return nerrs; @@ -242,7 +236,7 @@ static int test_gtod(void) err(1, "syscall gettimeofday"); if (vdso_gtod) ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); - if (vgtod) + if (vsyscall_map_x) ret_vsys = vgtod(&tv_vsys, &tz_vsys); if (sys_gtod(&tv_sys2, &tz_sys) != 0) err(1, "syscall gettimeofday"); @@ -256,7 +250,7 @@ static int test_gtod(void) } } - if (vgtod) { + if (vsyscall_map_x) { if (ret_vsys == 0) { nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); } else { @@ -277,7 +271,7 @@ static int test_time(void) { t_sys1 = sys_time(&t2_sys1); if (vdso_time) t_vdso = vdso_time(&t2_vdso); - if (vtime) + if (vsyscall_map_x) t_vsys = vtime(&t2_vsys); t_sys2 = sys_time(&t2_sys2); if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { @@ -298,7 +292,7 @@ static int test_time(void) { } } - if (vtime) { + if (vsyscall_map_x) { if (t_vsys < 0 || t_vsys != t2_vsys) { printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); nerrs++; @@ -334,7 +328,7 @@ static int test_getcpu(int cpu) ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); if (vdso_getcpu) ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); - if (vgetcpu) + if (vsyscall_map_x) ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); if (ret_sys == 0) { @@ -373,7 +367,7 @@ static int test_getcpu(int cpu) } } - if (vgetcpu) { + if (vsyscall_map_x) { if (ret_vsys) { printf("[FAIL]\tvsyscall getcpu() failed\n"); nerrs++; @@ -414,10 +408,10 @@ static int test_vsys_r(void) can_read = false; } - if (can_read && !should_read_vsyscall) { + if (can_read && !vsyscall_map_r) { printf("[FAIL]\tWe have read access, but we shouldn't\n"); return 1; - } else if (!can_read && should_read_vsyscall) { + } else if (!can_read && vsyscall_map_r) { printf("[FAIL]\tWe don't have read access, but we should\n"); return 1; } else if (can_read) { @@ -431,6 +425,39 @@ static int test_vsys_r(void) return 0; } +static int test_vsys_x(void) +{ +#ifdef __x86_64__ + if (vsyscall_map_x) { + /* We already tested this adequately. */ + return 0; + } + + printf("[RUN]\tMake sure that vsyscalls really page fault\n"); + + bool can_exec; + if (sigsetjmp(jmpbuf, 1) == 0) { + vgtod(NULL, NULL); + can_exec = true; + } else { + can_exec = false; + } + + if (can_exec) { + printf("[FAIL]\tExecuting the vsyscall did not page fault\n"); + return 1; + } else if (segv_err & (1 << 4)) { /* INSTR */ + printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n", + segv_err); + } else { + printf("[FAILT]\tExecution failed with the wrong error: #PF(0x%lx)\n", + segv_err); + return 1; + } +#endif + + return 0; +} #ifdef __x86_64__ #define X86_EFLAGS_TF (1UL << 8) @@ -462,7 +489,7 @@ static int test_emulation(void) time_t tmp; bool is_native; - if (!vtime) + if (!vsyscall_map_x) return 0; printf("[RUN]\tchecking that vsyscalls are emulated\n"); @@ -504,6 +531,7 @@ int main(int argc, char **argv) sethandler(SIGSEGV, sigsegv, 0); nerrs += test_vsys_r(); + nerrs += test_vsys_x(); #ifdef __x86_64__ nerrs += test_emulation(); From 625b7b7f79c66626fb2b7687fc1a58309a57edd5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Jun 2019 21:45:07 -0700 Subject: [PATCH 6/8] x86/vsyscall: Change the default vsyscall mode to xonly The use case for full emulation over xonly is very esoteric, e.g. magic instrumentation tools. Change the default to the safer xonly mode. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: Florian Weimer Cc: Jann Horn Cc: Borislav Petkov Cc: Kernel Hardening Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/30539f8072d2376b9c9efcc07e6ed0d6bf20e882.1561610354.git.luto@kernel.org --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0182d2c67590..32028edc1b0e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2285,7 +2285,7 @@ config COMPAT_VDSO choice prompt "vsyscall table for legacy applications" depends on X86_64 - default LEGACY_VSYSCALL_EMULATE + default LEGACY_VSYSCALL_XONLY help Legacy user code that does not know how to find the vDSO expects to be able to issue three syscalls by calling fixed addresses in From 441cedab2dfca18fe4983cbc795de04536ed421e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Jun 2019 21:45:08 -0700 Subject: [PATCH 7/8] x86/vsyscall: Add __ro_after_init to global variables The vDSO is only configurable by command-line options, so make its global variables __ro_after_init. This seems highly unlikely to ever stop an exploit, but it's nicer anyway. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: Florian Weimer Cc: Jann Horn Cc: Borislav Petkov Cc: Kernel Hardening Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/a386925835e49d319e70c4d7404b1f6c3c2e3702.1561610354.git.luto@kernel.org --- arch/x86/entry/vsyscall/vsyscall_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 9c58ab807aeb..07003f3f1bfc 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,7 +42,7 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, XONLY, NONE } vsyscall_mode = +static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = #ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; #elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) @@ -305,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma) static const struct vm_operations_struct gate_vma_ops = { .name = gate_vma_name, }; -static struct vm_area_struct gate_vma = { +static struct vm_area_struct gate_vma __ro_after_init = { .vm_start = VSYSCALL_ADDR, .vm_end = VSYSCALL_ADDR + PAGE_SIZE, .vm_page_prot = PAGE_READONLY_EXEC, From 7f0a5e0755832301e7b010eab46fb715c483ba60 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Jun 2019 21:45:09 -0700 Subject: [PATCH 8/8] selftests/x86: Add a test for process_vm_readv() on the vsyscall page get_gate_page() is a piece of somewhat alarming code to make get_user_pages() work on the vsyscall page. Test it via process_vm_readv(). Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: Florian Weimer Cc: Jann Horn Cc: Borislav Petkov Cc: Kernel Hardening Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/0fe34229a9330e8f9de9765967939cc4f1cf26b1.1561610354.git.luto@kernel.org --- tools/testing/selftests/x86/test_vsyscall.c | 35 +++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 34a1d35995ef..4602326b8f5b 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef __x86_64__ # define VSYS(x) (x) @@ -459,6 +460,38 @@ static int test_vsys_x(void) return 0; } +static int test_process_vm_readv(void) +{ +#ifdef __x86_64__ + char buf[4096]; + struct iovec local, remote; + int ret; + + printf("[RUN]\tprocess_vm_readv() from vsyscall page\n"); + + local.iov_base = buf; + local.iov_len = 4096; + remote.iov_base = (void *)0xffffffffff600000; + remote.iov_len = 4096; + ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0); + if (ret != 4096) { + printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno); + return 0; + } + + if (vsyscall_map_r) { + if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) { + printf("[OK]\tIt worked and read correct data\n"); + } else { + printf("[FAIL]\tIt worked but returned incorrect data\n"); + return 1; + } + } +#endif + + return 0; +} + #ifdef __x86_64__ #define X86_EFLAGS_TF (1UL << 8) static volatile sig_atomic_t num_vsyscall_traps; @@ -533,6 +566,8 @@ int main(int argc, char **argv) nerrs += test_vsys_r(); nerrs += test_vsys_x(); + nerrs += test_process_vm_readv(); + #ifdef __x86_64__ nerrs += test_emulation(); #endif