From c10910c323bb9e7fc53ba25c83d1adeb9fb20878 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 21 Jan 2016 15:51:17 -0800 Subject: [PATCH 01/11] sparc: Hook up copy_file_range syscall. Signed-off-by: David S. Miller --- arch/sparc/include/uapi/asm/unistd.h | 3 ++- arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 1c26d440d288..b6de8b10a55b 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -422,8 +422,9 @@ #define __NR_listen 354 #define __NR_setsockopt 355 #define __NR_mlock2 356 +#define __NR_copy_file_range 357 -#define NR_syscalls 357 +#define NR_syscalls 358 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index e663b6c78de2..6c3dd6c52f8b 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -88,4 +88,4 @@ sys_call_table: /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen -/*355*/ .long sys_setsockopt, sys_mlock2 +/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 1557121f4cdc..12b524cfcfa0 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -89,7 +89,7 @@ sys_call_table32: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word 
compat_sys_setsockopt, sys_mlock2 + .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range #endif /* CONFIG_COMPAT */ @@ -170,4 +170,4 @@ sys_call_table: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word sys_setsockopt, sys_mlock2 + .word sys_setsockopt, sys_mlock2, sys_copy_file_range From 1a40b95374f680625318ab61d81958e949e0afe3 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 18 Jan 2016 06:32:30 -0500 Subject: [PATCH 02/11] sparc: Fix system call tracing register handling. A system call trace trigger on entry allows the tracing process to inspect and potentially change the traced process's registers. Account for that by reloading the %g1 (syscall number) and %i0-%i5 (syscall argument) values. We need to be careful to revalidate the range of %g1, and reload the system call table entry it corresponds to into %l7. Reported-by: Mike Frysinger Signed-off-by: David S. Miller Tested-by: Mike Frysinger --- arch/sparc/kernel/entry.S | 17 +++++++++++++++++ arch/sparc/kernel/syscalls.S | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 33c02b15f478..a83707c83be8 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -948,7 +948,24 @@ linux_syscall_trace: cmp %o0, 0 bne 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. 
*/ + ld [%sp + STACKFRAME_SZ + PT_G1], %g1 + sethi %hi(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I0], %i0 + or %l7, %lo(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I1], %i1 + ld [%sp + STACKFRAME_SZ + PT_I2], %i2 + ld [%sp + STACKFRAME_SZ + PT_I3], %i3 + ld [%sp + STACKFRAME_SZ + PT_I4], %i4 + ld [%sp + STACKFRAME_SZ + PT_I5], %i5 + cmp %g1, NR_syscalls + bgeu 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + ld [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index bb0008927598..c4a1b5c40e4e 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -158,7 +158,25 @@ linux_syscall_trace32: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 srl %i0, 0, %o0 + lduw [%l7 + %l4], %l7 srl %i4, 0, %o4 srl %i1, 0, %o1 srl %i2, 0, %o2 @@ -170,7 +188,25 @@ linux_syscall_trace: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. 
*/ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + lduw [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 From ca0bb0798022732773752fee97bb633c6f3623d2 Mon Sep 17 00:00:00 2001 From: "wim.coekaerts@oracle.com" Date: Fri, 29 Jan 2016 09:39:38 -0800 Subject: [PATCH 03/11] Add sun4v_wdt watchdog driver This driver adds sparc hypervisor watchdog support. The default timeout is 60 seconds and the range is between 1 and 31536000 seconds. Both watchdog-resolution and watchdog-max-timeout MD properties settings are supported. Signed-off-by: Wim Coekaerts Reviewed-by: Julian Calaby Reviewed-by: Guenter Roeck Signed-off-by: David S. 
Miller --- .../watchdog/watchdog-parameters.txt | 4 + arch/sparc/kernel/hvcalls.S | 3 +- arch/sparc/kernel/sparc_ksyms_64.c | 1 + drivers/watchdog/Kconfig | 11 + drivers/watchdog/Makefile | 1 + drivers/watchdog/sun4v_wdt.c | 191 ++++++++++++++++++ 6 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 drivers/watchdog/sun4v_wdt.c diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt index 9f9ec9f76039..4e4b6f10d841 100644 --- a/Documentation/watchdog/watchdog-parameters.txt +++ b/Documentation/watchdog/watchdog-parameters.txt @@ -400,3 +400,7 @@ wm8350_wdt: nowayout: Watchdog cannot be stopped once started (default=kernel config parameter) ------------------------------------------------- +sun4v_wdt: +timeout: Watchdog timeout in seconds (1..31536000, default=60) +nowayout: Watchdog cannot be stopped once started +------------------------------------------------- diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index afbaba52d2f1..d127130bf424 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -338,8 +338,9 @@ ENTRY(sun4v_mach_set_watchdog) mov %o1, %o4 mov HV_FAST_MACH_SET_WATCHDOG, %o5 ta HV_FAST_TRAP + brnz,a,pn %o4, 0f stx %o1, [%o4] - retl +0: retl nop ENDPROC(sun4v_mach_set_watchdog) diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c index a92d5d2c46a3..9e034f29dcc5 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -37,6 +37,7 @@ EXPORT_SYMBOL(sun4v_niagara_getperf); EXPORT_SYMBOL(sun4v_niagara_setperf); EXPORT_SYMBOL(sun4v_niagara2_getperf); EXPORT_SYMBOL(sun4v_niagara2_setperf); +EXPORT_SYMBOL(sun4v_mach_set_watchdog); /* from hweight.S */ EXPORT_SYMBOL(__arch_hweight8); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4f0e7be0da34..30d38ae685f6 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1565,6 +1565,17 @@ 
config WATCHDOG_RIO machines. The watchdog timeout period is normally one minute but can be changed with a boot-time parameter. +config WATCHDOG_SUN4V + tristate "Sun4v Watchdog support" + select WATCHDOG_CORE + depends on SPARC64 + help + Say Y here to support the hypervisor watchdog capability embedded + in the SPARC sun4v architecture. + + To compile this driver as a module, choose M here. The module will + be called sun4v_wdt. + # XTENSA Architecture # Xen Architecture diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index f566753256ab..f6a6a387c6c7 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -179,6 +179,7 @@ obj-$(CONFIG_SH_WDT) += shwdt.o obj-$(CONFIG_WATCHDOG_RIO) += riowd.o obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o +obj-$(CONFIG_WATCHDOG_SUN4V) += sun4v_wdt.o # XTENSA Architecture diff --git a/drivers/watchdog/sun4v_wdt.c b/drivers/watchdog/sun4v_wdt.c new file mode 100644 index 000000000000..1467fe50a76f --- /dev/null +++ b/drivers/watchdog/sun4v_wdt.c @@ -0,0 +1,191 @@ +/* + * sun4v watchdog timer + * (c) Copyright 2016 Oracle Corporation + * + * Implement a simple watchdog driver using the built-in sun4v hypervisor + * watchdog support. If time expires, the hypervisor stops or bounces + * the guest domain. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#define WDT_TIMEOUT 60 +#define WDT_MAX_TIMEOUT 31536000 +#define WDT_MIN_TIMEOUT 1 +#define WDT_DEFAULT_RESOLUTION_MS 1000 /* 1 second */ + +static unsigned int timeout; +module_param(timeout, uint, 0); +MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default=" + __MODULE_STRING(WDT_TIMEOUT) ")"); + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, S_IRUGO); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +static int sun4v_wdt_stop(struct watchdog_device *wdd) +{ + sun4v_mach_set_watchdog(0, NULL); + + return 0; +} + +static int sun4v_wdt_ping(struct watchdog_device *wdd) +{ + int hverr; + + /* + * HV watchdog timer will round up the timeout + * passed in to the nearest multiple of the + * watchdog resolution in milliseconds. + */ + hverr = sun4v_mach_set_watchdog(wdd->timeout * 1000, NULL); + if (hverr == HV_EINVAL) + return -EINVAL; + + return 0; +} + +static int sun4v_wdt_set_timeout(struct watchdog_device *wdd, + unsigned int timeout) +{ + wdd->timeout = timeout; + + return 0; +} + +static const struct watchdog_info sun4v_wdt_ident = { + .options = WDIOF_SETTIMEOUT | + WDIOF_MAGICCLOSE | + WDIOF_KEEPALIVEPING, + .identity = "sun4v hypervisor watchdog", + .firmware_version = 0, +}; + +static struct watchdog_ops sun4v_wdt_ops = { + .owner = THIS_MODULE, + .start = sun4v_wdt_ping, + .stop = sun4v_wdt_stop, + .ping = sun4v_wdt_ping, + .set_timeout = sun4v_wdt_set_timeout, +}; + +static struct watchdog_device wdd = { + .info = &sun4v_wdt_ident, + .ops = &sun4v_wdt_ops, + .min_timeout = WDT_MIN_TIMEOUT, + .max_timeout = WDT_MAX_TIMEOUT, + .timeout = WDT_TIMEOUT, +}; + +static int __init sun4v_wdt_init(void) +{ + struct mdesc_handle *handle; + u64 node; + const u64 *value; + int err = 0; + unsigned long major = 1, minor = 1; + 
+ /* + * There are 2 properties that can be set from the control + * domain for the watchdog. + * watchdog-resolution + * watchdog-max-timeout + * + * We can expect a handle to be returned otherwise something + * serious is wrong. Correct to return -ENODEV here. + */ + + handle = mdesc_grab(); + if (!handle) + return -ENODEV; + + node = mdesc_node_by_name(handle, MDESC_NODE_NULL, "platform"); + err = -ENODEV; + if (node == MDESC_NODE_NULL) + goto out_release; + + /* + * This is a safe way to validate if we are on the right + * platform. + */ + if (sun4v_hvapi_register(HV_GRP_CORE, major, &minor)) + goto out_hv_unreg; + + /* Allow value of watchdog-resolution up to 1s (default) */ + value = mdesc_get_property(handle, node, "watchdog-resolution", NULL); + err = -EINVAL; + if (value) { + if (*value == 0 || + *value > WDT_DEFAULT_RESOLUTION_MS) + goto out_hv_unreg; + } + + value = mdesc_get_property(handle, node, "watchdog-max-timeout", NULL); + if (value) { + /* + * If the property value (in ms) is smaller than + * min_timeout, return -EINVAL. + */ + if (*value < wdd.min_timeout * 1000) + goto out_hv_unreg; + + /* + * If the property value is smaller than + * default max_timeout then set watchdog max_timeout to + * the value of the property in seconds. 
+ */ + if (*value < wdd.max_timeout * 1000) + wdd.max_timeout = *value / 1000; + } + + watchdog_init_timeout(&wdd, timeout, NULL); + + watchdog_set_nowayout(&wdd, nowayout); + + err = watchdog_register_device(&wdd); + if (err) + goto out_hv_unreg; + + pr_info("initialized (timeout=%ds, nowayout=%d)\n", + wdd.timeout, nowayout); + + mdesc_release(handle); + + return 0; + +out_hv_unreg: + sun4v_hvapi_unregister(HV_GRP_CORE); + +out_release: + mdesc_release(handle); + return err; +} + +static void __exit sun4v_wdt_exit(void) +{ + sun4v_hvapi_unregister(HV_GRP_CORE); + watchdog_unregister_device(&wdd); +} + +module_init(sun4v_wdt_init); +module_exit(sun4v_wdt_exit); + +MODULE_AUTHOR("Wim Coekaerts "); +MODULE_DESCRIPTION("sun4v watchdog driver"); +MODULE_LICENSE("GPL"); From 22be3b105597002ad3499bdb9c0aba49c9ab05f7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 17 Jan 2016 11:47:29 -0500 Subject: [PATCH 04/11] sparc32: Add -Wa,-Av8 to KBUILD_CFLAGS. Binutils used to be (erroneously) extremely permissive about instruction usage. But that got fixed and if you don't properly tell it to accept classes of instructions it will fail. This uncovered a specs bug on sparc in gcc where it wouldn't pass the proper options to binutils. Deal with this in the kernel build by adding -Wa,-Av8 to KBUILD_CFLAGS. Reported-by: Al Viro Signed-off-by: David S. Miller --- arch/sparc/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index eaee14637d93..8496a074bd0e 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -24,7 +24,13 @@ LDFLAGS := -m elf32_sparc export BITS := 32 UTS_MACHINE := sparc +# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some +# versions of gcc. Some gcc versions won't pass -Av8 to binutils when you +# give -mcpu=v8. This silently worked with older binutils versions but +# does not any more. 
KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 +KBUILD_CFLAGS += -Wa,-Av8 + KBUILD_AFLAGS += -m32 -Wa,-Av8 else From 397d1533b6cce0ccb5379542e2e6d079f6936c46 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2016 00:25:32 -0500 Subject: [PATCH 05/11] sparc64: Fix sparc64_set_context stack handling. Like a signal return, we should use synchronize_user_stack() rather than flush_user_windows(). Reported-by: Ilya Malakhov Signed-off-by: David S. Miller --- arch/sparc/kernel/signal_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index d88beff47bab..39aaec173f66 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) unsigned char fenab; int err; - flush_user_windows(); + synchronize_user_stack(); if (get_thread_wsaved() || (((unsigned long)ucp) & (sizeof(unsigned long)-1)) || (!__access_ok(ucp, sizeof(*ucp)))) From ea861d73a9aa97a5e0c7cef6a26ce01786e0c58d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 26 Feb 2016 15:12:25 -0800 Subject: [PATCH 06/11] MAINTAINERS: add myself as lkdtm maintainer Officially claim maintainership over the LKDTM code. 
Signed-off-by: Kees Cook --- MAINTAINERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index da3e4d8016d0..f9eb9914828b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6580,6 +6580,11 @@ F: samples/livepatch/ L: live-patching@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/livepatching.git +LINUX KERNEL DUMP TEST MODULE (LKDTM) +M: Kees Cook +S: Maintained +F: drivers/misc/lkdtm.c + LLC (802.2) M: Arnaldo Carvalho de Melo S: Maintained From bc0b8cc6cb26a209fa1679d5c063b47bc0afe964 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 25 Feb 2016 16:36:42 -0800 Subject: [PATCH 07/11] lkdtm: Add READ_AFTER_FREE test In a similar manner to WRITE_AFTER_FREE, add a READ_AFTER_FREE test to test free poisoning features. Sample output when no sanitization is present: # echo READ_AFTER_FREE > /sys/kernel/debug/provoke-crash/DIRECT [ 17.542473] lkdtm: Performing direct entry READ_AFTER_FREE [ 17.543866] lkdtm: Value in memory before free: 12345678 [ 17.545212] lkdtm: Attempting bad read from freed memory [ 17.546542] lkdtm: Memory was not poisoned with slub_debug=P: # echo READ_AFTER_FREE > /sys/kernel/debug/provoke-crash/DIRECT [ 22.415531] lkdtm: Performing direct entry READ_AFTER_FREE [ 22.416366] lkdtm: Value in memory before free: 12345678 [ 22.417137] lkdtm: Attempting bad read from freed memory [ 22.417897] lkdtm: Memory correctly poisoned, calling BUG Signed-off-by: Laura Abbott Signed-off-by: Kees Cook --- drivers/misc/lkdtm.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 11fdadc68e53..8de47462638a 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -92,6 +92,7 @@ enum ctype { CT_UNALIGNED_LOAD_STORE_WRITE, CT_OVERWRITE_ALLOCATION, CT_WRITE_AFTER_FREE, + CT_READ_AFTER_FREE, CT_SOFTLOCKUP, CT_HARDLOCKUP, CT_SPINLOCKUP, @@ -129,6 +130,7 @@ static char* cp_type[] = { "UNALIGNED_LOAD_STORE_WRITE", 
"OVERWRITE_ALLOCATION", "WRITE_AFTER_FREE", + "READ_AFTER_FREE", "SOFTLOCKUP", "HARDLOCKUP", "SPINLOCKUP", @@ -417,6 +419,42 @@ static void lkdtm_do_action(enum ctype which) memset(data, 0x78, len); break; } + case CT_READ_AFTER_FREE: { + int *base, *val, saw; + size_t len = 1024; + /* + * The slub allocator uses the first word to store the free + * pointer in some configurations. Use the middle of the + * allocation to avoid running into the freelist + */ + size_t offset = (len / sizeof(*base)) / 2; + + base = kmalloc(len, GFP_KERNEL); + if (!base) + break; + + val = kmalloc(len, GFP_KERNEL); + if (!val) + break; + + *val = 0x12345678; + base[offset] = *val; + pr_info("Value in memory before free: %x\n", base[offset]); + + kfree(base); + + pr_info("Attempting bad read from freed memory\n"); + saw = base[offset]; + if (saw != *val) { + /* Good! Poisoning happened, so declare a win. */ + pr_info("Memory correctly poisoned, calling BUG\n"); + BUG(); + } + pr_info("Memory was not poisoned\n"); + + kfree(val); + break; + } case CT_SOFTLOCKUP: preempt_disable(); for (;;) From 250a8988ef4071d8b7cdbb27388f09f33402293a Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 25 Feb 2016 16:36:43 -0800 Subject: [PATCH 08/11] lkdtm: Update WRITE_AFTER_FREE test The SLUB allocator may use the first word of a freed block to store the freelist information. This may make it harder to test poisoning features. Change the WRITE_AFTER_FREE test to better match what the READ_AFTER_FREE test does and also print out a bit more information. 
Signed-off-by: Laura Abbott Signed-off-by: Kees Cook --- drivers/misc/lkdtm.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 8de47462638a..a00a2b11b821 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -411,12 +411,21 @@ static void lkdtm_do_action(enum ctype which) break; } case CT_WRITE_AFTER_FREE: { + int *base; size_t len = 1024; - u32 *data = kmalloc(len, GFP_KERNEL); + /* + * The slub allocator uses the first word to store the free + * pointer in some configurations. Use the middle of the + * allocation to avoid running into the freelist + */ + size_t offset = (len / sizeof(*base)) / 2; - kfree(data); - schedule(); - memset(data, 0x78, len); + base = kmalloc(len, GFP_KERNEL); + pr_info("Allocated memory %p-%p\n", base, &base[offset * 2]); + kfree(base); + pr_info("Attempting bad write to freed memory at %p\n", + &base[offset]); + base[offset] = 0x0abcdef0; break; } case CT_READ_AFTER_FREE: { From 920d451f9ce68e306b1f35b2029450093163d476 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 25 Feb 2016 16:36:44 -0800 Subject: [PATCH 09/11] lkdtm: Add read/write after free tests for buddy memory The current tests for read/write after free work on slab allocated memory. Memory straight from the buddy allocator may behave slightly differently and have a different set of parameters to test. Add tests for those cases as well. 
On a basic x86 boot: # echo WRITE_BUDDY_AFTER_FREE > /sys/kernel/debug/provoke-crash/DIRECT [ 22.291950] lkdtm: Performing direct entry WRITE_BUDDY_AFTER_FREE [ 22.292983] lkdtm: Writing to the buddy page before free [ 22.293950] lkdtm: Attempting bad write to the buddy page after free # echo READ_BUDDY_AFTER_FREE > /sys/kernel/debug/provoke-crash/DIRECT [ 32.375601] lkdtm: Performing direct entry READ_BUDDY_AFTER_FREE [ 32.379896] lkdtm: Value in memory before free: 12345678 [ 32.383854] lkdtm: Attempting to read from freed memory [ 32.389309] lkdtm: Buddy page was not poisoned On x86 with CONFIG_DEBUG_PAGEALLOC and debug_pagealloc=on: # echo WRITE_BUDDY_AFTER_FREE > /sys/kernel/debug/provoke-crash/DIRECT [ 17.475533] lkdtm: Performing direct entry WRITE_BUDDY_AFTER_FREE [ 17.477360] lkdtm: Writing to the buddy page before free [ 17.479089] lkdtm: Attempting bad write to the buddy page after free [ 17.480904] BUG: unable to handle kernel paging request at ffff88000ebd8000 # echo READ_BUDDY_AFTER_FREE > /sys/kernel/debug/provoke-crash/DIRECT [ 14.606433] lkdtm: Performing direct entry READ_BUDDY_AFTER_FREE [ 14.607447] lkdtm: Value in memory before free: 12345678 [ 14.608161] lkdtm: Attempting to read from freed memory [ 14.608860] BUG: unable to handle kernel paging request at ffff88000eba3000 Note that arches without ARCH_SUPPORTS_DEBUG_PAGEALLOC may not produce the same crash. 
Signed-off-by: Laura Abbott Signed-off-by: Kees Cook --- drivers/misc/lkdtm.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index a00a2b11b821..8e00e2e992d1 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -93,6 +93,8 @@ enum ctype { CT_OVERWRITE_ALLOCATION, CT_WRITE_AFTER_FREE, CT_READ_AFTER_FREE, + CT_WRITE_BUDDY_AFTER_FREE, + CT_READ_BUDDY_AFTER_FREE, CT_SOFTLOCKUP, CT_HARDLOCKUP, CT_SPINLOCKUP, @@ -131,6 +133,8 @@ static char* cp_type[] = { "OVERWRITE_ALLOCATION", "WRITE_AFTER_FREE", "READ_AFTER_FREE", + "WRITE_BUDDY_AFTER_FREE", + "READ_BUDDY_AFTER_FREE", "SOFTLOCKUP", "HARDLOCKUP", "SPINLOCKUP", @@ -464,6 +468,47 @@ static void lkdtm_do_action(enum ctype which) kfree(val); break; } + case CT_WRITE_BUDDY_AFTER_FREE: { + unsigned long p = __get_free_page(GFP_KERNEL); + if (!p) + break; + pr_info("Writing to the buddy page before free\n"); + memset((void *)p, 0x3, PAGE_SIZE); + free_page(p); + schedule(); + pr_info("Attempting bad write to the buddy page after free\n"); + memset((void *)p, 0x78, PAGE_SIZE); + break; + } + case CT_READ_BUDDY_AFTER_FREE: { + unsigned long p = __get_free_page(GFP_KERNEL); + int saw, *val = kmalloc(1024, GFP_KERNEL); + int *base; + + if (!p) + break; + + if (!val) + break; + + base = (int *)p; + + *val = 0x12345678; + base[0] = *val; + pr_info("Value in memory before free: %x\n", base[0]); + free_page(p); + pr_info("Attempting to read from freed memory\n"); + saw = base[0]; + if (saw != *val) { + /* Good! Poisoning happened, so declare a win. 
*/ + pr_info("Buddy page correctly poisoned, calling BUG\n"); + BUG(); + } + pr_info("Buddy page was not poisoned\n"); + + kfree(val); + break; + } case CT_SOFTLOCKUP: preempt_disable(); for (;;) From 5fd9e48084f5566aafb759882f549f37e5940501 Mon Sep 17 00:00:00 2001 From: David Windsor Date: Thu, 17 Dec 2015 00:56:36 -0500 Subject: [PATCH 10/11] lkdtm: add test for atomic_t underflow/overflow dmesg output of running this LKDTM test with PaX: [187095.475573] lkdtm: No crash points registered, enable through debugfs [187118.020257] lkdtm: Performing direct entry WRAP_ATOMIC [187118.030045] lkdtm: attempting atomic underflow [187118.030929] PAX: refcount overflow detected in: bash:1790, uid/euid: 0/0 [187118.071667] PAX: refcount overflow occured at: lkdtm_do_action+0x19e/0x400 [lkdtm] [187118.081423] CPU: 3 PID: 1790 Comm: bash Not tainted 4.2.6-pax-refcount-split+ #2 [187118.083403] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [187118.102596] task: ffff8800da8de040 ti: ffff8800da8e4000 task.ti: ffff8800da8e4000 [187118.111321] RIP: 0010:[] [] lkdtm_do_action+0x19e/0x400 [lkdtm] ... 
[187118.128074] lkdtm: attempting atomic overflow [187118.128080] PAX: refcount overflow detected in: bash:1790, uid/euid: 0/0 [187118.128082] PAX: refcount overflow occured at: lkdtm_do_action+0x1b6/0x400 [lkdtm] [187118.128085] CPU: 3 PID: 1790 Comm: bash Not tainted 4.2.6-pax-refcount-split+ #2 [187118.128086] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [187118.128088] task: ffff8800da8de040 ti: ffff8800da8e4000 task.ti: ffff8800da8e4000 [187118.128092] RIP: 0010:[] [] lkdtm_do_action+0x1b6/0x400 [lkdtm] Signed-off-by: David Windsor [cleaned up whitespacing, keescook] Signed-off-by: Kees Cook --- drivers/misc/lkdtm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 8e00e2e992d1..c333e813ed34 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -107,6 +107,7 @@ enum ctype { CT_ACCESS_USERSPACE, CT_WRITE_RO, CT_WRITE_KERN, + CT_WRAP_ATOMIC }; static char* cp_name[] = { @@ -147,6 +148,7 @@ static char* cp_type[] = { "ACCESS_USERSPACE", "WRITE_RO", "WRITE_KERN", + "WRAP_ATOMIC" }; static struct jprobe lkdtm; @@ -620,6 +622,17 @@ static void lkdtm_do_action(enum ctype which) do_overwritten(); break; } + case CT_WRAP_ATOMIC: { + atomic_t under = ATOMIC_INIT(INT_MIN); + atomic_t over = ATOMIC_INIT(INT_MAX); + + pr_info("attempting atomic underflow\n"); + atomic_dec(&under); + pr_info("attempting atomic overflow\n"); + atomic_inc(&over); + + return; + } case CT_NONE: default: break; From 7c0ae5be821c1b6a700c5506de9b62e95f60df3c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 26 Feb 2016 15:27:35 -0800 Subject: [PATCH 11/11] lkdtm: improve use-after-free tests This improves the order of operations on the use-after-free tests to try to make sure we've executed any available sanity-checking code, and to report the poisoning that was found. 
Signed-off-by: Kees Cook --- drivers/misc/lkdtm.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index c333e813ed34..9345999f5673 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -417,7 +417,7 @@ static void lkdtm_do_action(enum ctype which) break; } case CT_WRITE_AFTER_FREE: { - int *base; + int *base, *again; size_t len = 1024; /* * The slub allocator uses the first word to store the free @@ -428,10 +428,16 @@ static void lkdtm_do_action(enum ctype which) base = kmalloc(len, GFP_KERNEL); pr_info("Allocated memory %p-%p\n", base, &base[offset * 2]); - kfree(base); pr_info("Attempting bad write to freed memory at %p\n", &base[offset]); + kfree(base); base[offset] = 0x0abcdef0; + /* Attempt to notice the overwrite. */ + again = kmalloc(len, GFP_KERNEL); + kfree(again); + if (again != base) + pr_info("Hmm, didn't get the same memory range.\n"); + break; } case CT_READ_AFTER_FREE: { @@ -462,7 +468,7 @@ static void lkdtm_do_action(enum ctype which) saw = base[offset]; if (saw != *val) { /* Good! Poisoning happened, so declare a win. */ - pr_info("Memory correctly poisoned, calling BUG\n"); + pr_info("Memory correctly poisoned (%x)\n", saw); BUG(); } pr_info("Memory was not poisoned\n"); @@ -480,6 +486,11 @@ static void lkdtm_do_action(enum ctype which) schedule(); pr_info("Attempting bad write to the buddy page after free\n"); memset((void *)p, 0x78, PAGE_SIZE); + /* Attempt to notice the overwrite. */ + p = __get_free_page(GFP_KERNEL); + free_page(p); + schedule(); + break; } case CT_READ_BUDDY_AFTER_FREE: { @@ -503,7 +514,7 @@ static void lkdtm_do_action(enum ctype which) saw = base[0]; if (saw != *val) { /* Good! Poisoning happened, so declare a win. */ - pr_info("Buddy page correctly poisoned, calling BUG\n"); + pr_info("Memory correctly poisoned (%x)\n", saw); BUG(); } pr_info("Buddy page was not poisoned\n");