diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt index 9f9ec9f76039..4e4b6f10d841 100644 --- a/Documentation/watchdog/watchdog-parameters.txt +++ b/Documentation/watchdog/watchdog-parameters.txt @@ -400,3 +400,7 @@ wm8350_wdt: nowayout: Watchdog cannot be stopped once started (default=kernel config parameter) ------------------------------------------------- +sun4v_wdt: +timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000) +nowayout: Watchdog cannot be stopped once started +------------------------------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 403248c768d0..229c17da4568 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6581,6 +6581,11 @@ F: samples/livepatch/ L: live-patching@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/livepatching.git +LINUX KERNEL DUMP TEST MODULE (LKDTM) +M: Kees Cook +S: Maintained +F: drivers/misc/lkdtm.c + LLC (802.2) M: Arnaldo Carvalho de Melo S: Maintained diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index eaee14637d93..8496a074bd0e 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -24,7 +24,13 @@ LDFLAGS := -m elf32_sparc export BITS := 32 UTS_MACHINE := sparc +# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some +# versions of gcc. Some gcc versions won't pass -Av8 to binutils when you +# give -mcpu=v8. This silently worked with older bintutils versions but +# does not any more. KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 +KBUILD_CFLAGS += -Wa,-Av8 + KBUILD_AFLAGS += -m32 -Wa,-Av8 else diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 1c26d440d288..b6de8b10a55b 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -422,8 +422,9 @@ #define __NR_listen 354 #define __NR_setsockopt 355 #define __NR_mlock2 356 +#define __NR_copy_file_range 357 -#define NR_syscalls 357 +#define NR_syscalls 358 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 33c02b15f478..a83707c83be8 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -948,7 +948,24 @@ linux_syscall_trace: cmp %o0, 0 bne 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ld [%sp + STACKFRAME_SZ + PT_G1], %g1 + sethi %hi(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I0], %i0 + or %l7, %lo(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I1], %i1 + ld [%sp + STACKFRAME_SZ + PT_I2], %i2 + ld [%sp + STACKFRAME_SZ + PT_I3], %i3 + ld [%sp + STACKFRAME_SZ + PT_I4], %i4 + ld [%sp + STACKFRAME_SZ + PT_I5], %i5 + cmp %g1, NR_syscalls + bgeu 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + ld [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index afbaba52d2f1..d127130bf424 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -338,8 +338,9 @@ ENTRY(sun4v_mach_set_watchdog) mov %o1, %o4 mov HV_FAST_MACH_SET_WATCHDOG, %o5 ta HV_FAST_TRAP + brnz,a,pn %o4, 0f stx %o1, [%o4] - retl +0: retl nop ENDPROC(sun4v_mach_set_watchdog) diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index d88beff47bab..39aaec173f66 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) unsigned char fenab; int err; - flush_user_windows(); + synchronize_user_stack(); if (get_thread_wsaved() || (((unsigned long)ucp) & (sizeof(unsigned long)-1)) || (!__access_ok(ucp, sizeof(*ucp)))) diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c index a92d5d2c46a3..9e034f29dcc5 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -37,6 +37,7 @@ EXPORT_SYMBOL(sun4v_niagara_getperf); EXPORT_SYMBOL(sun4v_niagara_setperf); EXPORT_SYMBOL(sun4v_niagara2_getperf); EXPORT_SYMBOL(sun4v_niagara2_setperf); +EXPORT_SYMBOL(sun4v_mach_set_watchdog); /* from hweight.S */ EXPORT_SYMBOL(__arch_hweight8); diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index bb0008927598..c4a1b5c40e4e 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -158,7 +158,25 @@ linux_syscall_trace32: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 srl %i0, 0, %o0 + lduw [%l7 + %l4], %l7 srl %i4, 0, %o4 srl %i1, 0, %o1 srl %i2, 0, %o2 @@ -170,7 +188,25 @@ linux_syscall_trace: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + lduw [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index e663b6c78de2..6c3dd6c52f8b 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -88,4 +88,4 @@ sys_call_table: /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen -/*355*/ .long sys_setsockopt, sys_mlock2 +/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 1557121f4cdc..12b524cfcfa0 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -89,7 +89,7 @@ sys_call_table32: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word compat_sys_setsockopt, sys_mlock2 + .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range #endif /* CONFIG_COMPAT */ @@ -170,4 +170,4 @@ sys_call_table: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word sys_setsockopt, sys_mlock2 + .word sys_setsockopt, sys_mlock2, sys_copy_file_range diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 5c1351b19029..fb354e8a1265 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -92,6 +92,9 @@ enum ctype { CT_UNALIGNED_LOAD_STORE_WRITE, CT_OVERWRITE_ALLOCATION, CT_WRITE_AFTER_FREE, + CT_READ_AFTER_FREE, + CT_WRITE_BUDDY_AFTER_FREE, + CT_READ_BUDDY_AFTER_FREE, CT_SOFTLOCKUP, CT_HARDLOCKUP, CT_SPINLOCKUP, @@ -104,6 +107,7 @@ enum ctype { CT_ACCESS_USERSPACE, CT_WRITE_RO, CT_WRITE_KERN, + CT_WRAP_ATOMIC }; static char* cp_name[] = { @@ -129,6 +133,9 @@ static char* cp_type[] = { "UNALIGNED_LOAD_STORE_WRITE", "OVERWRITE_ALLOCATION", "WRITE_AFTER_FREE", + "READ_AFTER_FREE", + "WRITE_BUDDY_AFTER_FREE", + "READ_BUDDY_AFTER_FREE", "SOFTLOCKUP", "HARDLOCKUP", "SPINLOCKUP", @@ -141,6 +148,7 @@ static char* cp_type[] = { "ACCESS_USERSPACE", "WRITE_RO", "WRITE_KERN", + "WRAP_ATOMIC" }; static struct jprobe lkdtm; @@ -409,12 +417,109 @@ static void lkdtm_do_action(enum ctype which) break; } case CT_WRITE_AFTER_FREE: { + int *base, *again; size_t len = 1024; - u32 *data = kmalloc(len, GFP_KERNEL); + /* + * The slub allocator uses the first word to store the free + * pointer in some configurations. Use the middle of the + * allocation to avoid running into the freelist + */ + size_t offset = (len / sizeof(*base)) / 2; - kfree(data); + base = kmalloc(len, GFP_KERNEL); + pr_info("Allocated memory %p-%p\n", base, &base[offset * 2]); + pr_info("Attempting bad write to freed memory at %p\n", + &base[offset]); + kfree(base); + base[offset] = 0x0abcdef0; + /* Attempt to notice the overwrite. */ + again = kmalloc(len, GFP_KERNEL); + kfree(again); + if (again != base) + pr_info("Hmm, didn't get the same memory range.\n"); + + break; + } + case CT_READ_AFTER_FREE: { + int *base, *val, saw; + size_t len = 1024; + /* + * The slub allocator uses the first word to store the free + * pointer in some configurations. Use the middle of the + * allocation to avoid running into the freelist + */ + size_t offset = (len / sizeof(*base)) / 2; + + base = kmalloc(len, GFP_KERNEL); + if (!base) + break; + + val = kmalloc(len, GFP_KERNEL); + if (!val) + break; + + *val = 0x12345678; + base[offset] = *val; + pr_info("Value in memory before free: %x\n", base[offset]); + + kfree(base); + + pr_info("Attempting bad read from freed memory\n"); + saw = base[offset]; + if (saw != *val) { + /* Good! Poisoning happened, so declare a win. */ + pr_info("Memory correctly poisoned (%x)\n", saw); + BUG(); + } + pr_info("Memory was not poisoned\n"); + + kfree(val); + break; + } + case CT_WRITE_BUDDY_AFTER_FREE: { + unsigned long p = __get_free_page(GFP_KERNEL); + if (!p) + break; + pr_info("Writing to the buddy page before free\n"); + memset((void *)p, 0x3, PAGE_SIZE); + free_page(p); schedule(); - memset(data, 0x78, len); + pr_info("Attempting bad write to the buddy page after free\n"); + memset((void *)p, 0x78, PAGE_SIZE); + /* Attempt to notice the overwrite. */ + p = __get_free_page(GFP_KERNEL); + free_page(p); + schedule(); + + break; + } + case CT_READ_BUDDY_AFTER_FREE: { + unsigned long p = __get_free_page(GFP_KERNEL); + int saw, *val = kmalloc(1024, GFP_KERNEL); + int *base; + + if (!p) + break; + + if (!val) + break; + + base = (int *)p; + + *val = 0x12345678; + base[0] = *val; + pr_info("Value in memory before free: %x\n", base[0]); + free_page(p); + pr_info("Attempting to read from freed memory\n"); + saw = base[0]; + if (saw != *val) { + /* Good! Poisoning happened, so declare a win. */ + pr_info("Memory correctly poisoned (%x)\n", saw); + BUG(); + } + pr_info("Buddy page was not poisoned\n"); + + kfree(val); break; } case CT_SOFTLOCKUP: @@ -528,6 +633,17 @@ static void lkdtm_do_action(enum ctype which) do_overwritten(); break; } + case CT_WRAP_ATOMIC: { + atomic_t under = ATOMIC_INIT(INT_MIN); + atomic_t over = ATOMIC_INIT(INT_MAX); + + pr_info("attempting atomic underflow\n"); + atomic_dec(&under); + pr_info("attempting atomic overflow\n"); + atomic_inc(&over); + + return; + } case CT_NONE: default: break; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 86c2392bd968..9289da313d98 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1584,6 +1584,17 @@ config WATCHDOG_RIO machines. The watchdog timeout period is normally one minute but can be changed with a boot-time parameter. +config WATCHDOG_SUN4V + tristate "Sun4v Watchdog support" + select WATCHDOG_CORE + depends on SPARC64 + help + Say Y here to support the hypervisor watchdog capability embedded + in the SPARC sun4v architecture. + + To compile this driver as a module, choose M here. The module will + be called sun4v_wdt. + # XTENSA Architecture # Xen Architecture diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index efc4f788e0f2..14bd772d3e66 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -180,6 +180,7 @@ obj-$(CONFIG_SH_WDT) += shwdt.o obj-$(CONFIG_WATCHDOG_RIO) += riowd.o obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o +obj-$(CONFIG_WATCHDOG_SUN4V) += sun4v_wdt.o # XTENSA Architecture diff --git a/drivers/watchdog/sun4v_wdt.c b/drivers/watchdog/sun4v_wdt.c new file mode 100644 index 000000000000..1467fe50a76f --- /dev/null +++ b/drivers/watchdog/sun4v_wdt.c @@ -0,0 +1,191 @@ +/* + * sun4v watchdog timer + * (c) Copyright 2016 Oracle Corporation + * + * Implement a simple watchdog driver using the built-in sun4v hypervisor + * watchdog support. If time expires, the hypervisor stops or bounces + * the guest domain. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#define WDT_TIMEOUT 60 +#define WDT_MAX_TIMEOUT 31536000 +#define WDT_MIN_TIMEOUT 1 +#define WDT_DEFAULT_RESOLUTION_MS 1000 /* 1 second */ + +static unsigned int timeout; +module_param(timeout, uint, 0); +MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default=" + __MODULE_STRING(WDT_TIMEOUT) ")"); + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, S_IRUGO); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +static int sun4v_wdt_stop(struct watchdog_device *wdd) +{ + sun4v_mach_set_watchdog(0, NULL); + + return 0; +} + +static int sun4v_wdt_ping(struct watchdog_device *wdd) +{ + int hverr; + + /* + * HV watchdog timer will round up the timeout + * passed in to the nearest multiple of the + * watchdog resolution in milliseconds. + */ + hverr = sun4v_mach_set_watchdog(wdd->timeout * 1000, NULL); + if (hverr == HV_EINVAL) + return -EINVAL; + + return 0; +} + +static int sun4v_wdt_set_timeout(struct watchdog_device *wdd, + unsigned int timeout) +{ + wdd->timeout = timeout; + + return 0; +} + +static const struct watchdog_info sun4v_wdt_ident = { + .options = WDIOF_SETTIMEOUT | + WDIOF_MAGICCLOSE | + WDIOF_KEEPALIVEPING, + .identity = "sun4v hypervisor watchdog", + .firmware_version = 0, +}; + +static struct watchdog_ops sun4v_wdt_ops = { + .owner = THIS_MODULE, + .start = sun4v_wdt_ping, + .stop = sun4v_wdt_stop, + .ping = sun4v_wdt_ping, + .set_timeout = sun4v_wdt_set_timeout, +}; + +static struct watchdog_device wdd = { + .info = &sun4v_wdt_ident, + .ops = &sun4v_wdt_ops, + .min_timeout = WDT_MIN_TIMEOUT, + .max_timeout = WDT_MAX_TIMEOUT, + .timeout = WDT_TIMEOUT, +}; + +static int __init sun4v_wdt_init(void) +{ + struct mdesc_handle *handle; + u64 node; + const u64 *value; + int err = 0; + unsigned long major = 1, minor = 1; + + /* + * There are 2 properties that can be set from the control + * domain for the watchdog. + * watchdog-resolution + * watchdog-max-timeout + * + * We can expect a handle to be returned otherwise something + * serious is wrong. Correct to return -ENODEV here. + */ + + handle = mdesc_grab(); + if (!handle) + return -ENODEV; + + node = mdesc_node_by_name(handle, MDESC_NODE_NULL, "platform"); + err = -ENODEV; + if (node == MDESC_NODE_NULL) + goto out_release; + + /* + * This is a safe way to validate if we are on the right + * platform. + */ + if (sun4v_hvapi_register(HV_GRP_CORE, major, &minor)) + goto out_hv_unreg; + + /* Allow value of watchdog-resolution up to 1s (default) */ + value = mdesc_get_property(handle, node, "watchdog-resolution", NULL); + err = -EINVAL; + if (value) { + if (*value == 0 || + *value > WDT_DEFAULT_RESOLUTION_MS) + goto out_hv_unreg; + } + + value = mdesc_get_property(handle, node, "watchdog-max-timeout", NULL); + if (value) { + /* + * If the property value (in ms) is smaller than + * min_timeout, return -EINVAL. + */ + if (*value < wdd.min_timeout * 1000) + goto out_hv_unreg; + + /* + * If the property value is smaller than + * default max_timeout then set watchdog max_timeout to + * the value of the property in seconds. + */ + if (*value < wdd.max_timeout * 1000) + wdd.max_timeout = *value / 1000; + } + + watchdog_init_timeout(&wdd, timeout, NULL); + + watchdog_set_nowayout(&wdd, nowayout); + + err = watchdog_register_device(&wdd); + if (err) + goto out_hv_unreg; + + pr_info("initialized (timeout=%ds, nowayout=%d)\n", + wdd.timeout, nowayout); + + mdesc_release(handle); + + return 0; + +out_hv_unreg: + sun4v_hvapi_unregister(HV_GRP_CORE); + +out_release: + mdesc_release(handle); + return err; +} + +static void __exit sun4v_wdt_exit(void) +{ + sun4v_hvapi_unregister(HV_GRP_CORE); + watchdog_unregister_device(&wdd); +} + +module_init(sun4v_wdt_init); +module_exit(sun4v_wdt_exit); + +MODULE_AUTHOR("Wim Coekaerts "); +MODULE_DESCRIPTION("sun4v watchdog driver"); +MODULE_LICENSE("GPL");