linux-stable/arch/arm/kernel/process.c

445 lines
10 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/arch/arm/kernel/process.c
*
* Copyright (C) 1996-2000 Russell King - Converted to ARM.
* Original Copyright (C) 1995 Linus Torvalds
*/
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/elfcore.h>
#include <linux/pm.h>
#include <linux/tick.h>
#include <linux/utsname.h>
#include <linux/uaccess.h>
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
#include <linux/leds.h>
#include <asm/processor.h>
#include <asm/thread_notify.h>
#include <asm/stacktrace.h>
#include <asm/system_misc.h>
#include <asm/mach/time.h>
#include <asm/tls.h>
#include <asm/vdso.h>
#include "signal.h"
#if defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
DEFINE_PER_CPU(struct task_struct *, __entry_task);
#endif
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
#ifndef CONFIG_CURRENT_POINTER_IN_TPIDRURO
asmlinkage struct task_struct *__current;
EXPORT_SYMBOL(__current);
#endif
static const char *processor_modes[] __maybe_unused = {
"USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" ,
"UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26",
"USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "MON_32" , "ABT_32" ,
"UK8_32" , "UK9_32" , "HYP_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32"
};
static const char *isa_modes[] __maybe_unused = {
"ARM" , "Thumb" , "Jazelle", "ThumbEE"
};
/*
* This is our default idle handler.
*/
void (*arm_pm_idle)(void);
/*
* Called from the core idle loop.
*/
void arch_cpu_idle(void)
{
if (arm_pm_idle)
arm_pm_idle();
else
cpu_do_idle();
raw_local_irq_enable();
}
void arch_cpu_idle_prepare(void)
{
local_fiq_enable();
}
void arch_cpu_idle_enter(void)
{
ledtrig_cpu(CPU_LED_IDLE_START);
#ifdef CONFIG_PL310_ERRATA_769419
wmb();
#endif
}
void arch_cpu_idle_exit(void)
{
ledtrig_cpu(CPU_LED_IDLE_END);
}
arm: print alloc free paths for address in registers In case of a use after free kernel oops, the freeing path of the object is required to debug futher. In most of cases the object address is present in one of the registers. Thus check the register's address and if it belongs to slab, print its alloc and free path. e.g. in the below issue register r6 belongs to slab, and a use after free issue occurred on one of its dereferenced values: Unable to handle kernel paging request at virtual address 6b6b6b6f .... pc : [<c0538afc>] lr : [<c0465674>] psr: 60000013 sp : c8927d40 ip : ffffefff fp : c8aa8020 r10: c8927e10 r9 : 00000001 r8 : 00400cc0 r7 : 00000000 r6 : c8ab0180 r5 : c1804a80 r4 : c8aa8008 r3 : c1a5661c r2 : 00000000 r1 : 6b6b6b6b r0 : c139bf48 ..... Register r6 information: slab kmalloc-64 start c8ab0140 data offset 64 pointer offset 0 size 64 allocated at meminfo_proc_show+0x40/0x4fc meminfo_proc_show+0x40/0x4fc seq_read_iter+0x18c/0x4c4 proc_reg_read_iter+0x84/0xac generic_file_splice_read+0xe8/0x17c splice_direct_to_actor+0xb8/0x290 do_splice_direct+0xa0/0xe0 do_sendfile+0x2d0/0x438 sys_sendfile64+0x12c/0x140 ret_fast_syscall+0x0/0x58 0xbeeacde4 Free path: meminfo_proc_show+0x5c/0x4fc seq_read_iter+0x18c/0x4c4 proc_reg_read_iter+0x84/0xac generic_file_splice_read+0xe8/0x17c splice_direct_to_actor+0xb8/0x290 do_splice_direct+0xa0/0xe0 do_sendfile+0x2d0/0x438 sys_sendfile64+0x12c/0x140 ret_fast_syscall+0x0/0x58 0xbeeacde4 Link: https://lkml.kernel.org/r/1615891032-29160-3-git-send-email-maninder1.s@samsung.com Co-developed-by: Vaneet Narang <v.narang@samsung.com> Signed-off-by: Vaneet Narang <v.narang@samsung.com> Signed-off-by: Maninder Singh <maninder1.s@samsung.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Christoph Lameter <cl@linux.com> Cc: David Rientjes <rientjes@google.com> Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Pekka Enberg <penberg@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-05-07 01:06:09 +00:00
void __show_regs_alloc_free(struct pt_regs *regs)
{
int i;
/* check for r0 - r12 only */
for (i = 0; i < 13; i++) {
pr_alert("Register r%d information:", i);
mem_dump_obj((void *)regs->uregs[i]);
}
}
void __show_regs(struct pt_regs *regs)
{
unsigned long flags;
char buf[64];
#ifndef CONFIG_CPU_V7M
unsigned int domain;
#ifdef CONFIG_CPU_SW_DOMAIN_PAN
/*
* Get the domain register for the parent context. In user
* mode, we don't save the DACR, so lets use what it should
* be. For other modes, we place it after the pt_regs struct.
*/
if (user_mode(regs)) {
domain = DACR_UACCESS_ENABLE;
} else {
domain = to_svc_pt_regs(regs)->dacr;
}
#else
domain = get_domain();
#endif
#endif
dump_stack: unify debug information printed by show_regs() show_regs() is inherently arch-dependent but it does make sense to print generic debug information and some archs already do albeit in slightly different forms. This patch introduces a generic function to print debug information from show_regs() so that different archs print out the same information and it's much easier to modify what's printed. show_regs_print_info() prints out the same debug info as dump_stack() does plus task and thread_info pointers. * Archs which didn't print debug info now do. alpha, arc, blackfin, c6x, cris, frv, h8300, hexagon, ia64, m32r, metag, microblaze, mn10300, openrisc, parisc, score, sh64, sparc, um, xtensa * Already prints debug info. Replaced with show_regs_print_info(). The printed information is superset of what used to be there. arm, arm64, avr32, mips, powerpc, sh32, tile, unicore32, x86 * s390 is special in that it used to print arch-specific information along with generic debug info. Heiko and Martin think that the arch-specific extra isn't worth keeping s390 specfic implementation. Converted to use the generic version. Note that now all archs print the debug info before actual register dumps. An example BUG() dump follows. kernel BUG at /work/os/work/kernel/workqueue.c:4841! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #7 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 task: ffff88007c85e040 ti: ffff88007c860000 task.ti: ffff88007c860000 RIP: 0010:[<ffffffff8234a07e>] [<ffffffff8234a07e>] init_workqueues+0x4/0x6 RSP: 0000:ffff88007c861ec8 EFLAGS: 00010246 RAX: ffff88007c861fd8 RBX: ffffffff824466a8 RCX: 0000000000000001 RDX: 0000000000000046 RSI: 0000000000000001 RDI: ffffffff8234a07a RBP: ffff88007c861ec8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff8234a07a R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff88015f7ff000 CR3: 00000000021f1000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff88007c861ef8 ffffffff81000312 ffffffff824466a8 ffff88007c85e650 0000000000000003 0000000000000000 ffff88007c861f38 ffffffff82335e5d ffff88007c862080 ffffffff8223d8c0 ffff88007c862080 ffffffff81c47760 Call Trace: [<ffffffff81000312>] do_one_initcall+0x122/0x170 [<ffffffff82335e5d>] kernel_init_freeable+0x9b/0x1c8 [<ffffffff81c47760>] ? rest_init+0x140/0x140 [<ffffffff81c4776e>] kernel_init+0xe/0xf0 [<ffffffff81c6be9c>] ret_from_fork+0x7c/0xb0 [<ffffffff81c47760>] ? rest_init+0x140/0x140 ... v2: Typo fix in x86-32. v3: CPU number dropped from show_regs_print_info() as dump_stack_print_info() has been updated to print it. s390 specific implementation dropped as requested by s390 maintainers. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: David S. Miller <davem@davemloft.net> Acked-by: Jesper Nilsson <jesper.nilsson@axis.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Fengguang Wu <fengguang.wu@intel.com> Cc: Mike Frysinger <vapier@gentoo.org> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Sam Ravnborg <sam@ravnborg.org> Acked-by: Chris Metcalf <cmetcalf@tilera.com> [tile bits] Acked-by: Richard Kuo <rkuo@codeaurora.org> [hexagon bits] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-04-30 22:27:17 +00:00
show_regs_print_info(KERN_DEFAULT);
printk("PC is at %pS\n", (void *)instruction_pointer(regs));
printk("LR is at %pS\n", (void *)regs->ARM_lr);
printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n",
regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr);
printk("sp : %08lx ip : %08lx fp : %08lx\n",
regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
printk("r10: %08lx r9 : %08lx r8 : %08lx\n",
regs->ARM_r10, regs->ARM_r9,
regs->ARM_r8);
printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n",
regs->ARM_r7, regs->ARM_r6,
regs->ARM_r5, regs->ARM_r4);
printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n",
regs->ARM_r3, regs->ARM_r2,
regs->ARM_r1, regs->ARM_r0);
flags = regs->ARM_cpsr;
buf[0] = flags & PSR_N_BIT ? 'N' : 'n';
buf[1] = flags & PSR_Z_BIT ? 'Z' : 'z';
buf[2] = flags & PSR_C_BIT ? 'C' : 'c';
buf[3] = flags & PSR_V_BIT ? 'V' : 'v';
buf[4] = '\0';
#ifndef CONFIG_CPU_V7M
{
const char *segment;
if ((domain & domain_mask(DOMAIN_USER)) ==
domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
segment = "none";
else
segment = "user";
printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n",
buf, interrupts_enabled(regs) ? "n" : "ff",
fast_interrupts_enabled(regs) ? "n" : "ff",
processor_modes[processor_mode(regs)],
isa_modes[isa_mode(regs)], segment);
}
#else
printk("xPSR: %08lx\n", regs->ARM_cpsr);
#endif
#ifdef CONFIG_CPU_CP15
{
unsigned int ctrl;
buf[0] = '\0';
#ifdef CONFIG_CPU_CP15_MMU
{
unsigned int transbase;
asm("mrc p15, 0, %0, c2, c0\n\t"
: "=r" (transbase));
snprintf(buf, sizeof(buf), " Table: %08x DAC: %08x",
transbase, domain);
}
#endif
asm("mrc p15, 0, %0, c1, c0\n" : "=r" (ctrl));
printk("Control: %08x%s\n", ctrl, buf);
}
#endif
}
void show_regs(struct pt_regs * regs)
{
__show_regs(regs);
ARM: 9224/1: Dump the stack traces based on the parameter 'regs' of show_regs() Function show_regs() is usually called in interrupt handler or exception handler, it prints the registers specified by the parameter 'regs', then dump the stack traces. Although not explicitly documented, dump the stack traces based on'regs' seems to make the most sense. Although dump_stack() can finally dump the desired content, because 'regs' are saved by the entry of current interrupt or exception. In the following example we can see: 1) The backtrace of interrupt or exception handler is not expected, it causes confusion. 2) Something is printed repeatedly. The line with the kernel version "CPU: 0 PID: 70 Comm: test0 Not tainted 5.19.0+ #8", the registers saved in "Exception stack" which 'regs' actually point to. For example: rcu: INFO: rcu_sched self-detected stall on CPU rcu: 0-....: (499 ticks this GP) idle=379/1/0x40000002 softirq=91/91 fqs=249 (t=500 jiffies g=-911 q=13 ncpus=4) CPU: 0 PID: 70 Comm: test0 Not tainted 5.19.0+ #8 Hardware name: ARM-Versatile Express PC is at ktime_get+0x4c/0xe8 LR is at ktime_get+0x4c/0xe8 pc : 8019a474 lr : 8019a474 psr: 60000013 sp : cabd1f28 ip : 00000001 fp : 00000005 r10: 527bf1b8 r9 : 431bde82 r8 : d7b634db r7 : 0000156e r6 : 61f234f8 r5 : 00000001 r4 : 80ca86c0 r3 : ffffffff r2 : fe5bce0b r1 : 00000000 r0 : 01a431f4 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 6121406a DAC: 00000051 CPU: 0 PID: 70 Comm: test0 Not tainted 5.19.0+ #8 <-----------start---------- Hardware name: ARM-Versatile Express | unwind_backtrace from show_stack+0x10/0x14 | show_stack from dump_stack_lvl+0x40/0x4c | dump_stack_lvl from rcu_dump_cpu_stacks+0x10c/0x134 | rcu_dump_cpu_stacks from rcu_sched_clock_irq+0x780/0xaf4 | rcu_sched_clock_irq from update_process_times+0x54/0x74 | update_process_times from tick_periodic+0x3c/0xd4 | tick_periodic from tick_handle_periodic+0x20/0x80 worthless tick_handle_periodic from twd_handler+0x30/0x40 or twd_handler from handle_percpu_devid_irq+0x8c/0x1c8 duplicated handle_percpu_devid_irq from generic_handle_domain_irq+0x24/0x34 | generic_handle_domain_irq from gic_handle_irq+0x74/0x88 | gic_handle_irq from generic_handle_arch_irq+0x34/0x44 | generic_handle_arch_irq from call_with_stack+0x18/0x20 | call_with_stack from __irq_svc+0x98/0xb0 | Exception stack(0xcabd1ed8 to 0xcabd1f20) | 1ec0: 01a431f4 00000000 | 1ee0: fe5bce0b ffffffff 80ca86c0 00000001 61f234f8 0000156e d7b634db 431bde82 | 1f00: 527bf1b8 00000005 00000001 cabd1f28 8019a474 8019a474 60000013 ffffffff | __irq_svc from ktime_get+0x4c/0xe8 <---------end-------------- ktime_get from test_task+0x44/0x110 test_task from kthread+0xd8/0xf4 kthread from ret_from_fork+0x14/0x2c Exception stack(0xcabd1fb0 to 0xcabd1ff8) 1fa0: 00000000 00000000 00000000 00000000 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 After replacing dump_stack() with dump_backtrace(): rcu: INFO: rcu_sched self-detected stall on CPU rcu: 0-....: (500 ticks this GP) idle=8f7/1/0x40000002 softirq=129/129 fqs=241 (t=500 jiffies g=-915 q=13 ncpus=4) CPU: 0 PID: 69 Comm: test0 Not tainted 5.19.0+ #9 Hardware name: ARM-Versatile Express PC is at ktime_get+0x4c/0xe8 LR is at ktime_get+0x4c/0xe8 pc : 8019a494 lr : 8019a494 psr: 60000013 sp : cabddf28 ip : 00000001 fp : 00000002 r10: 0779cb48 r9 : 431bde82 r8 : d7b634db r7 : 00000a66 r6 : e835ab70 r5 : 00000001 r4 : 80ca86c0 r3 : ffffffff r2 : ff337d39 r1 : 00000000 r0 : 00cc82c6 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 611d006a DAC: 00000051 ktime_get from test_task+0x44/0x110 test_task from kthread+0xd8/0xf4 kthread from ret_from_fork+0x14/0x2c Exception stack(0xcabddfb0 to 0xcabddff8) dfa0: 00000000 00000000 00000000 00000000 dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 dfe0: 00000000 00000000 00000000 00000000 00000013 00000000 Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
2022-08-03 07:44:58 +00:00
dump_backtrace(regs, NULL, KERN_DEFAULT);
}
ATOMIC_NOTIFIER_HEAD(thread_notify_head);
EXPORT_SYMBOL_GPL(thread_notify_head);
/*
* Free current thread data structures etc..
*/
exit_thread: accept a task parameter to be exited We need to call exit_thread from copy_process in a fail path. So make it accept task_struct as a parameter. [v2] * s390: exit_thread_runtime_instr doesn't make sense to be called for non-current tasks. * arm: fix the comment in vfp_thread_copy * change 'me' to 'tsk' for task_struct * now we can change only archs that actually have exit_thread [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jiri Slaby <jslaby@suse.cz> Cc: "David S. Miller" <davem@davemloft.net> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Aurelien Jacquiot <a-jacquiot@ti.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chen Liqin <liqin.linux@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: Chris Zankel <chris@zankel.net> Cc: David Howells <dhowells@redhat.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: James Hogan <james.hogan@imgtec.com> Cc: Jeff Dike <jdike@addtoit.com> Cc: Jesper Nilsson <jesper.nilsson@axis.com> Cc: Jiri Slaby <jslaby@suse.cz> Cc: Jonas Bonn <jonas@southpole.se> Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com> Cc: Lennox Wu <lennox.wu@gmail.com> Cc: Ley Foon Tan <lftan@altera.com> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Mikael Starvik <starvik@axis.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Rich Felker <dalias@libc.org> Cc: Richard Henderson <rth@twiddle.net> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Richard Weinberger <richard@nod.at> Cc: Russell King <linux@arm.linux.org.uk> Cc: Steven Miao <realmz6@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-21 00:00:20 +00:00
void exit_thread(struct task_struct *tsk)
{
exit_thread: accept a task parameter to be exited We need to call exit_thread from copy_process in a fail path. So make it accept task_struct as a parameter. [v2] * s390: exit_thread_runtime_instr doesn't make sense to be called for non-current tasks. * arm: fix the comment in vfp_thread_copy * change 'me' to 'tsk' for task_struct * now we can change only archs that actually have exit_thread [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jiri Slaby <jslaby@suse.cz> Cc: "David S. Miller" <davem@davemloft.net> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Aurelien Jacquiot <a-jacquiot@ti.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chen Liqin <liqin.linux@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: Chris Zankel <chris@zankel.net> Cc: David Howells <dhowells@redhat.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: James Hogan <james.hogan@imgtec.com> Cc: Jeff Dike <jdike@addtoit.com> Cc: Jesper Nilsson <jesper.nilsson@axis.com> Cc: Jiri Slaby <jslaby@suse.cz> Cc: Jonas Bonn <jonas@southpole.se> Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com> Cc: Lennox Wu <lennox.wu@gmail.com> Cc: Ley Foon Tan <lftan@altera.com> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Mikael Starvik <starvik@axis.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Rich Felker <dalias@libc.org> Cc: Richard Henderson <rth@twiddle.net> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Richard Weinberger <richard@nod.at> Cc: Russell King <linux@arm.linux.org.uk> Cc: Steven Miao <realmz6@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-21 00:00:20 +00:00
thread_notify(THREAD_NOTIFY_EXIT, task_thread_info(tsk));
}
void flush_thread(void)
{
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = current;
flush_ptrace_hw_breakpoint(tsk);
memset(thread->used_cp, 0, sizeof(thread->used_cp));
memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
memset(&thread->fpstate, 0, sizeof(union fp_state));
flush_tls();
thread_notify(THREAD_NOTIFY_FLUSH, thread);
}
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long clone_flags = args->flags;
unsigned long stack_start = args->stack;
unsigned long tls = args->tls;
struct thread_info *thread = task_thread_info(p);
struct pt_regs *childregs = task_pt_regs(p);
memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
#ifdef CONFIG_CPU_USE_DOMAINS
/*
* Copy the initial value of the domain access control register
* from the current thread: thread->addr_limit will have been
* copied from the current thread via setup_thread_stack() in
* kernel/fork.c
*/
thread->cpu_domain = get_domain();
#endif
if (likely(!args->fn)) {
*childregs = *current_pt_regs();
childregs->ARM_r0 = 0;
if (stack_start)
childregs->ARM_sp = stack_start;
} else {
memset(childregs, 0, sizeof(struct pt_regs));
thread->cpu_context.r4 = (unsigned long)args->fn_arg;
thread->cpu_context.r5 = (unsigned long)args->fn;
childregs->ARM_cpsr = SVC_MODE;
}
thread->cpu_context.pc = (unsigned long)ret_from_fork;
thread->cpu_context.sp = (unsigned long)childregs;
clear_ptrace_hw_breakpoint(p);
if (clone_flags & CLONE_SETTLS)
thread->tp_value[0] = tls;
thread->tp_value[1] = get_tpuser();
thread_notify(THREAD_NOTIFY_COPY, thread);
return 0;
}
unsigned long __get_wchan(struct task_struct *p)
{
struct stackframe frame;
unsigned long stack_page;
int count = 0;
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
frame.lr = 0; /* recovered from the stack */
frame.pc = thread_saved_pc(p);
stack_page = (unsigned long)task_stack_page(p);
do {
if (frame.sp < stack_page ||
frame.sp >= stack_page + THREAD_SIZE ||
unwind_frame(&frame) < 0)
return 0;
if (!in_sched_functions(frame.pc))
return frame.pc;
} while (count ++ < 16);
return 0;
}
#ifdef CONFIG_MMU
#ifdef CONFIG_KUSER_HELPERS
/*
* The vectors page is always readable from user space for the
* atomic helpers. Insert it into the gate_vma so that it is visible
* through ptrace and /proc/<pid>/mem.
*/
static struct vm_area_struct gate_vma;
static int __init gate_vma_init(void)
{
vma_init(&gate_vma, NULL);
gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
gate_vma.vm_start = 0xffff0000;
gate_vma.vm_end = 0xffff0000 + PAGE_SIZE;
gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
return 0;
}
arch_initcall(gate_vma_init);
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return &gate_vma;
}
int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
return (addr >= gate_vma.vm_start) && (addr < gate_vma.vm_end);
}
int in_gate_area_no_mm(unsigned long addr)
{
return in_gate_area(NULL, addr);
}
#define is_gate_vma(vma) ((vma) == &gate_vma)
#else
#define is_gate_vma(vma) 0
#endif
const char *arch_vma_name(struct vm_area_struct *vma)
{
return is_gate_vma(vma) ? "[vectors]" : NULL;
}
/* If possible, provide a placement hint at a random offset from the
ARM: 8331/1: VDSO initialization, mapping, and synchronization Initialize the VDSO page list at boot, install the VDSO mapping at exec time, and update the data page during timer ticks. This code is not built if CONFIG_VDSO is not enabled. Account for the VDSO length when randomizing the offset from the stack. The [vdso] and [vvar] pages are placed immediately following the sigpage with separate _install_special_mapping calls. We want to "penalize" systems lacking the arch timer as little as possible. Previous versions of this code installed the VDSO unconditionally and unmodified, making it a measurably slower way for glibc to invoke the real syscalls on such systems. E.g. calling gettimeofday via glibc goes from ~560ns to ~630ns on i.MX6Q. If we can indicate to glibc that the time-related APIs in the VDSO are not accelerated, glibc can continue to invoke the syscalls directly instead of dispatching through the VDSO only to fall back to the slow path. Thus, if the architected timer is unusable for whatever reason, patch the VDSO at boot time so that symbol lookups for gettimeofday and clock_gettime return NULL. (This is similar to what powerpc does and borrows code from there.) This allows glibc to perform the syscall directly instead of passing control to the VDSO, which minimizes the penalty. In my measurements the time taken for a gettimeofday call via glibc goes from ~560ns to ~580ns (again on i.MX6Q), and this is solely due to adding a test and branch to glibc's gettimeofday syscall wrapper. An alternative to patching the VDSO at boot would be to not install the VDSO at all when the arch timer isn't usable. Another alternative is to include a separate "dummy" vdso.so without gettimeofday and clock_gettime, which would be selected at boot time. Either of these would get cumbersome if the VDSO were to gain support for an API such as getcpu which is unrelated to arch timer support. Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2015-03-25 18:15:08 +00:00
* stack for the sigpage and vdso pages.
*/
static unsigned long sigpage_addr(const struct mm_struct *mm,
unsigned int npages)
{
unsigned long offset;
unsigned long first;
unsigned long last;
unsigned long addr;
unsigned int slots;
first = PAGE_ALIGN(mm->start_stack);
last = TASK_SIZE - (npages << PAGE_SHIFT);
/* No room after stack? */
if (first > last)
return 0;
/* Just enough room? */
if (first == last)
return first;
slots = ((last - first) >> PAGE_SHIFT) + 1;
offset = get_random_u32_below(slots);
addr = first + (offset << PAGE_SHIFT);
return addr;
}
static struct page *signal_page;
extern struct page *get_signal_page(void);
static int sigpage_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
current->mm->context.sigpage = new_vma->vm_start;
return 0;
}
static const struct vm_special_mapping sigpage_mapping = {
.name = "[sigpage]",
.pages = &signal_page,
.mremap = sigpage_mremap,
};
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
ARM: 8331/1: VDSO initialization, mapping, and synchronization Initialize the VDSO page list at boot, install the VDSO mapping at exec time, and update the data page during timer ticks. This code is not built if CONFIG_VDSO is not enabled. Account for the VDSO length when randomizing the offset from the stack. The [vdso] and [vvar] pages are placed immediately following the sigpage with separate _install_special_mapping calls. We want to "penalize" systems lacking the arch timer as little as possible. Previous versions of this code installed the VDSO unconditionally and unmodified, making it a measurably slower way for glibc to invoke the real syscalls on such systems. E.g. calling gettimeofday via glibc goes from ~560ns to ~630ns on i.MX6Q. If we can indicate to glibc that the time-related APIs in the VDSO are not accelerated, glibc can continue to invoke the syscalls directly instead of dispatching through the VDSO only to fall back to the slow path. Thus, if the architected timer is unusable for whatever reason, patch the VDSO at boot time so that symbol lookups for gettimeofday and clock_gettime return NULL. (This is similar to what powerpc does and borrows code from there.) This allows glibc to perform the syscall directly instead of passing control to the VDSO, which minimizes the penalty. In my measurements the time taken for a gettimeofday call via glibc goes from ~560ns to ~580ns (again on i.MX6Q), and this is solely due to adding a test and branch to glibc's gettimeofday syscall wrapper. An alternative to patching the VDSO at boot would be to not install the VDSO at all when the arch timer isn't usable. Another alternative is to include a separate "dummy" vdso.so without gettimeofday and clock_gettime, which would be selected at boot time. Either of these would get cumbersome if the VDSO were to gain support for an API such as getcpu which is unrelated to arch timer support. Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2015-03-25 18:15:08 +00:00
unsigned long npages;
unsigned long addr;
unsigned long hint;
int ret = 0;
if (!signal_page)
signal_page = get_signal_page();
if (!signal_page)
return -ENOMEM;
ARM: 8331/1: VDSO initialization, mapping, and synchronization Initialize the VDSO page list at boot, install the VDSO mapping at exec time, and update the data page during timer ticks. This code is not built if CONFIG_VDSO is not enabled. Account for the VDSO length when randomizing the offset from the stack. The [vdso] and [vvar] pages are placed immediately following the sigpage with separate _install_special_mapping calls. We want to "penalize" systems lacking the arch timer as little as possible. Previous versions of this code installed the VDSO unconditionally and unmodified, making it a measurably slower way for glibc to invoke the real syscalls on such systems. E.g. calling gettimeofday via glibc goes from ~560ns to ~630ns on i.MX6Q. If we can indicate to glibc that the time-related APIs in the VDSO are not accelerated, glibc can continue to invoke the syscalls directly instead of dispatching through the VDSO only to fall back to the slow path. Thus, if the architected timer is unusable for whatever reason, patch the VDSO at boot time so that symbol lookups for gettimeofday and clock_gettime return NULL. (This is similar to what powerpc does and borrows code from there.) This allows glibc to perform the syscall directly instead of passing control to the VDSO, which minimizes the penalty. In my measurements the time taken for a gettimeofday call via glibc goes from ~560ns to ~580ns (again on i.MX6Q), and this is solely due to adding a test and branch to glibc's gettimeofday syscall wrapper. An alternative to patching the VDSO at boot would be to not install the VDSO at all when the arch timer isn't usable. Another alternative is to include a separate "dummy" vdso.so without gettimeofday and clock_gettime, which would be selected at boot time. Either of these would get cumbersome if the VDSO were to gain support for an API such as getcpu which is unrelated to arch timer support. Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2015-03-25 18:15:08 +00:00
npages = 1; /* for sigpage */
npages += vdso_total_pages;
mmap locking API: use coccinelle to convert mmap_sem rwsem call sites This change converts the existing mmap_sem rwsem calls to use the new mmap locking API instead. The change is generated using coccinelle with the following rule: // spatch --sp-file mmap_lock_api.cocci --in-place --include-headers --dir . @@ expression mm; @@ ( -init_rwsem +mmap_init_lock | -down_write +mmap_write_lock | -down_write_killable +mmap_write_lock_killable | -down_write_trylock +mmap_write_trylock | -up_write +mmap_write_unlock | -downgrade_write +mmap_write_downgrade | -down_read +mmap_read_lock | -down_read_killable +mmap_read_lock_killable | -down_read_trylock +mmap_read_trylock | -up_read +mmap_read_unlock ) -(&mm->mmap_sem) +(mm) Signed-off-by: Michel Lespinasse <walken@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com> Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Cc: Davidlohr Bueso <dbueso@suse.de> Cc: David Rientjes <rientjes@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jerome Glisse <jglisse@redhat.com> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Liam Howlett <Liam.Howlett@oracle.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ying Han <yinghan@google.com> Link: http://lkml.kernel.org/r/20200520052908.204642-5-walken@google.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-09 04:33:25 +00:00
if (mmap_write_lock_killable(mm))
return -EINTR;
ARM: 8331/1: VDSO initialization, mapping, and synchronization Initialize the VDSO page list at boot, install the VDSO mapping at exec time, and update the data page during timer ticks. This code is not built if CONFIG_VDSO is not enabled. Account for the VDSO length when randomizing the offset from the stack. The [vdso] and [vvar] pages are placed immediately following the sigpage with separate _install_special_mapping calls. We want to "penalize" systems lacking the arch timer as little as possible. Previous versions of this code installed the VDSO unconditionally and unmodified, making it a measurably slower way for glibc to invoke the real syscalls on such systems. E.g. calling gettimeofday via glibc goes from ~560ns to ~630ns on i.MX6Q. If we can indicate to glibc that the time-related APIs in the VDSO are not accelerated, glibc can continue to invoke the syscalls directly instead of dispatching through the VDSO only to fall back to the slow path. Thus, if the architected timer is unusable for whatever reason, patch the VDSO at boot time so that symbol lookups for gettimeofday and clock_gettime return NULL. (This is similar to what powerpc does and borrows code from there.) This allows glibc to perform the syscall directly instead of passing control to the VDSO, which minimizes the penalty. In my measurements the time taken for a gettimeofday call via glibc goes from ~560ns to ~580ns (again on i.MX6Q), and this is solely due to adding a test and branch to glibc's gettimeofday syscall wrapper. An alternative to patching the VDSO at boot would be to not install the VDSO at all when the arch timer isn't usable. Another alternative is to include a separate "dummy" vdso.so without gettimeofday and clock_gettime, which would be selected at boot time. Either of these would get cumbersome if the VDSO were to gain support for an API such as getcpu which is unrelated to arch timer support. Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2015-03-25 18:15:08 +00:00
hint = sigpage_addr(mm, npages);
addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
vma = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
&sigpage_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto up_fail;
}
mm->context.sigpage = addr;
ARM: 8331/1: VDSO initialization, mapping, and synchronization Initialize the VDSO page list at boot, install the VDSO mapping at exec time, and update the data page during timer ticks. This code is not built if CONFIG_VDSO is not enabled. Account for the VDSO length when randomizing the offset from the stack. The [vdso] and [vvar] pages are placed immediately following the sigpage with separate _install_special_mapping calls. We want to "penalize" systems lacking the arch timer as little as possible. Previous versions of this code installed the VDSO unconditionally and unmodified, making it a measurably slower way for glibc to invoke the real syscalls on such systems. E.g. calling gettimeofday via glibc goes from ~560ns to ~630ns on i.MX6Q. If we can indicate to glibc that the time-related APIs in the VDSO are not accelerated, glibc can continue to invoke the syscalls directly instead of dispatching through the VDSO only to fall back to the slow path. Thus, if the architected timer is unusable for whatever reason, patch the VDSO at boot time so that symbol lookups for gettimeofday and clock_gettime return NULL. (This is similar to what powerpc does and borrows code from there.) This allows glibc to perform the syscall directly instead of passing control to the VDSO, which minimizes the penalty. In my measurements the time taken for a gettimeofday call via glibc goes from ~560ns to ~580ns (again on i.MX6Q), and this is solely due to adding a test and branch to glibc's gettimeofday syscall wrapper. An alternative to patching the VDSO at boot would be to not install the VDSO at all when the arch timer isn't usable. Another alternative is to include a separate "dummy" vdso.so without gettimeofday and clock_gettime, which would be selected at boot time. Either of these would get cumbersome if the VDSO were to gain support for an API such as getcpu which is unrelated to arch timer support. Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2015-03-25 18:15:08 +00:00
/* Unlike the sigpage, failure to install the vdso is unlikely
* to be fatal to the process, so no error check needed
* here.
*/
arm_install_vdso(mm, addr + PAGE_SIZE);
up_fail:
mmap locking API: use coccinelle to convert mmap_sem rwsem call sites This change converts the existing mmap_sem rwsem calls to use the new mmap locking API instead. The change is generated using coccinelle with the following rule: // spatch --sp-file mmap_lock_api.cocci --in-place --include-headers --dir . @@ expression mm; @@ ( -init_rwsem +mmap_init_lock | -down_write +mmap_write_lock | -down_write_killable +mmap_write_lock_killable | -down_write_trylock +mmap_write_trylock | -up_write +mmap_write_unlock | -downgrade_write +mmap_write_downgrade | -down_read +mmap_read_lock | -down_read_killable +mmap_read_lock_killable | -down_read_trylock +mmap_read_trylock | -up_read +mmap_read_unlock ) -(&mm->mmap_sem) +(mm) Signed-off-by: Michel Lespinasse <walken@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com> Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Cc: Davidlohr Bueso <dbueso@suse.de> Cc: David Rientjes <rientjes@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jerome Glisse <jglisse@redhat.com> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Liam Howlett <Liam.Howlett@oracle.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ying Han <yinghan@google.com> Link: http://lkml.kernel.org/r/20200520052908.204642-5-walken@google.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-09 04:33:25 +00:00
mmap_write_unlock(mm);
return ret;
}
#endif