linux-stable/arch/loongarch/kernel/process.c

359 lines
8.7 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Author: Huacai Chen <chenhuacai@loongson.cn>
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*
* Derived from MIPS:
* Copyright (C) 1994 - 1999, 2000 by Ralf Baechle and others.
* Copyright (C) 2005, 2006 by Ralf Baechle (ralf@linux-mips.org)
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2004 Thiemo Seufer
* Copyright (C) 2013 Imagination Technologies Ltd.
*/
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/personality.h>
#include <linux/sys.h>
#include <linux/completion.h>
#include <linux/kallsyms.h>
#include <linux/random.h>
#include <linux/prctl.h>
#include <linux/nmi.h>
#include <asm/asm.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>
#include <asm/elf.h>
#include <asm/fpu.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/loongarch.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/reg.h>
#include <asm/unwind.h>
#include <asm/vdso.h>
/*
* Idle related variables and functions
*/
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
play_dead();
}
#endif
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
{
unsigned long crmd;
unsigned long prmd;
unsigned long euen;
/* New thread loses kernel privileges. */
crmd = regs->csr_crmd & ~(PLV_MASK);
crmd |= PLV_USER;
regs->csr_crmd = crmd;
prmd = regs->csr_prmd & ~(PLV_MASK);
prmd |= PLV_USER;
regs->csr_prmd = prmd;
euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
regs->csr_euen = euen;
lose_fpu(0);
clear_thread_flag(TIF_LSX_CTX_LIVE);
clear_thread_flag(TIF_LASX_CTX_LIVE);
clear_used_math();
regs->csr_era = pc;
regs->regs[3] = sp;
}
void exit_thread(struct task_struct *tsk)
{
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
/*
* Save any process state which is live in hardware registers to the
* parent context prior to duplication. This prevents the new child
* state becoming stale if the parent is preempted before copy_thread()
* gets a chance to save the parent's live hardware registers to the
* child context.
*/
preempt_disable();
if (is_fpu_owner())
save_fp(current);
preempt_enable();
if (used_math())
memcpy(dst, src, sizeof(struct task_struct));
else
memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
return 0;
}
/*
* Copy architecture-specific thread state
*/
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long childksp;
unsigned long tls = args->tls;
unsigned long usp = args->stack;
unsigned long clone_flags = args->flags;
struct pt_regs *childregs, *regs = current_pt_regs();
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
/* set up new TSS. */
childregs = (struct pt_regs *) childksp - 1;
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.sched_cfa = 0;
p->thread.csr_euen = 0;
p->thread.csr_crmd = csr_read32(LOONGARCH_CSR_CRMD);
p->thread.csr_prmd = csr_read32(LOONGARCH_CSR_PRMD);
p->thread.csr_ecfg = csr_read32(LOONGARCH_CSR_ECFG);
if (unlikely(args->fn)) {
/* kernel thread */
p->thread.reg03 = childksp;
p->thread.reg23 = (unsigned long)args->fn;
p->thread.reg24 = (unsigned long)args->fn_arg;
p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
memset(childregs, 0, sizeof(struct pt_regs));
childregs->csr_euen = p->thread.csr_euen;
childregs->csr_crmd = p->thread.csr_crmd;
childregs->csr_prmd = p->thread.csr_prmd;
childregs->csr_ecfg = p->thread.csr_ecfg;
LoongArch: Clear FPU/SIMD thread info flags for kernel thread If a kernel thread is created by a user thread, it may carry FPU/SIMD thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). Then it will be considered as a fpu owner and kernel try to save its FPU/SIMD context and cause such errors: [ 41.518931] do_fpu invoked from kernel context![#1]: [ 41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217 [ 41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022 [ 41.544064] $ 0 : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0 [ 41.552101] $ 4 : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818 [ 41.560138] $ 8 : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc [ 41.568174] $12 : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130 [ 41.576211] $16 : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000 [ 41.584247] $20 : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400 [ 41.592284] $24 : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0 [ 41.600320] $28 : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140 [ 41.608356] era : 9000000000228848 _save_fp+0x0/0xd8 [ 41.613542] ra : 90000000011e9468 __schedule+0x568/0x8d0 [ 41.619160] CSR crmd: 000000b0 [ 41.619163] CSR prmd: 00000000 [ 41.622359] CSR euen: 00000000 [ 41.625558] CSR ecfg: 00071c1c [ 41.628756] CSR estat: 000f0000 [ 41.635239] ExcCode : f (SubCode 0) [ 41.638783] PrId : 0014c010 (Loongson-64bit) [ 41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs [ 41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be) [ 41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0 [ 41.670412] 0000000000000001 0000000000000000 9000000106535880 0000000000000001 [ 41.678450] 9000000105291800 0000000000000000 9000000105291838 900000000178e000 [ 41.686487] 9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834 [ 41.694523] 00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830 [ 41.702561] 0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0 [ 41.710597] 9000000128800001 9000000106305140 9000000105291838 9000000105291838 [ 41.718634] 9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0 [ 41.726672] 9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933 [ 41.734708] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 41.742745] ... [ 41.745252] Call Trace: [ 42.197868] [<9000000000228848>] _save_fp+0x0/0xd8 [ 42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0 [ 42.210485] [<90000000011ed834>] schedule+0x64/0xd4 [ 42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188 [ 42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350 [ 42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c This can be easily triggered by ltp testcase syscalls/io_uring02 and it can also be easily fixed by clearing the FPU/SIMD thread info flags for kernel threads in copy_thread(). Cc: stable@vger.kernel.org Reported-by: Qi Hu <huqi@loongson.cn> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2022-11-21 11:02:57 +00:00
goto out;
}
/* user thread */
*childregs = *regs;
childregs->regs[4] = 0; /* Child gets zero as return value */
if (usp)
childregs->regs[3] = usp;
p->thread.reg03 = (unsigned long) childregs;
p->thread.reg01 = (unsigned long) ret_from_fork;
p->thread.sched_ra = (unsigned long) ret_from_fork;
/*
* New tasks lose permission to use the fpu. This accelerates context
* switching for most programs since they don't use the fpu.
*/
childregs->csr_euen = 0;
LoongArch: Clear FPU/SIMD thread info flags for kernel thread If a kernel thread is created by a user thread, it may carry FPU/SIMD thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). Then it will be considered as a fpu owner and kernel try to save its FPU/SIMD context and cause such errors: [ 41.518931] do_fpu invoked from kernel context![#1]: [ 41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217 [ 41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022 [ 41.544064] $ 0 : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0 [ 41.552101] $ 4 : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818 [ 41.560138] $ 8 : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc [ 41.568174] $12 : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130 [ 41.576211] $16 : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000 [ 41.584247] $20 : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400 [ 41.592284] $24 : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0 [ 41.600320] $28 : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140 [ 41.608356] era : 9000000000228848 _save_fp+0x0/0xd8 [ 41.613542] ra : 90000000011e9468 __schedule+0x568/0x8d0 [ 41.619160] CSR crmd: 000000b0 [ 41.619163] CSR prmd: 00000000 [ 41.622359] CSR euen: 00000000 [ 41.625558] CSR ecfg: 00071c1c [ 41.628756] CSR estat: 000f0000 [ 41.635239] ExcCode : f (SubCode 0) [ 41.638783] PrId : 0014c010 (Loongson-64bit) [ 41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs [ 41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be) [ 41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0 [ 41.670412] 0000000000000001 0000000000000000 9000000106535880 0000000000000001 [ 41.678450] 9000000105291800 0000000000000000 9000000105291838 900000000178e000 [ 41.686487] 9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834 [ 41.694523] 00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830 [ 41.702561] 0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0 [ 41.710597] 9000000128800001 9000000106305140 9000000105291838 9000000105291838 [ 41.718634] 9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0 [ 41.726672] 9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933 [ 41.734708] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 41.742745] ... [ 41.745252] Call Trace: [ 42.197868] [<9000000000228848>] _save_fp+0x0/0xd8 [ 42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0 [ 42.210485] [<90000000011ed834>] schedule+0x64/0xd4 [ 42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188 [ 42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350 [ 42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c This can be easily triggered by ltp testcase syscalls/io_uring02 and it can also be easily fixed by clearing the FPU/SIMD thread info flags for kernel threads in copy_thread(). Cc: stable@vger.kernel.org Reported-by: Qi Hu <huqi@loongson.cn> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2022-11-21 11:02:57 +00:00
if (clone_flags & CLONE_SETTLS)
childregs->regs[2] = tls;
out:
clear_tsk_thread_flag(p, TIF_USEDFPU);
clear_tsk_thread_flag(p, TIF_USEDSIMD);
clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
return 0;
}
unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc;
struct unwind_state state;
if (!try_get_task_stack(task))
return 0;
unwind_start(&state, task, NULL);
state.sp = thread_saved_fp(task);
get_stack_info(state.sp, state.task, &state.stack_info);
state.pc = thread_saved_ra(task);
#ifdef CONFIG_UNWINDER_PROLOGUE
state.type = UNWINDER_PROLOGUE;
#endif
for (; !unwind_done(&state); unwind_next_frame(&state)) {
pc = unwind_get_return_address(&state);
if (!pc)
break;
if (in_sched_functions(pc))
continue;
break;
}
put_task_stack(task);
return pc;
}
bool in_irq_stack(unsigned long stack, struct stack_info *info)
{
unsigned long nextsp;
unsigned long begin = (unsigned long)this_cpu_read(irq_stack);
unsigned long end = begin + IRQ_STACK_START;
if (stack < begin || stack >= end)
return false;
nextsp = *(unsigned long *)end;
if (nextsp & (SZREG - 1))
return false;
info->begin = begin;
info->end = end;
info->next_sp = nextsp;
info->type = STACK_TYPE_IRQ;
return true;
}
bool in_task_stack(unsigned long stack, struct task_struct *task,
struct stack_info *info)
{
unsigned long begin = (unsigned long)task_stack_page(task);
unsigned long end = begin + THREAD_SIZE;
if (stack < begin || stack >= end)
return false;
info->begin = begin;
info->end = end;
info->next_sp = 0;
info->type = STACK_TYPE_TASK;
return true;
}
int get_stack_info(unsigned long stack, struct task_struct *task,
struct stack_info *info)
{
task = task ? : current;
if (!stack || stack & (SZREG - 1))
goto unknown;
if (in_task_stack(stack, task, info))
return 0;
if (task != current)
goto unknown;
if (in_irq_stack(stack, info))
return 0;
unknown:
info->type = STACK_TYPE_UNKNOWN;
return -EINVAL;
}
unsigned long stack_top(void)
{
unsigned long top = TASK_SIZE & PAGE_MASK;
/* Space for the VDSO & data page */
top -= PAGE_ALIGN(current->thread.vdso->size);
top -= PAGE_SIZE;
/* Space to randomize the VDSO base */
if (current->flags & PF_RANDOMIZE)
top -= VDSO_RANDOMIZE_SIZE;
return top;
}
/*
* Don't forget that the stack pointer must be aligned on a 8 bytes
* boundary for 32-bits ABI and 16 bytes for 64-bits ABI.
*/
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
treewide: use prandom_u32_max() when possible, part 1 Rather than incurring a division or requesting too many random bytes for the given range, use the prandom_u32_max() function, which only takes the minimum required bytes from the RNG and avoids divisions. This was done mechanically with this coccinelle script: @basic@ expression E; type T; identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; typedef u64; @@ ( - ((T)get_random_u32() % (E)) + prandom_u32_max(E) | - ((T)get_random_u32() & ((E) - 1)) + prandom_u32_max(E * XXX_MAKE_SURE_E_IS_POW2) | - ((u64)(E) * get_random_u32() >> 32) + prandom_u32_max(E) | - ((T)get_random_u32() & ~PAGE_MASK) + prandom_u32_max(PAGE_SIZE) ) @multi_line@ identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; identifier RAND; expression E; @@ - RAND = get_random_u32(); ... when != RAND - RAND %= (E); + RAND = prandom_u32_max(E); // Find a potential literal @literal_mask@ expression LITERAL; type T; identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; position p; @@ ((T)get_random_u32()@p & (LITERAL)) // Add one to the literal. @script:python add_one@ literal << literal_mask.LITERAL; RESULT; @@ value = None if literal.startswith('0x'): value = int(literal, 16) elif literal[0] in '123456789': value = int(literal, 10) if value is None: print("I don't know how to handle %s" % (literal)) cocci.include_match(False) elif value == 2**32 - 1 or value == 2**31 - 1 or value == 2**24 - 1 or value == 2**16 - 1 or value == 2**8 - 1: print("Skipping 0x%x for cleanup elsewhere" % (value)) cocci.include_match(False) elif value & (value + 1) != 0: print("Skipping 0x%x because it's not a power of two minus one" % (value)) cocci.include_match(False) elif literal.startswith('0x'): coccinelle.RESULT = cocci.make_expr("0x%x" % (value + 1)) else: coccinelle.RESULT = cocci.make_expr("%d" % (value + 1)) // Replace the literal mask with the calculated result. @plus_one@ expression literal_mask.LITERAL; position literal_mask.p; expression add_one.RESULT; identifier FUNC; @@ - (FUNC()@p & (LITERAL)) + prandom_u32_max(RESULT) @collapse_ret@ type T; identifier VAR; expression E; @@ { - T VAR; - VAR = (E); - return VAR; + return E; } @drop_var@ type T; identifier VAR; @@ { - T VAR; ... when != VAR } Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Yury Norov <yury.norov@gmail.com> Reviewed-by: KP Singh <kpsingh@kernel.org> Reviewed-by: Jan Kara <jack@suse.cz> # for ext4 and sbitmap Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com> # for drbd Acked-by: Jakub Kicinski <kuba@kernel.org> Acked-by: Heiko Carstens <hca@linux.ibm.com> # for s390 Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # for mmc Acked-by: Darrick J. Wong <djwong@kernel.org> # for xfs Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
2022-10-05 14:43:38 +00:00
sp -= prandom_u32_max(PAGE_SIZE);
return sp & STACK_ALIGN;
}
static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
static struct cpumask backtrace_csd_busy;
static void handle_backtrace(void *info)
{
nmi_cpu_backtrace(get_irq_regs());
cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
}
static void raise_backtrace(cpumask_t *mask)
{
call_single_data_t *csd;
int cpu;
for_each_cpu(cpu, mask) {
/*
* If we previously sent an IPI to the target CPU & it hasn't
* cleared its bit in the busy cpumask then it didn't handle
* our previous IPI & it's not safe for us to reuse the
* call_single_data_t.
*/
if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) {
pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n",
cpu);
continue;
}
csd = &per_cpu(backtrace_csd, cpu);
csd->func = handle_backtrace;
smp_call_function_single_async(cpu, csd);
}
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
}
#ifdef CONFIG_64BIT
void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs)
{
unsigned int i;
for (i = LOONGARCH_EF_R1; i <= LOONGARCH_EF_R31; i++) {
uregs[i] = regs->regs[i - LOONGARCH_EF_R0];
}
uregs[LOONGARCH_EF_ORIG_A0] = regs->orig_a0;
uregs[LOONGARCH_EF_CSR_ERA] = regs->csr_era;
uregs[LOONGARCH_EF_CSR_BADV] = regs->csr_badvaddr;
uregs[LOONGARCH_EF_CSR_CRMD] = regs->csr_crmd;
uregs[LOONGARCH_EF_CSR_PRMD] = regs->csr_prmd;
uregs[LOONGARCH_EF_CSR_EUEN] = regs->csr_euen;
uregs[LOONGARCH_EF_CSR_ECFG] = regs->csr_ecfg;
uregs[LOONGARCH_EF_CSR_ESTAT] = regs->csr_estat;
}
#endif /* CONFIG_64BIT */