LoongArch: Add kdump support

This patch adds support for kdump. In the kdump case, the normal kernel
reserves a region for the crash kernel and jumps there on panic.

Arch-specific functions are added to implement the crash dump file
interface, /proc/vmcore, which can be viewed as an ELF file.

A user-space tool, such as kexec-tools, is responsible for allocating a
separate region for the core's ELF header within the crash dump kernel's
memory and filling it in when executing kexec_load().

Then, its location will be advertised to the crash dump kernel via a
command line argument "elfcorehdr=", and the crash dump kernel will
preserve this region for later use with arch_reserve_vmcore() at boot
time.

At the same time, the crash dump kernel is confined to the "crashkernel"
area via the "mem=" command line argument, so that it does not destroy
the original kernel's dump data.

In the crash dump kernel environment, /proc/vmcore is used to access the
primary kernel's memory with copy_oldmem_page().

I tested kdump on LoongArch machines (Loongson-3A5000) and it works as
expected (suggested crashkernel parameter is "crashkernel=512M@2560M"),
you may test it by triggering a crash through /proc/sysrq-trigger:

 $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
 # echo c > /proc/sysrq-trigger

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
This commit is contained in:
Youling Tang 2022-10-12 16:36:19 +08:00 committed by Huacai Chen
parent 4a03b2ac06
commit 4e62d1d865
9 changed files with 227 additions and 8 deletions

View File

@ -437,6 +437,28 @@ config KEXEC
The name comes from the similarity to the exec system call.
config CRASH_DUMP
bool "Build kdump crash kernel"
help
Generate crash dump after being started by kexec. This should
be normally only set in special crash dump kernels which are
loaded in the main kernel with kexec-tools into a specially
reserved region and then later executed after a crash by
kdump/kexec.
For more details see Documentation/admin-guide/kdump/kdump.rst
config PHYSICAL_START
hex "Physical address where the kernel is loaded"
default "0x90000000a0000000"
depends on CRASH_DUMP
help
This gives the XKPRANGE address where the kernel is loaded.
If you plan to use kernel for capturing the crash dump change
this value to start of the reserved region (the "X" value as
specified in the "crashkernel=YM@XM" command line boot parameter
passed to the panic-ed kernel).
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS

View File

@ -69,7 +69,11 @@ endif
cflags-y += -ffreestanding
cflags-y += $(call cc-option, -mno-check-zero-division)
ifndef CONFIG_PHYSICAL_START
load-y = 0x9000000000200000
else
load-y = $(CONFIG_PHYSICAL_START)
endif
bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
drivers-$(CONFIG_PCI) += arch/loongarch/pci/

View File

@ -26,6 +26,7 @@ obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o

View File

@ -0,0 +1,23 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/crash_dump.h>
#include <linux/io.h>
#include <linux/uio.h>
/*
 * copy_oldmem_page() - copy part of one page of the old kernel's memory
 * @iter:   destination iterator handed to copy_to_iter()
 * @pfn:    page frame number of the page to read
 * @csize:  number of bytes to copy
 * @offset: byte offset into the page at which copying starts
 *
 * The page is mapped with memremap(..., MEMREMAP_WB) for the duration of the
 * copy and unmapped again before returning.
 *
 * Return: 0 when @csize is 0, -ENOMEM when the page cannot be mapped,
 * otherwise the value returned by copy_to_iter().
 */
ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
			 size_t csize, unsigned long offset)
{
	void *page_va;
	size_t copied;

	/* Nothing requested: skip the mapping altogether. */
	if (csize == 0)
		return 0;

	page_va = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
	if (!page_va)
		return -ENOMEM;

	copied = copy_to_iter(page_va + offset, csize, iter);
	memunmap(page_va);

	return copied;
}

View File

@ -7,10 +7,15 @@
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <linux/mm.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/libfdt.h>
#include <linux/mm.h>
#include <linux/of_fdt.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <asm/bootinfo.h>
#include <asm/cacheflush.h>
@ -21,6 +26,7 @@
#define KEXEC_CMDLINE_ADDR TO_CACHE(0x108000UL)
static unsigned long reboot_code_buffer;
static cpumask_t cpus_in_crash = CPU_MASK_NONE;
#ifdef CONFIG_SMP
static void (*relocated_kexec_smp_wait)(void *);
@ -78,7 +84,7 @@ int machine_kexec_prepare(struct kimage *kimage)
return -EINVAL;
}
/* kexec need a safe page to save reboot_code_buffer */
/* kexec/kdump need a safe page to save reboot_code_buffer */
kimage->control_code_page = virt_to_page((void *)KEXEC_CONTROL_CODE);
reboot_code_buffer = (unsigned long)page_address(kimage->control_code_page);
@ -102,7 +108,8 @@ void kexec_reboot(void)
/*
* We know we were online, and there will be no incoming IPIs at
* this point.
* this point. Mark online again before rebooting so that the crash
* analysis tool will see us correctly.
*/
set_cpu_online(smp_processor_id(), true);
@ -147,7 +154,74 @@ static void kexec_shutdown_secondary(void *regs)
kexec_reboot();
}
#endif
/*
 * Crash-time handler executed on a CPU other than the one driving the crash
 * shutdown; crash_smp_send_stop() hands it to smp_call_function() with a
 * NULL argument.
 *
 * It records this CPU's register state with crash_save_cpu(), marks the CPU
 * in cpus_in_crash, then spins until kexec_ready_to_reboot becomes non-zero
 * and finally calls kexec_reboot().
 *
 * @passed_regs: register state to record, or NULL to fall back to
 *               get_irq_regs() and, failing that, task_pt_regs(current).
 */
static void crash_shutdown_secondary(void *passed_regs)
{
int cpu = smp_processor_id();
struct pt_regs *regs = passed_regs;
/*
 * If we are passed registers, use those. Otherwise get the
 * regs from the last interrupt, which should be correct, as
 * we are in an interrupt. But if the regs are not there,
 * pull them from the top of the stack. They are probably
 * wrong, but we need something to keep from crashing again.
 */
if (!regs)
regs = get_irq_regs();
if (!regs)
regs = task_pt_regs(current);
/* A CPU that is not marked online has nothing to do here. */
if (!cpu_online(cpu))
return;
/* We won't be sent IPIs any more. */
set_cpu_online(cpu, false);
local_irq_disable();
/* Save the registers only if this CPU is not already in cpus_in_crash. */
if (!cpumask_test_cpu(cpu, &cpus_in_crash))
crash_save_cpu(regs, cpu);
/* crash_smp_send_stop() counts the bits in this mask while it waits. */
cpumask_set_cpu(cpu, &cpus_in_crash);
/* Busy-wait until the reboot flag is raised, then enter kexec_reboot(). */
while (!atomic_read(&kexec_ready_to_reboot))
cpu_relax();
kexec_reboot();
}
/*
 * crash_smp_send_stop() - ask every other CPU to run the crash handler
 *
 * Sends crash_shutdown_secondary() to the other CPUs via
 * smp_call_function() without waiting for completion, then polls
 * cpus_in_crash until as many CPUs have checked in as were online (minus
 * the caller) or the polling budget is used up.
 *
 * Only the first invocation does any work; later calls return at once.
 */
void crash_smp_send_stop(void)
{
	static int cpus_stopped;
	unsigned int others;
	unsigned long budget;

	/*
	 * The panic path can reach this function twice, but the work
	 * must be carried out only once.
	 */
	if (cpus_stopped)
		return;
	cpus_stopped = 1;

	/* Every online CPU except the one executing this code. */
	others = num_online_cpus() - 1;

	smp_call_function(crash_shutdown_secondary, NULL, 0);
	smp_wmb();

	/*
	 * Poll for the other CPUs to check in. The wait is bounded:
	 * at most MSEC_PER_SEC * 10 rounds of mdelay(1) are spent
	 * before giving up.
	 */
	budget = MSEC_PER_SEC * 10;
	pr_emerg("Sending IPI to other cpus...\n");

	while (cpumask_weight(&cpus_in_crash) < others) {
		if (!budget--)
			break;
		mdelay(1);
		cpu_relax();
	}
}
#endif /* defined(CONFIG_SMP) */
void machine_shutdown(void)
{
@ -165,6 +239,19 @@ void machine_shutdown(void)
/*
 * Prepare the crashing CPU for the jump into the crash dump kernel.
 *
 * Disables local interrupts, saves this CPU's registers with
 * crash_save_cpu(), calls crash_smp_send_stop() on CONFIG_SMP builds, and
 * then records the crashing CPU in cpus_in_crash.
 *
 * @regs: register state of the crashing CPU, passed on to crash_save_cpu().
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
int crashing_cpu;
local_irq_disable();
crashing_cpu = smp_processor_id();
crash_save_cpu(regs, crashing_cpu);
#ifdef CONFIG_SMP
crash_smp_send_stop();
#endif
/*
 * Set only after crash_smp_send_stop() has returned: that function compares
 * the weight of cpus_in_crash against the online CPU count minus one.
 */
cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
pr_info("Starting crashdump kernel...\n");
}
void machine_kexec(struct kimage *image)
@ -178,7 +265,8 @@ void machine_kexec(struct kimage *image)
start_addr = (unsigned long)phys_to_virt(image->start);
first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
first_ind_entry = (image->type == KEXEC_TYPE_DEFAULT) ?
(unsigned long)phys_to_virt(image->head & PAGE_MASK) : 0;
/*
* The generic kexec code builds a page list with physical

View File

@ -58,7 +58,4 @@ void __init memblock_init(void)
/* Reserve the kernel text/data/bss */
memblock_reserve(__pa_symbol(&_text),
__pa_symbol(&_end) - __pa_symbol(&_text));
/* Reserve the initrd */
reserve_initrd_mem();
}

View File

@ -24,6 +24,12 @@ SYM_CODE_START(relocate_new_kernel)
*/
move s0, a4
/*
* In case of a kdump/crash kernel, the indirection page is not
* populated as the kernel is directly copied to a reserved location
*/
beqz s0, done
process_entry:
PTR_L s1, s0, 0
PTR_ADDI s0, s0, SZREG

View File

@ -19,6 +19,8 @@
#include <linux/memblock.h>
#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/pfn.h>
@ -185,8 +187,70 @@ static int __init early_parse_mem(char *p)
}
early_param("mem", early_parse_mem);
/*
 * Reserve, in memblock, the region that holds the ELF core header
 * (elfcorehdr_addr / elfcorehdr_size).
 *
 * The body is compiled only when CONFIG_PROC_VMCORE is set, and it returns
 * immediately unless is_kdump_kernel() is true.
 */
static void __init arch_reserve_vmcore(void)
{
#ifdef CONFIG_PROC_VMCORE
u64 i;
phys_addr_t start, end;
if (!is_kdump_kernel())
return;
/* No size known: derive one from the memory range containing the header. */
if (!elfcorehdr_size) {
for_each_mem_range(i, &start, &end) {
if (elfcorehdr_addr >= start && elfcorehdr_addr < end) {
/*
 * Reserve from the elf core header to the end of
 * the memory segment, that should all be kdump
 * reserved memory.
 */
elfcorehdr_size = end - elfcorehdr_addr;
break;
}
}
}
/*
 * NOTE(review): if no range contained elfcorehdr_addr, elfcorehdr_size is
 * still zero at this point -- confirm the calls below tolerate a
 * zero-length region.
 */
/* Do not reserve if the region overlaps something already reserved. */
if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
pr_warn("elfcorehdr is overlapped\n");
return;
}
memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
pr_info("Reserving %llu KiB of memory at 0x%llx for elfcorehdr\n",
elfcorehdr_size >> 10, elfcorehdr_addr);
#endif
}
/*
 * arch_parse_crashkernel() - set up crashk_res from the boot command line
 *
 * Passes boot_command_line and the total memblock memory size to
 * parse_crashkernel(). When that yields a non-zero size, the exact range
 * [crash_base, crash_base + crash_size) is requested from memblock, and on
 * success its bounds are stored in crashk_res. Any failure leaves
 * crashk_res untouched.
 *
 * The body is compiled only when CONFIG_KEXEC is set.
 */
static void __init arch_parse_crashkernel(void)
{
#ifdef CONFIG_KEXEC
	unsigned long long crash_base, crash_size;
	unsigned long long reserved_at;
	int err;

	err = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&crash_size, &crash_base);
	if (err < 0 || crash_size == 0)
		return;

	/* The allocation must land exactly on the requested base. */
	reserved_at = memblock_phys_alloc_range(crash_size, 1, crash_base,
						crash_base + crash_size);
	if (reserved_at != crash_base) {
		pr_warn("Invalid memory region reserved for crash kernel\n");
		return;
	}

	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
#endif
}
void __init platform_init(void)
{
arch_reserve_vmcore();
arch_parse_crashkernel();
#ifdef CONFIG_ACPI_TABLE_UPGRADE
acpi_table_upgrade();
#endif
@ -289,6 +353,15 @@ static void __init resource_init(void)
request_resource(res, &data_resource);
request_resource(res, &bss_resource);
}
#ifdef CONFIG_KEXEC
if (crashk_res.start < crashk_res.end) {
insert_resource(&iomem_resource, &crashk_res);
pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
(unsigned long)((crashk_res.end - crashk_res.start + 1) >> 20),
(unsigned long)(crashk_res.start >> 20));
}
#endif
}
static int __init reserve_memblock_reserved_regions(void)
@ -350,6 +423,7 @@ void __init setup_arch(char **cmdline_p)
memblock_init();
pagetable_init();
parse_early_param();
reserve_initrd_mem();
platform_init();
arch_mem_init(cmdline_p);

View File

@ -10,6 +10,7 @@
#include <linux/entry-common.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/module.h>
#include <linux/extable.h>
#include <linux/mm.h>
@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
oops_exit();
if (regs && kexec_should_crash(current))
crash_kexec(regs);
if (in_interrupt())
panic("Fatal exception in interrupt");