crash: split crash dumping code out from kexec_core.c

Currently, KEXEC_CORE selects CRASH_CORE automatically because the crash
code needs to be built in to avoid compile errors when building the kexec
code, even though the crash dumping functionality is not enabled. E.g.:
--------------------
CONFIG_CRASH_CORE=y
CONFIG_KEXEC_CORE=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
---------------------

After splitting out the crashkernel reservation code and the vmcoreinfo
exporting code, only crash related code is left in kernel/crash_core.c.
Now move the crash related code from kexec_core.c to crash_core.c and only
build it in when CONFIG_CRASH_DUMP=y.

Also wrap the crash code inside CONFIG_CRASH_DUMP ifdeffery, and replace
inappropriate CONFIG_KEXEC_CORE ifdefs with CONFIG_CRASH_DUMP ifdefs in
generic kernel files.

With these changes, the crash_core code is separated from the kexec code
and can be disabled entirely if only the kexec reboot feature is wanted.

Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com
Signed-off-by: Baoquan He <bhe@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Pingfan Liu <piliu@redhat.com>
Cc: Klara Modin <klarasmodin@gmail.com>
Cc: Michael Kelley <mhklinux@outlook.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Baoquan He 2024-01-24 13:12:44 +08:00 committed by Andrew Morton
parent 2c44b67e2e
commit 02aff84805
10 changed files with 359 additions and 292 deletions

View file

@ -144,7 +144,7 @@ static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store);
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
#endif /* CONFIG_HOTPLUG_CPU */ #endif /* CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_CRASH_DUMP
#include <linux/kexec.h> #include <linux/kexec.h>
static ssize_t crash_notes_show(struct device *dev, static ssize_t crash_notes_show(struct device *dev,
@ -189,14 +189,14 @@ static const struct attribute_group crash_note_cpu_attr_group = {
#endif #endif
static const struct attribute_group *common_cpu_attr_groups[] = { static const struct attribute_group *common_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_CRASH_DUMP
&crash_note_cpu_attr_group, &crash_note_cpu_attr_group,
#endif #endif
NULL NULL
}; };
static const struct attribute_group *hotplugable_cpu_attr_groups[] = { static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_CRASH_DUMP
&crash_note_cpu_attr_group, &crash_note_cpu_attr_group,
#endif #endif
NULL NULL

View file

@ -6,6 +6,48 @@
#include <linux/elfcore.h> #include <linux/elfcore.h>
#include <linux/elf.h> #include <linux/elf.h>
struct kimage;
#ifdef CONFIG_CRASH_DUMP
/*
 * Shrink the crashkernel reservation to @new_size bytes.
 * Returns 0 on success or a negative errno.
 */
int crash_shrink_memory(unsigned long new_size);
/*
 * Combined size of the crashkernel reservations, or -EBUSY when the
 * kexec lock cannot be taken.
 */
ssize_t crash_get_memory_size(void);
#ifndef arch_kexec_protect_crashkres
/*
 * Protection mechanism for crashkernel reserved memory after
 * the kdump kernel is loaded.
 *
 * Provide an empty default implementation here -- architecture
 * code may override this by defining a macro of the same name.
 */
static inline void arch_kexec_protect_crashkres(void) { }
#endif
#ifndef arch_kexec_unprotect_crashkres
/* Counterpart of arch_kexec_protect_crashkres(); empty unless overridden. */
static inline void arch_kexec_unprotect_crashkres(void) { }
#endif
#ifndef arch_crash_handle_hotplug_event
/* Default hotplug hook: does nothing unless the architecture overrides it. */
static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
#endif
/*
 * NOTE(review): the implementation is not visible from this header;
 * presumably reports whether the crash image's elfcorehdr may be updated
 * on hotplug -- confirm against the definition.
 */
int crash_check_update_elfcorehdr(void);
#ifndef crash_hotplug_cpu_support
/* Default: report 0 unless the architecture overrides this. */
static inline int crash_hotplug_cpu_support(void) { return 0; }
#endif
#ifndef crash_hotplug_memory_support
/* Default: report 0 unless the architecture overrides this. */
static inline int crash_hotplug_memory_support(void) { return 0; }
#endif
#ifndef crash_get_elfcorehdr_size
/* Default: report a size of 0 unless the architecture overrides this. */
static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
#endif
/* Alignment required for elf header segment */ /* Alignment required for elf header segment */
#define ELF_CORE_HEADER_ALIGN 4096 #define ELF_CORE_HEADER_ALIGN 4096
@ -31,4 +73,23 @@ struct kexec_segment;
#define KEXEC_CRASH_HP_REMOVE_MEMORY 4 #define KEXEC_CRASH_HP_REMOVE_MEMORY 4
#define KEXEC_CRASH_HP_INVALID_CPU -1U #define KEXEC_CRASH_HP_INVALID_CPU -1U
/* crash_kexec() without the panic_cpu check; for the CPU that owns panic_cpu. */
extern void __crash_kexec(struct pt_regs *regs);
/* Run __crash_kexec() on the first CPU to claim panic_cpu. */
extern void crash_kexec(struct pt_regs *regs);
/* Returns 1 when the death of task @p should trigger crash_kexec(), else 0. */
int kexec_should_crash(struct task_struct *p);
/* Returns non-zero when a crash image is loaded. */
int kexec_crash_loaded(void);
/* Save @regs as an ELF prstatus note in the crash_notes buffer of @cpu. */
void crash_save_cpu(struct pt_regs *regs, int cpu);
/* Set up a vmap'ed vmcoreinfo safe copy for a KEXEC_TYPE_CRASH image. */
extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
#else /* !CONFIG_CRASH_DUMP*/
/*
 * Crash dumping is compiled out: provide no-op stubs so that callers
 * build without needing CONFIG_CRASH_DUMP guards of their own. The
 * int-returning stubs report 0 ("no crash image", "nothing to do").
 */
struct pt_regs;
struct task_struct;
struct kimage;
static inline void __crash_kexec(struct pt_regs *regs) { }
static inline void crash_kexec(struct pt_regs *regs) { }
static inline int kexec_should_crash(struct task_struct *p) { return 0; }
static inline int kexec_crash_loaded(void) { return 0; }
static inline void crash_save_cpu(struct pt_regs *regs, int cpu) { }
static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; }
#endif /* CONFIG_CRASH_DUMP*/
#endif /* LINUX_CRASH_CORE_H */ #endif /* LINUX_CRASH_CORE_H */

View file

@ -15,7 +15,6 @@
#if !defined(__ASSEMBLY__) #if !defined(__ASSEMBLY__)
#include <linux/crash_core.h>
#include <linux/vmcore_info.h> #include <linux/vmcore_info.h>
#include <linux/crash_reserve.h> #include <linux/crash_reserve.h>
#include <asm/io.h> #include <asm/io.h>
@ -33,6 +32,7 @@ extern note_buf_t __percpu *crash_notes;
#include <linux/module.h> #include <linux/module.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <asm/kexec.h> #include <asm/kexec.h>
#include <linux/crash_core.h>
/* Verify architecture specific macros are defined */ /* Verify architecture specific macros are defined */
@ -380,13 +380,6 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image,
static inline int machine_kexec_post_load(struct kimage *image) { return 0; } static inline int machine_kexec_post_load(struct kimage *image) { return 0; }
#endif #endif
extern void __crash_kexec(struct pt_regs *);
extern void crash_kexec(struct pt_regs *);
int kexec_should_crash(struct task_struct *);
int kexec_crash_loaded(void);
void crash_save_cpu(struct pt_regs *regs, int cpu);
extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
extern struct kimage *kexec_image; extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image; extern struct kimage *kexec_crash_image;
@ -410,24 +403,6 @@ bool kexec_load_permitted(int kexec_image_type);
/* flag to track if kexec reboot is in progress */ /* flag to track if kexec reboot is in progress */
extern bool kexec_in_progress; extern bool kexec_in_progress;
int crash_shrink_memory(unsigned long new_size);
ssize_t crash_get_memory_size(void);
#ifndef arch_kexec_protect_crashkres
/*
* Protection mechanism for crashkernel reserved memory after
* the kdump kernel is loaded.
*
* Provide an empty default implementation here -- architecture
* code may override this
*/
static inline void arch_kexec_protect_crashkres(void) { }
#endif
#ifndef arch_kexec_unprotect_crashkres
static inline void arch_kexec_unprotect_crashkres(void) { }
#endif
#ifndef page_to_boot_pfn #ifndef page_to_boot_pfn
static inline unsigned long page_to_boot_pfn(struct page *page) static inline unsigned long page_to_boot_pfn(struct page *page)
{ {
@ -484,24 +459,6 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
#endif #endif
#ifndef arch_crash_handle_hotplug_event
static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
#endif
int crash_check_update_elfcorehdr(void);
#ifndef crash_hotplug_cpu_support
static inline int crash_hotplug_cpu_support(void) { return 0; }
#endif
#ifndef crash_hotplug_memory_support
static inline int crash_hotplug_memory_support(void) { return 0; }
#endif
#ifndef crash_get_elfcorehdr_size
static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
#endif
extern bool kexec_file_dbg_print; extern bool kexec_file_dbg_print;
#define kexec_dprintk(fmt, ...) \ #define kexec_dprintk(fmt, ...) \

View file

@ -642,7 +642,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
"initrd"); "initrd");
} }
#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_CRASH_RESERVE
static bool __init kexec_free_initrd(void) static bool __init kexec_free_initrd(void)
{ {
unsigned long crashk_start = (unsigned long)__va(crashk_res.start); unsigned long crashk_start = (unsigned long)__va(crashk_res.start);

View file

@ -70,7 +70,8 @@ obj-$(CONFIG_KALLSYMS_SELFTEST) += kallsyms_selftest.o
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o elfcorehdr.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o elfcorehdr.o
obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o
obj-$(CONFIG_KEXEC_CORE) += kexec_core.o crash_core.o obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
obj-$(CONFIG_CRASH_DUMP) += crash_core.o
obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o

View file

@ -11,9 +11,14 @@
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/kexec.h> #include <linux/kexec.h>
#include <linux/memory.h> #include <linux/memory.h>
#include <linux/mm.h>
#include <linux/cpuhotplug.h> #include <linux/cpuhotplug.h>
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/kmemleak.h> #include <linux/kmemleak.h>
#include <linux/crash_core.h>
#include <linux/reboot.h>
#include <linux/btf.h>
#include <linux/objtool.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/sections.h> #include <asm/sections.h>
@ -26,6 +31,131 @@
/* Per cpu memory for storing cpu states in case of system crash. */ /* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes; note_buf_t __percpu *crash_notes;
#ifdef CONFIG_CRASH_DUMP
/*
 * kimage_crash_copy_vmcoreinfo - set up the vmcoreinfo safe copy of a crash image
 * @image: image being loaded
 *
 * Images that are not KEXEC_TYPE_CRASH are left untouched. For a crash
 * image, one control page is allocated, mapped with vmap(), recorded in
 * image->vmcoreinfo_data_copy and registered through
 * crash_update_vmcoreinfo_safecopy().
 *
 * No IS_ENABLED(CONFIG_CRASH_DUMP) test is needed here: this file is only
 * built when CONFIG_CRASH_DUMP=y, so such a test could never be true.
 *
 * Return: 0 on success (or when there is nothing to do), -ENOMEM when the
 * page cannot be allocated or mapped.
 */
int kimage_crash_copy_vmcoreinfo(struct kimage *image)
{
	struct page *vmcoreinfo_page;
	void *safecopy;

	if (image->type != KEXEC_TYPE_CRASH)
		return 0;

	/*
	 * For kdump, allocate one vmcoreinfo safe copy from the
	 * crash memory. As we have arch_kexec_protect_crashkres()
	 * after the kexec syscall, we naturally protect it from write
	 * (even read) access under the kernel direct mapping. But on
	 * the other hand, we still need to operate on it when a crash
	 * happens to generate the vmcoreinfo note, hence we rely on
	 * vmap for this purpose.
	 */
	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
	if (!vmcoreinfo_page) {
		pr_warn("Could not allocate vmcoreinfo buffer\n");
		return -ENOMEM;
	}

	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
	if (!safecopy) {
		pr_warn("Could not vmap vmcoreinfo buffer\n");
		return -ENOMEM;
	}

	image->vmcoreinfo_data_copy = safecopy;
	crash_update_vmcoreinfo_safecopy(safecopy);

	return 0;
}
/*
 * kexec_should_crash - should the death of @p trigger crash_kexec()?
 * @p: task being examined
 *
 * Return: 1 when crash_kexec() should be run now, 0 otherwise.
 */
int kexec_should_crash(struct task_struct *p)
{
	/*
	 * With crash_kexec_post_notifiers set, crash_kexec() has to wait
	 * until the panic notifiers have run inside panic(), so never ask
	 * for it from here.
	 */
	if (crash_kexec_post_notifiers)
		return 0;

	/*
	 * Each of the four conditions below mirrors one of the four
	 * panic() calls on the make_task_dead() path.
	 */
	return in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops;
}
int kexec_crash_loaded(void)
{
return !!kexec_crash_image;
}
EXPORT_SYMBOL_GPL(kexec_crash_loaded);
/*
 * Variant of crash_kexec() that skips the panic_cpu check. Callers must
 * already own panic_cpu, i.e. be the single CPU that is processing the
 * crash_kexec routines.
 */
void __noclone __crash_kexec(struct pt_regs *regs)
{
	/*
	 * The kexec_lock keeps sys_kexec_load on another cpu from replacing
	 * the crash kernel we are about to use after a panic on this one.
	 *
	 * Were the crash kernel not located in a fixed area of memory, the
	 * xchg(&kexec_crash_image) would be sufficient. But the memory is
	 * reused, so the lock is required.
	 */
	if (!kexec_trylock())
		return;

	if (kexec_crash_image) {
		struct pt_regs fixed_regs;

		crash_setup_regs(&fixed_regs, regs);
		crash_save_vmcoreinfo();
		machine_crash_shutdown(&fixed_regs);
		machine_kexec(kexec_crash_image);
	}

	kexec_unlock();
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
/*
 * crash_kexec - run __crash_kexec() on the first CPU that claims panic_cpu
 * @regs: register state handed through to __crash_kexec()
 *
 * As with panic(), only one CPU may execute the crash_kexec() code;
 * otherwise parallel panic() and crash_kexec() calls could stop each
 * other. panic_cpu is used as the arbiter here as well.
 */
__bpf_kfunc void crash_kexec(struct pt_regs *regs)
{
	int expected = PANIC_CPU_INVALID;
	int self = raw_smp_processor_id();

	/* Some other CPU got here first: leave the work to it. */
	if (!atomic_try_cmpxchg(&panic_cpu, &expected, self))
		return;

	__crash_kexec(regs);

	/* Release panic_cpu so a later panic()/crash_kexec() can proceed. */
	atomic_set(&panic_cpu, PANIC_CPU_INVALID);
}
/* Size of @res, treating a resource whose end is 0 as empty. */
static inline resource_size_t crash_resource_size(const struct resource *res)
{
	return res->end ? resource_size(res) : 0;
}
int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
void **addr, unsigned long *sz) void **addr, unsigned long *sz)
{ {
@ -187,6 +317,130 @@ int crash_exclude_mem_range(struct crash_mem *mem,
return 0; return 0;
} }
/*
 * crash_get_memory_size - total size of the crashkernel reservations
 *
 * Return: the summed sizes of crashk_res and crashk_low_res, or -EBUSY
 * when the kexec lock is held elsewhere.
 */
ssize_t crash_get_memory_size(void)
{
	ssize_t total;

	if (!kexec_trylock())
		return -EBUSY;

	total = crash_resource_size(&crashk_res);
	total += crash_resource_size(&crashk_low_res);

	kexec_unlock();
	return total;
}
/*
 * __crash_shrink_memory - shrink one crashkernel resource to @new_size bytes
 * @old_res: resource to shrink (crashk_res or crashk_low_res)
 * @new_size: size to keep at the start of @old_res; 0 releases it entirely
 *
 * The tail of @old_res beyond @new_size is freed with
 * crash_free_reserved_phys_range() and inserted into iomem_resource as a
 * "System RAM" resource.
 *
 * Return: 0 on success, -ENOMEM if the new resource cannot be allocated.
 */
static int __crash_shrink_memory(struct resource *old_res,
				 unsigned long new_size)
{
	struct resource *ram_res;

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res)
		return -ENOMEM;

	ram_res->start = old_res->start + new_size;
	ram_res->end = old_res->end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
	ram_res->name = "System RAM";

	if (!new_size) {
		release_resource(old_res);
		old_res->start = 0;
		old_res->end = 0;
	} else {
		/*
		 * Trim the resource that is actually being shrunk. Writing
		 * crashk_res.end here would corrupt crashk_res whenever
		 * @old_res is crashk_low_res, and would leave crashk_low_res
		 * still covering the range that is freed below.
		 */
		old_res->end = ram_res->start - 1;
	}

	crash_free_reserved_phys_range(ram_res->start, ram_res->end);
	insert_resource(&iomem_resource, ram_res);

	return 0;
}
/*
 * crash_shrink_memory - shrink the crashkernel reservation
 * @new_size: requested total size; rounded up to KEXEC_CRASH_MEM_ALIGN
 *
 * Return: 0 on success (including when the size is unchanged), -EBUSY if
 * the kexec lock is held, -ENOENT if a crash image is loaded, -EINVAL if
 * the request would grow the reservation, or the error returned by
 * __crash_shrink_memory().
 */
int crash_shrink_memory(unsigned long new_size)
{
	unsigned long low_size, old_size;
	int ret = 0;

	if (!kexec_trylock())
		return -EBUSY;

	/* The reservation cannot be shrunk while a crash image is loaded. */
	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}

	low_size = crash_resource_size(&crashk_low_res);
	old_size = crash_resource_size(&crashk_res) + low_size;
	new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);

	/* Same size: nothing to do, ret is still 0. */
	if (new_size == old_size)
		goto unlock;

	/* Growing the reservation is not supported. */
	if (new_size > old_size) {
		ret = -EINVAL;
		goto unlock;
	}

	/*
	 * When the request still covers all of the low region (always the
	 * case when low_size is zero), only crashk_res has to shrink.
	 * Otherwise low_size is necessarily greater than zero: crashk_res
	 * is given up entirely and crashk_low_res is shrunk as well.
	 */
	if (new_size >= low_size) {
		ret = __crash_shrink_memory(&crashk_res, new_size - low_size);
	} else {
		ret = __crash_shrink_memory(&crashk_res, 0);
		if (ret)
			goto unlock;
		ret = __crash_shrink_memory(&crashk_low_res, new_size);
	}

	/* Swap crashk_res and crashk_low_res if needed */
	if (crashk_low_res.end && !crashk_res.end) {
		crashk_res.start = crashk_low_res.start;
		crashk_res.end = crashk_low_res.end;
		release_resource(&crashk_low_res);
		crashk_low_res.start = 0;
		crashk_low_res.end = 0;
		insert_resource(&iomem_resource, &crashk_res);
	}

unlock:
	kexec_unlock();
	return ret;
}
/*
 * crash_save_cpu - record a CPU's registers as an ELF note
 * @regs: register state to save
 * @cpu: CPU whose crash_notes buffer receives the note
 *
 * Silently does nothing when @cpu is out of range or the per-cpu
 * crash_notes buffer is not available.
 */
void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *note;

	if (cpu < 0 || cpu >= nr_cpu_ids)
		return;

	/*
	 * Using ELF notes here is opportunistic: a well defined structure
	 * format is needed for the data passed on, along with tags that
	 * say what information has been squirrelled away. ELF notes
	 * provide all of that, so there is no need to invent something new.
	 */
	note = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!note)
		return;

	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.common.pr_pid = current->pid;
	elf_core_copy_regs(&prstatus.pr_reg, regs);

	note = append_elf_note(note, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			       &prstatus, sizeof(prstatus));
	final_note(note);
}
static int __init crash_notes_memory_init(void) static int __init crash_notes_memory_init(void)
{ {
/* Allocate memory for saving cpu registers. */ /* Allocate memory for saving cpu registers. */
@ -220,6 +474,8 @@ static int __init crash_notes_memory_init(void)
} }
subsys_initcall(crash_notes_memory_init); subsys_initcall(crash_notes_memory_init);
#endif /*CONFIG_CRASH_DUMP*/
#ifdef CONFIG_CRASH_HOTPLUG #ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt #undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt #define pr_fmt(fmt) "crash hp: " fmt

View file

@ -28,12 +28,14 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
struct kimage *image; struct kimage *image;
bool kexec_on_panic = flags & KEXEC_ON_CRASH; bool kexec_on_panic = flags & KEXEC_ON_CRASH;
#ifdef CONFIG_CRASH_DUMP
if (kexec_on_panic) { if (kexec_on_panic) {
/* Verify we have a valid entry point */ /* Verify we have a valid entry point */
if ((entry < phys_to_boot_phys(crashk_res.start)) || if ((entry < phys_to_boot_phys(crashk_res.start)) ||
(entry > phys_to_boot_phys(crashk_res.end))) (entry > phys_to_boot_phys(crashk_res.end)))
return -EADDRNOTAVAIL; return -EADDRNOTAVAIL;
} }
#endif
/* Allocate and initialize a controlling structure */ /* Allocate and initialize a controlling structure */
image = do_kimage_alloc_init(); image = do_kimage_alloc_init();
@ -44,11 +46,13 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
image->nr_segments = nr_segments; image->nr_segments = nr_segments;
memcpy(image->segment, segments, nr_segments * sizeof(*segments)); memcpy(image->segment, segments, nr_segments * sizeof(*segments));
#ifdef CONFIG_CRASH_DUMP
if (kexec_on_panic) { if (kexec_on_panic) {
/* Enable special crash kernel control page alloc policy. */ /* Enable special crash kernel control page alloc policy. */
image->control_page = crashk_res.start; image->control_page = crashk_res.start;
image->type = KEXEC_TYPE_CRASH; image->type = KEXEC_TYPE_CRASH;
} }
#endif
ret = sanity_check_segment_list(image); ret = sanity_check_segment_list(image);
if (ret) if (ret)
@ -99,13 +103,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
if (!kexec_trylock()) if (!kexec_trylock())
return -EBUSY; return -EBUSY;
#ifdef CONFIG_CRASH_DUMP
if (flags & KEXEC_ON_CRASH) { if (flags & KEXEC_ON_CRASH) {
dest_image = &kexec_crash_image; dest_image = &kexec_crash_image;
if (kexec_crash_image) if (kexec_crash_image)
arch_kexec_unprotect_crashkres(); arch_kexec_unprotect_crashkres();
} else { } else
#endif
dest_image = &kexec_image; dest_image = &kexec_image;
}
if (nr_segments == 0) { if (nr_segments == 0) {
/* Uninstall image */ /* Uninstall image */
@ -162,8 +167,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
image = xchg(dest_image, image); image = xchg(dest_image, image);
out: out:
#ifdef CONFIG_CRASH_DUMP
if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres(); arch_kexec_protect_crashkres();
#endif
kimage_free(image); kimage_free(image);
out_unlock: out_unlock:

View file

@ -54,30 +54,6 @@ bool kexec_in_progress = false;
bool kexec_file_dbg_print; bool kexec_file_dbg_print;
int kexec_should_crash(struct task_struct *p)
{
/*
* If crash_kexec_post_notifiers is enabled, don't run
* crash_kexec() here yet, which must be run after panic
* notifiers in panic().
*/
if (crash_kexec_post_notifiers)
return 0;
/*
* There are 4 panic() calls in make_task_dead() path, each of which
* corresponds to each of these 4 conditions.
*/
if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
return 1;
return 0;
}
int kexec_crash_loaded(void)
{
return !!kexec_crash_image;
}
EXPORT_SYMBOL_GPL(kexec_crash_loaded);
/* /*
* When kexec transitions to the new kernel there is a one-to-one * When kexec transitions to the new kernel there is a one-to-one
* mapping between physical and virtual addresses. On processors * mapping between physical and virtual addresses. On processors
@ -209,6 +185,7 @@ int sanity_check_segment_list(struct kimage *image)
if (total_pages > nr_pages / 2) if (total_pages > nr_pages / 2)
return -EINVAL; return -EINVAL;
#ifdef CONFIG_CRASH_DUMP
/* /*
* Verify we have good destination addresses. Normally * Verify we have good destination addresses. Normally
* the caller is responsible for making certain we don't * the caller is responsible for making certain we don't
@ -231,6 +208,7 @@ int sanity_check_segment_list(struct kimage *image)
return -EADDRNOTAVAIL; return -EADDRNOTAVAIL;
} }
} }
#endif
return 0; return 0;
} }
@ -403,6 +381,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
return pages; return pages;
} }
#ifdef CONFIG_CRASH_DUMP
static struct page *kimage_alloc_crash_control_pages(struct kimage *image, static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
unsigned int order) unsigned int order)
{ {
@ -468,6 +447,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
return pages; return pages;
} }
#endif
struct page *kimage_alloc_control_pages(struct kimage *image, struct page *kimage_alloc_control_pages(struct kimage *image,
@ -479,48 +459,16 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
case KEXEC_TYPE_DEFAULT: case KEXEC_TYPE_DEFAULT:
pages = kimage_alloc_normal_control_pages(image, order); pages = kimage_alloc_normal_control_pages(image, order);
break; break;
#ifdef CONFIG_CRASH_DUMP
case KEXEC_TYPE_CRASH: case KEXEC_TYPE_CRASH:
pages = kimage_alloc_crash_control_pages(image, order); pages = kimage_alloc_crash_control_pages(image, order);
break; break;
#endif
} }
return pages; return pages;
} }
int kimage_crash_copy_vmcoreinfo(struct kimage *image)
{
struct page *vmcoreinfo_page;
void *safecopy;
if (image->type != KEXEC_TYPE_CRASH)
return 0;
/*
* For kdump, allocate one vmcoreinfo safe copy from the
* crash memory. as we have arch_kexec_protect_crashkres()
* after kexec syscall, we naturally protect it from write
* (even read) access under kernel direct mapping. But on
* the other hand, we still need to operate it when crash
* happens to generate vmcoreinfo note, hereby we rely on
* vmap for this purpose.
*/
vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
if (!vmcoreinfo_page) {
pr_warn("Could not allocate vmcoreinfo buffer\n");
return -ENOMEM;
}
safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
if (!safecopy) {
pr_warn("Could not vmap vmcoreinfo buffer\n");
return -ENOMEM;
}
image->vmcoreinfo_data_copy = safecopy;
crash_update_vmcoreinfo_safecopy(safecopy);
return 0;
}
static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{ {
if (*image->entry != 0) if (*image->entry != 0)
@ -603,10 +551,12 @@ void kimage_free(struct kimage *image)
if (!image) if (!image)
return; return;
#ifdef CONFIG_CRASH_DUMP
if (image->vmcoreinfo_data_copy) { if (image->vmcoreinfo_data_copy) {
crash_update_vmcoreinfo_safecopy(NULL); crash_update_vmcoreinfo_safecopy(NULL);
vunmap(image->vmcoreinfo_data_copy); vunmap(image->vmcoreinfo_data_copy);
} }
#endif
kimage_free_extra_pages(image); kimage_free_extra_pages(image);
for_each_kimage_entry(image, ptr, entry) { for_each_kimage_entry(image, ptr, entry) {
@ -824,6 +774,7 @@ static int kimage_load_normal_segment(struct kimage *image,
return result; return result;
} }
#ifdef CONFIG_CRASH_DUMP
static int kimage_load_crash_segment(struct kimage *image, static int kimage_load_crash_segment(struct kimage *image,
struct kexec_segment *segment) struct kexec_segment *segment)
{ {
@ -891,6 +842,7 @@ static int kimage_load_crash_segment(struct kimage *image,
out: out:
return result; return result;
} }
#endif
int kimage_load_segment(struct kimage *image, int kimage_load_segment(struct kimage *image,
struct kexec_segment *segment) struct kexec_segment *segment)
@ -901,9 +853,11 @@ int kimage_load_segment(struct kimage *image,
case KEXEC_TYPE_DEFAULT: case KEXEC_TYPE_DEFAULT:
result = kimage_load_normal_segment(image, segment); result = kimage_load_normal_segment(image, segment);
break; break;
#ifdef CONFIG_CRASH_DUMP
case KEXEC_TYPE_CRASH: case KEXEC_TYPE_CRASH:
result = kimage_load_crash_segment(image, segment); result = kimage_load_crash_segment(image, segment);
break; break;
#endif
} }
return result; return result;
@ -1027,186 +981,6 @@ bool kexec_load_permitted(int kexec_image_type)
return true; return true;
} }
/*
* No panic_cpu check version of crash_kexec(). This function is called
* only when panic_cpu holds the current CPU number; this is the only CPU
* which processes crash_kexec routines.
*/
void __noclone __crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_lock here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
*
* If the crash kernel was not located in a fixed area
* of memory the xchg(&kexec_crash_image) would be
* sufficient. But since I reuse the memory...
*/
if (kexec_trylock()) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
crash_setup_regs(&fixed_regs, regs);
crash_save_vmcoreinfo();
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
kexec_unlock();
}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
__bpf_kfunc void crash_kexec(struct pt_regs *regs)
{
int old_cpu, this_cpu;
/*
* Only one CPU is allowed to execute the crash_kexec() code as with
* panic(). Otherwise parallel calls of panic() and crash_kexec()
* may stop each other. To exclude them, we use panic_cpu here too.
*/
old_cpu = PANIC_CPU_INVALID;
this_cpu = raw_smp_processor_id();
if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
/* This is the 1st CPU which comes here, so go ahead. */
__crash_kexec(regs);
/*
* Reset panic_cpu to allow another panic()/crash_kexec()
* call.
*/
atomic_set(&panic_cpu, PANIC_CPU_INVALID);
}
}
static inline resource_size_t crash_resource_size(const struct resource *res)
{
return !res->end ? 0 : resource_size(res);
}
ssize_t crash_get_memory_size(void)
{
ssize_t size = 0;
if (!kexec_trylock())
return -EBUSY;
size += crash_resource_size(&crashk_res);
size += crash_resource_size(&crashk_low_res);
kexec_unlock();
return size;
}
static int __crash_shrink_memory(struct resource *old_res,
unsigned long new_size)
{
struct resource *ram_res;
ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
if (!ram_res)
return -ENOMEM;
ram_res->start = old_res->start + new_size;
ram_res->end = old_res->end;
ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
ram_res->name = "System RAM";
if (!new_size) {
release_resource(old_res);
old_res->start = 0;
old_res->end = 0;
} else {
crashk_res.end = ram_res->start - 1;
}
crash_free_reserved_phys_range(ram_res->start, ram_res->end);
insert_resource(&iomem_resource, ram_res);
return 0;
}
int crash_shrink_memory(unsigned long new_size)
{
int ret = 0;
unsigned long old_size, low_size;
if (!kexec_trylock())
return -EBUSY;
if (kexec_crash_image) {
ret = -ENOENT;
goto unlock;
}
low_size = crash_resource_size(&crashk_low_res);
old_size = crash_resource_size(&crashk_res) + low_size;
new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
if (new_size >= old_size) {
ret = (new_size == old_size) ? 0 : -EINVAL;
goto unlock;
}
/*
* (low_size > new_size) implies that low_size is greater than zero.
* This also means that if low_size is zero, the else branch is taken.
*
* If low_size is greater than 0, (low_size > new_size) indicates that
* crashk_low_res also needs to be shrunken. Otherwise, only crashk_res
* needs to be shrunken.
*/
if (low_size > new_size) {
ret = __crash_shrink_memory(&crashk_res, 0);
if (ret)
goto unlock;
ret = __crash_shrink_memory(&crashk_low_res, new_size);
} else {
ret = __crash_shrink_memory(&crashk_res, new_size - low_size);
}
/* Swap crashk_res and crashk_low_res if needed */
if (!crashk_res.end && crashk_low_res.end) {
crashk_res.start = crashk_low_res.start;
crashk_res.end = crashk_low_res.end;
release_resource(&crashk_low_res);
crashk_low_res.start = 0;
crashk_low_res.end = 0;
insert_resource(&iomem_resource, &crashk_res);
}
unlock:
kexec_unlock();
return ret;
}
void crash_save_cpu(struct pt_regs *regs, int cpu)
{
struct elf_prstatus prstatus;
u32 *buf;
if ((cpu < 0) || (cpu >= nr_cpu_ids))
return;
/* Using ELF notes here is opportunistic.
* I need a well defined structure format
* for the data I pass, and I need tags
* on the data to indicate what information I have
* squirrelled away. ELF notes happen to provide
* all of that, so there is no need to invent something new.
*/
buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
if (!buf)
return;
memset(&prstatus, 0, sizeof(prstatus));
prstatus.common.pr_pid = current->pid;
elf_core_copy_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus));
final_note(buf);
}
/* /*
* Move into place and start executing a preloaded standalone * Move into place and start executing a preloaded standalone
* executable. If nothing was preloaded return an error. * executable. If nothing was preloaded return an error.

View file

@ -285,11 +285,13 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
kexec_file_dbg_print = !!(flags & KEXEC_FILE_DEBUG); kexec_file_dbg_print = !!(flags & KEXEC_FILE_DEBUG);
image->file_mode = 1; image->file_mode = 1;
#ifdef CONFIG_CRASH_DUMP
if (kexec_on_panic) { if (kexec_on_panic) {
/* Enable special crash kernel control page alloc policy. */ /* Enable special crash kernel control page alloc policy. */
image->control_page = crashk_res.start; image->control_page = crashk_res.start;
image->type = KEXEC_TYPE_CRASH; image->type = KEXEC_TYPE_CRASH;
} }
#endif
ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
cmdline_ptr, cmdline_len, flags); cmdline_ptr, cmdline_len, flags);
@ -349,13 +351,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
if (!kexec_trylock()) if (!kexec_trylock())
return -EBUSY; return -EBUSY;
#ifdef CONFIG_CRASH_DUMP
if (image_type == KEXEC_TYPE_CRASH) { if (image_type == KEXEC_TYPE_CRASH) {
dest_image = &kexec_crash_image; dest_image = &kexec_crash_image;
if (kexec_crash_image) if (kexec_crash_image)
arch_kexec_unprotect_crashkres(); arch_kexec_unprotect_crashkres();
} else { } else
#endif
dest_image = &kexec_image; dest_image = &kexec_image;
}
if (flags & KEXEC_FILE_UNLOAD) if (flags & KEXEC_FILE_UNLOAD)
goto exchange; goto exchange;
@ -419,8 +422,10 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
exchange: exchange:
image = xchg(dest_image, image); image = xchg(dest_image, image);
out: out:
#ifdef CONFIG_CRASH_DUMP
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image) if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres(); arch_kexec_protect_crashkres();
#endif
kexec_unlock(); kexec_unlock();
kimage_free(image); kimage_free(image);
@ -595,12 +600,14 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf,
static int kexec_walk_resources(struct kexec_buf *kbuf, static int kexec_walk_resources(struct kexec_buf *kbuf,
int (*func)(struct resource *, void *)) int (*func)(struct resource *, void *))
{ {
#ifdef CONFIG_CRASH_DUMP
if (kbuf->image->type == KEXEC_TYPE_CRASH) if (kbuf->image->type == KEXEC_TYPE_CRASH)
return walk_iomem_res_desc(crashk_res.desc, return walk_iomem_res_desc(crashk_res.desc,
IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
crashk_res.start, crashk_res.end, crashk_res.start, crashk_res.end,
kbuf, func); kbuf, func);
else if (kbuf->top_down) #endif
if (kbuf->top_down)
return walk_system_ram_res_rev(0, ULONG_MAX, kbuf, func); return walk_system_ram_res_rev(0, ULONG_MAX, kbuf, func);
else else
return walk_system_ram_res(0, ULONG_MAX, kbuf, func); return walk_system_ram_res(0, ULONG_MAX, kbuf, func);

View file

@ -120,6 +120,7 @@ static ssize_t kexec_loaded_show(struct kobject *kobj,
} }
KERNEL_ATTR_RO(kexec_loaded); KERNEL_ATTR_RO(kexec_loaded);
#ifdef CONFIG_CRASH_DUMP
static ssize_t kexec_crash_loaded_show(struct kobject *kobj, static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf) struct kobj_attribute *attr, char *buf)
{ {
@ -152,6 +153,7 @@ static ssize_t kexec_crash_size_store(struct kobject *kobj,
} }
KERNEL_ATTR_RW(kexec_crash_size); KERNEL_ATTR_RW(kexec_crash_size);
#endif /* CONFIG_CRASH_DUMP*/
#endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_VMCORE_INFO #ifdef CONFIG_VMCORE_INFO
@ -262,9 +264,11 @@ static struct attribute * kernel_attrs[] = {
#endif #endif
#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_KEXEC_CORE
&kexec_loaded_attr.attr, &kexec_loaded_attr.attr,
#ifdef CONFIG_CRASH_DUMP
&kexec_crash_loaded_attr.attr, &kexec_crash_loaded_attr.attr,
&kexec_crash_size_attr.attr, &kexec_crash_size_attr.attr,
#endif #endif
#endif
#ifdef CONFIG_VMCORE_INFO #ifdef CONFIG_VMCORE_INFO
&vmcoreinfo_attr.attr, &vmcoreinfo_attr.attr,
#ifdef CONFIG_CRASH_HOTPLUG #ifdef CONFIG_CRASH_HOTPLUG