x86/crash: add x86 crash hotplug support

When CPUs or memory are hot-plugged or hot-unplugged, or brought online or
taken offline, the crash elfcorehdr, which describes the CPUs and memory in
the system, must also be updated.

A new elfcorehdr is generated from the available CPUs and memory and
replaces the existing elfcorehdr.  The segment containing the elfcorehdr
is identified at run-time in crash_core:crash_handle_hotplug_event().

No modifications to purgatory (see 'kexec: exclude elfcorehdr from the
segment digest') or boot_params (as the elfcorehdr= capture kernel command
line parameter pointer remains unchanged and correct) are needed; only the
elfcorehdr itself is updated.

For kexec_file_load(), the elfcorehdr segment size is based on NR_CPUS and
CRASH_MAX_MEMORY_RANGES in order to accommodate a growing number of CPU
and memory resources.

For kexec_load(), the userspace kexec utility needs to size the elfcorehdr
segment in the same/similar manner.

To accommodate kexec_load() syscall in the absence of kexec_file_load()
syscall support, prepare_elf_headers() and dependents are moved outside of
CONFIG_KEXEC_FILE.

[eric.devolder@oracle.com: correct unused function build error]
  Link: https://lkml.kernel.org/r/20230821182644.2143-1-eric.devolder@oracle.com
Link: https://lkml.kernel.org/r/20230814214446.6659-6-eric.devolder@oracle.com
Signed-off-by: Eric DeVolder <eric.devolder@oracle.com>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Akhil Raj <lf32.dev@gmail.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Eric DeVolder 2023-08-14 17:44:43 -04:00 committed by Andrew Morton
parent 88a6f89944
commit ea53ad9cf7
3 changed files with 116 additions and 7 deletions

View File

@ -2069,6 +2069,9 @@ config ARCH_SUPPORTS_KEXEC_JUMP
config ARCH_SUPPORTS_CRASH_DUMP
def_bool X86_64 || (X86_32 && HIGHMEM)
# x86 unconditionally declares crash hotplug support (def_bool y); the
# arch-side handler is arch_crash_handle_hotplug_event().
config ARCH_SUPPORTS_CRASH_HOTPLUG
def_bool y
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP)
default "0x1000000"

View File

@ -209,6 +209,21 @@ typedef void crash_vmclear_fn(void);
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
extern void kdump_nmi_shootdown_cpus(void);
#ifdef CONFIG_CRASH_HOTPLUG
/*
 * x86 handler that regenerates the crash elfcorehdr for @image after a
 * CPU/memory hotplug event.
 * NOTE(review): the self-referential #define presumably lets generic code
 * detect that the architecture provides its own implementation - confirm
 * against the generic kexec header.
 */
void arch_crash_handle_hotplug_event(struct kimage *image);
#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Always returns 1 when built with CONFIG_HOTPLUG_CPU.
 * NOTE(review): presumably consulted by generic crash code to learn whether
 * CPU hotplug updates of the crash image are handled - confirm with callers.
 */
static inline int crash_hotplug_cpu_support(void)
{
	return 1;
}
#define crash_hotplug_cpu_support crash_hotplug_cpu_support
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Always returns 1 when built with CONFIG_MEMORY_HOTPLUG.
 * NOTE(review): presumably consulted by generic crash code to learn whether
 * memory hotplug updates of the crash image are handled - confirm with callers.
 */
static inline int crash_hotplug_memory_support(void)
{
	return 1;
}
#define crash_hotplug_memory_support crash_hotplug_memory_support
#endif
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_KEXEC_H */

View File

@ -158,8 +158,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
crash_save_cpu(regs, safe_smp_processor_id());
}
#ifdef CONFIG_KEXEC_FILE
#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG)
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
@ -231,7 +230,7 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
/* Prepare elf headers. Return addr and size */
static int prepare_elf_headers(struct kimage *image, void **addr,
unsigned long *sz)
unsigned long *sz, unsigned long *nr_mem_ranges)
{
struct crash_mem *cmem;
int ret;
@ -249,6 +248,9 @@ static int prepare_elf_headers(struct kimage *image, void **addr,
if (ret)
goto out;
/* Return the computed number of memory ranges, for hotplug usage */
*nr_mem_ranges = cmem->nr_ranges;
/* By default prepare 64bit headers */
ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);
@ -256,7 +258,9 @@ out:
vfree(cmem);
return ret;
}
#endif
#ifdef CONFIG_KEXEC_FILE
static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
{
unsigned int nr_e820_entries;
@ -371,18 +375,42 @@ out:
int crash_load_segments(struct kimage *image)
{
int ret;
unsigned long pnum = 0;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ULONG_MAX, .top_down = false };
/* Prepare elf headers and add a segment */
ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz, &pnum);
if (ret)
return ret;
image->elf_headers = kbuf.buffer;
image->elf_headers_sz = kbuf.bufsz;
image->elf_headers = kbuf.buffer;
image->elf_headers_sz = kbuf.bufsz;
kbuf.memsz = kbuf.bufsz;
#ifdef CONFIG_CRASH_HOTPLUG
/*
* The elfcorehdr segment size accounts for VMCOREINFO, kernel_map,
* maximum CPUs and maximum memory ranges.
*/
if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
pnum = 2 + CONFIG_NR_CPUS_DEFAULT + CONFIG_CRASH_MAX_MEMORY_RANGES;
else
pnum += 2 + CONFIG_NR_CPUS_DEFAULT;
if (pnum < (unsigned long)PN_XNUM) {
kbuf.memsz = pnum * sizeof(Elf64_Phdr);
kbuf.memsz += sizeof(Elf64_Ehdr);
image->elfcorehdr_index = image->nr_segments;
/* Mark as usable to crash kernel, else crash kernel fails on boot */
image->elf_headers_sz = kbuf.memsz;
} else {
pr_err("number of Phdrs %lu exceeds max\n", pnum);
}
#endif
kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
@ -395,3 +423,66 @@ int crash_load_segments(struct kimage *image)
return ret;
}
#endif /* CONFIG_KEXEC_FILE */
#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt
/**
 * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
 * @image: a pointer to kexec_crash_image
 *
 * Prepare the new elfcorehdr and replace the existing elfcorehdr.
 *
 * A fresh elfcorehdr is built with prepare_elf_headers() and copied over the
 * segment selected by @image->elfcorehdr_index, provided it fits within that
 * segment's memsz.  On any failure an error is logged and the existing
 * elfcorehdr is left untouched.  The temporary buffer is freed on all paths.
 */
void arch_crash_handle_hotplug_event(struct kimage *image)
{
	void *elfbuf = NULL, *old_elfcorehdr;
	/* Required by prepare_elf_headers(); the value is not used here. */
	unsigned long nr_mem_ranges;
	unsigned long mem, memsz;
	unsigned long elfsz = 0;

	/*
	 * Create the new elfcorehdr reflecting the changes to CPU and/or
	 * memory resources.
	 */
	if (prepare_elf_headers(image, &elfbuf, &elfsz, &nr_mem_ranges)) {
		pr_err("unable to create new elfcorehdr\n");
		goto out;
	}

	/*
	 * Obtain address and size of the elfcorehdr segment, and
	 * check it against the new elfcorehdr buffer.
	 */
	mem = image->segment[image->elfcorehdr_index].mem;
	memsz = image->segment[image->elfcorehdr_index].memsz;
	if (elfsz > memsz) {
		pr_err("update elfcorehdr elfsz %lu > memsz %lu\n",
			elfsz, memsz);
		goto out;
	}

	/*
	 * Copy new elfcorehdr over the old elfcorehdr at destination.
	 *
	 * NOTE(review): only the first page of the segment is mapped while
	 * elfsz bytes are copied; this presumably relies on the segment being
	 * contiguously addressable through that mapping - confirm.
	 */
	old_elfcorehdr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
	if (!old_elfcorehdr) {
		pr_err("mapping elfcorehdr segment failed\n");
		goto out;
	}

	/*
	 * Temporarily invalidate the crash image while the
	 * elfcorehdr is updated.
	 */
	xchg(&kexec_crash_image, NULL);
	memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz);
	xchg(&kexec_crash_image, image);
	kunmap_local(old_elfcorehdr);
	pr_debug("updated elfcorehdr\n");

out:
	/* elfbuf is still NULL if header preparation failed before allocating. */
	vfree(elfbuf);
}
#endif