From 6bbeb276b71f06c5267bfd154629b1bec82e7136 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 29 Apr 2019 08:23:18 +0800 Subject: [PATCH 1/9] x86/kexec: Add the EFI system tables and ACPI tables to the ident map Currently, only the whole physical memory is identity-mapped for the kexec kernel and the regions reserved by firmware are ignored. However, the recent addition of RSDP parsing in the decompression stage and especially: 33f0df8d843d ("x86/boot: Search for RSDP in the EFI tables") which tries to access EFI system tables and to dig out the RSDP address from there, becomes a problem because in certain configurations, they might not be mapped in the kexec'ed kernel's address space. What is more, this problem doesn't appear on all systems because the kexec kernel uses gigabyte pages to build the identity mapping. And the EFI system tables and ACPI tables can, depending on the system configuration, end up being mapped as part of all physical memory, if they share the same 1 GB area with the physical memory. Therefore, make sure they're always mapped. [ bp: productize half-baked patch: - rewrite commit message. - correct the map_acpi_tables() function name in the !ACPI case. ] Signed-off-by: Kairui Song Signed-off-by: Baoquan He Signed-off-by: Borislav Petkov Tested-by: Dirk van der Merwe Cc: dyoung@redhat.com Cc: fanc.fnst@cn.fujitsu.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: j-nomura@ce.jp.nec.com Cc: kexec@lists.infradead.org Cc: "Kirill A. 
Shutemov" Cc: Lianbo Jiang Cc: Tetsuo Handa Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190429002318.GA25400@MiWiFi-R3L-srv --- arch/x86/kernel/machine_kexec_64.c | 75 ++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index ceba408ea982..3c77bdf7b32a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,43 @@ #include #include +#ifdef CONFIG_ACPI +/* + * Used while adding mapping for ACPI tables. + * Can be reused when other iomem regions need be mapped + */ +struct init_pgtable_data { + struct x86_mapping_info *info; + pgd_t *level4p; +}; + +static int mem_region_callback(struct resource *res, void *arg) +{ + struct init_pgtable_data *data = arg; + unsigned long mstart, mend; + + mstart = res->start; + mend = mstart + resource_size(res) - 1; + + return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend); +} + +static int +map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) +{ + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; + struct init_pgtable_data data; + + data.info = info; + data.level4p = level4p; + flags = IORESOURCE_MEM | IORESOURCE_BUSY; + return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, + &data, mem_region_callback); +} +#else +static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; } +#endif + #ifdef CONFIG_KEXEC_FILE const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_bzImage64_ops, @@ -36,6 +74,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { }; #endif +static int +map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p) +{ +#ifdef CONFIG_EFI + unsigned long mstart, mend; + + if (!efi_enabled(EFI_BOOT)) + return 0; + + mstart = (boot_params.efi_info.efi_systab | + 
((u64)boot_params.efi_info.efi_systab_hi<<32)); + + if (efi_enabled(EFI_64BIT)) + mend = mstart + sizeof(efi_system_table_64_t); + else + mend = mstart + sizeof(efi_system_table_32_t); + + if (!mstart) + return 0; + + return kernel_ident_mapping_init(info, level4p, mstart, mend); +#endif + return 0; +} + static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); @@ -159,6 +222,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) return result; } + /* + * Prepare EFI systab and ACPI tables for kexec kernel since they are + * not covered by pfn_mapped. + */ + result = map_efi_systab(&info, level4p); + if (result) + return result; + + result = map_acpi_tables(&info, level4p); + if (result) + return result; + return init_transition_pgtable(image, level4p); } From 0a23ebc66a46786769dd68bfdaa3102345819b9c Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Thu, 11 Apr 2019 15:49:32 +0200 Subject: [PATCH 2/9] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels Commit 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params") broke kexec boot on EFI systems. efi_get_rsdp_addr() in the early parsing code tries to search RSDP from the EFI tables but that will crash because the table address is virtual when the kernel was booted by kexec (set_virtual_address_map() has run in the first kernel and cannot be run again in the second kernel). In the case of kexec, the physical address of EFI tables is provided via efi_setup_data in boot_params, which is set up by kexec(1). Factor out the table parsing code and use different pointers depending on whether the kernel is booted by kexec or not. [ bp: Massage. 
] Fixes: 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params") Signed-off-by: Jun'ichi Nomura Signed-off-by: Borislav Petkov Tested-by: Dirk van der Merwe Cc: Chao Fan Cc: Dave Young Link: https://lkml.kernel.org/r/20190408231011.GA5402@jeru.linux.bs1.fc.nec.co.jp --- arch/x86/boot/compressed/acpi.c | 143 ++++++++++++++++++++++++-------- 1 file changed, 107 insertions(+), 36 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index ad84239e595e..15255f388a85 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void) return addr; } -/* Search EFI system tables for RSDP. */ -static acpi_physical_address efi_get_rsdp_addr(void) +/* + * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and + * ACPI_TABLE_GUID are found, take the former, which has more features. + */ +static acpi_physical_address +__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables, + bool efi_64) { acpi_physical_address rsdp_addr = 0; #ifdef CONFIG_EFI - unsigned long systab, systab_tables, config_tables; + int i; + + /* Get EFI tables from systab. */ + for (i = 0; i < nr_tables; i++) { + acpi_physical_address table; + efi_guid_t guid; + + if (efi_64) { + efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i; + + guid = tbl->guid; + table = tbl->table; + + if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) { + debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n"); + return 0; + } + } else { + efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i; + + guid = tbl->guid; + table = tbl->table; + } + + if (!(efi_guidcmp(guid, ACPI_TABLE_GUID))) + rsdp_addr = table; + else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) + return table; + } +#endif + return rsdp_addr; +} + +/* EFI/kexec support is 64-bit only. 
*/ +#ifdef CONFIG_X86_64 +static struct efi_setup_data *get_kexec_setup_data_addr(void) +{ + struct setup_data *data; + u64 pa_data; + + pa_data = boot_params->hdr.setup_data; + while (pa_data) { + data = (struct setup_data *)pa_data; + if (data->type == SETUP_EFI) + return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data)); + + pa_data = data->next; + } + return NULL; +} + +static acpi_physical_address kexec_get_rsdp_addr(void) +{ + efi_system_table_64_t *systab; + struct efi_setup_data *esd; + struct efi_info *ei; + char *sig; + + esd = (struct efi_setup_data *)get_kexec_setup_data_addr(); + if (!esd) + return 0; + + if (!esd->tables) { + debug_putstr("Wrong kexec SETUP_EFI data.\n"); + return 0; + } + + ei = &boot_params->efi_info; + sig = (char *)&ei->efi_loader_signature; + if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) { + debug_putstr("Wrong kexec EFI loader signature.\n"); + return 0; + } + + /* Get systab from boot params. */ + systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32)); + if (!systab) + error("EFI system table not found in kexec boot_params."); + + return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true); +} +#else +static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; } +#endif /* CONFIG_X86_64 */ + +static acpi_physical_address efi_get_rsdp_addr(void) +{ +#ifdef CONFIG_EFI + unsigned long systab, config_tables; unsigned int nr_tables; struct efi_info *ei; bool efi_64; - int size, i; char *sig; ei = &boot_params->efi_info; @@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void) config_tables = stbl->tables; nr_tables = stbl->nr_tables; - size = sizeof(efi_config_table_64_t); } else { efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab; config_tables = stbl->tables; nr_tables = stbl->nr_tables; - size = sizeof(efi_config_table_32_t); } if (!config_tables) error("EFI config tables not found."); - /* Get EFI tables from systab. 
*/ - for (i = 0; i < nr_tables; i++) { - acpi_physical_address table; - efi_guid_t guid; - - config_tables += size; - - if (efi_64) { - efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables; - - guid = tbl->guid; - table = tbl->table; - - if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) { - debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n"); - return 0; - } - } else { - efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables; - - guid = tbl->guid; - table = tbl->table; - } - - if (!(efi_guidcmp(guid, ACPI_TABLE_GUID))) - rsdp_addr = table; - else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) - return table; - } + return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64); +#else + return 0; #endif - return rsdp_addr; } static u8 compute_checksum(u8 *buffer, u32 length) @@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void) if (!pa) pa = boot_params->acpi_rsdp_addr; + /* + * Try to get EFI data from setup_data. This can happen when we're a + * kexec'ed kernel and kexec(1) has passed all the required EFI info to + * us. + */ + if (!pa) + pa = kexec_get_rsdp_addr(); + if (!pa) pa = efi_get_rsdp_addr(); From 8e44c7840479edacc4a03d396c8d214576c8d411 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 30 Apr 2019 20:38:34 +0200 Subject: [PATCH 3/9] Revert "x86/boot: Disable RSDP parsing temporarily" TODO: - ask dyoung and Dirk van der Merwe to test again. This reverts commit 36f0c423552dacaca152324b8e9bda42a6d88865. Now that the required fixes are in place, reenable early RSDP parsing. Signed-off-by: Borislav Petkov Cc: Baoquan He Cc: Chao Fan Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: indou.takao@jp.fujitsu.com Cc: Ingo Molnar Cc: Juergen Gross Cc: kasong@redhat.com Cc: Kees Cook Cc: "Kirill A. 
Shutemov" Cc: msys.mizuma@gmail.com Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86-ml --- arch/x86/boot/compressed/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 5a237e8dbf8d..c0d6c560df69 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, boot_params->hdr.loadflags &= ~KASLR_FLAG; /* Save RSDP address for later use. */ - /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */ + boot_params->acpi_rsdp_addr = get_rsdp_addr(); sanitize_boot_params(boot_params); From 5b51ae969e3d8ab0134ee3c98a769ad6d2cc2e24 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 17 May 2019 15:45:08 +0200 Subject: [PATCH 4/9] x86/boot: Call get_rsdp_addr() after console_init() ... so that early debugging output from the RSDP parsing code can be visible and collected. Suggested-by: Dave Young Signed-off-by: Borislav Petkov Cc: Baoquan He Cc: Chao Fan Cc: Jun'ichi Nomura Cc: Kairui Song Cc: kexec@lists.infradead.org Cc: x86@kernel.org --- arch/x86/boot/compressed/misc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index c0d6c560df69..24e65a0f756d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -351,9 +351,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, /* Clear flags intended for solely in-kernel use. */ boot_params->hdr.loadflags &= ~KASLR_FLAG; - /* Save RSDP address for later use. */ - boot_params->acpi_rsdp_addr = get_rsdp_addr(); - sanitize_boot_params(boot_params); if (boot_params->screen_info.orig_video_mode == 7) { @@ -368,6 +365,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, cols = boot_params->screen_info.orig_video_cols; console_init(); + + /* + * Save RSDP address for later use. 
Have this after console_init() + * so that early debugging output from the RSDP parsing code can be + * collected. + */ + boot_params->acpi_rsdp_addr = get_rsdp_addr(); + debug_putstr("early console in extract_kernel\n"); free_mem_ptr = heap; /* Heap */ From 5a949b38839e284b1307540c56b03caf57da9736 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 10 Jun 2019 15:36:17 +0800 Subject: [PATCH 5/9] x86/kexec: Add the ACPI NVS region to the ident map With the recent addition of RSDP parsing in the decompression stage, a kexec-ed kernel now needs ACPI tables to be covered by the identity mapping. And in commit 6bbeb276b71f ("x86/kexec: Add the EFI system tables and ACPI tables to the ident map") the ACPI tables memory region was added to the ident map. But some machines have only an ACPI NVS memory region and the ACPI tables are located in that region. In such case, the kexec-ed kernel will still fail when trying to access ACPI tables if they're not mapped. So add the NVS memory region to the ident map as well. [ bp: Massage. ] Fixes: 6bbeb276b71f ("x86/kexec: Add the EFI system tables and ACPI tables to the ident map") Suggested-by: Junichi Nomura Signed-off-by: Kairui Song Signed-off-by: Borislav Petkov Tested-by: Junichi Nomura Cc: Baoquan He Cc: Chao Fan Cc: Dave Young Cc: Dirk van der Merwe Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kexec@lists.infradead.org Cc: Lianbo Jiang Cc: "Rafael J. 
Wysocki" Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190610073617.19767-1-kasong@redhat.com --- arch/x86/kernel/machine_kexec_64.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 3c77bdf7b32a..b2b88dcaaf88 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -54,14 +54,26 @@ static int mem_region_callback(struct resource *res, void *arg) static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { - unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; struct init_pgtable_data data; + unsigned long flags; + int ret; data.info = info; data.level4p = level4p; flags = IORESOURCE_MEM | IORESOURCE_BUSY; - return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, - &data, mem_region_callback); + + ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, + &data, mem_region_callback); + if (ret && ret != -EINVAL) + return ret; + + /* ACPI tables could be located in ACPI Non-volatile Storage region */ + ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, + &data, mem_region_callback); + if (ret && ret != -EINVAL) + return ret; + + return 0; } #else static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; } From 2238246ff8d533a5f2327d1f953375876d8a013c Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 27 Jun 2019 12:55:25 +0800 Subject: [PATCH 6/9] x86/boot: Make the GDT 8-byte aligned The segment descriptors are loaded with an implicitly LOCK-ed instruction, which could trigger the split lock #AC exception if the variable is not properly aligned and crosses a cache line. Align the GDT properly so the descriptors are all 8 byte aligned. Signed-off-by: Xiaoyao Li Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Fenghua Yu Link: https://lkml.kernel.org/r/20190627045525.105266-1-xiaoyao.li@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/head_64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fafb75c6c592..6233ae35d0d9 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -659,6 +659,7 @@ no_longmode: gdt64: .word gdt_end - gdt .quad 0 + .balign 8 gdt: .word gdt_end - gdt .long gdt From f2d08c5d3bcf3f7ef788af122b57a919efa1e9d0 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 24 May 2019 15:38:08 +0800 Subject: [PATCH 7/9] x86/boot: Add xloadflags bits to check for 5-level paging support The current kernel supports 5-level paging mode, and supports dynamically choosing the paging mode during bootup depending on the kernel image, hardware and kernel parameter settings. This flexibility brings several issues to kexec/kdump: 1) Dynamic switching between paging modes requires support in the target kernel. This means kexec from a 5-level paging kernel into a kernel which does not support mode switching is not possible. So the loader needs to be able to analyze the supported paging modes of the kexec target kernel. 2) If running on a 5-level paging kernel and the kexec target kernel is a 4-level paging kernel, the target image cannot be loaded above the 64TB address space limit. But the kexec loader searches for a load area from top to bottom which would eventually put the target kernel above 64TB when the machine has large enough RAM size. So the loader needs to be able to analyze the paging mode of the target kernel to load it at a suitable spot in the address space. Solution: Add two bits XLF_5LEVEL and XLF_5LEVEL_ENABLED: - Bit XLF_5LEVEL indicates whether 5-level paging mode switching support is available. 
(Issue #1) - Bit XLF_5LEVEL_ENABLED indicates whether the kernel was compiled with full 5-level paging support (CONFIG_X86_5LEVEL=y). (Issue #2) The loader will use these bits to verify whether the target kernel is suitable to be kexec'ed to from a 5-level paging kernel and to determine the constraints of the target kernel load address. The flags will be used by the kernel kexec subsystem and the userspace kexec tools. [ tglx: Massaged changelog ] Signed-off-by: Baoquan He Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dyoung@redhat.com Link: https://lkml.kernel.org/r/20190524073810.24298-2-bhe@redhat.com --- arch/x86/boot/header.S | 12 +++++++++++- arch/x86/include/uapi/asm/bootparam.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 850b8762e889..be19f4199727 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -419,7 +419,17 @@ xloadflags: # define XLF4 0 #endif - .word XLF0 | XLF1 | XLF23 | XLF4 +#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_5LEVEL +#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED) +#else +#define XLF56 XLF_5LEVEL +#endif +#else +#define XLF56 0 +#endif + + .word XLF0 | XLF1 | XLF23 | XLF4 | XLF56 cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, #added with boot protocol diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 60733f137e9a..c895df5482c5 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -29,6 +29,8 @@ #define XLF_EFI_HANDOVER_32 (1<<2) #define XLF_EFI_HANDOVER_64 (1<<3) #define XLF_EFI_KEXEC (1<<4) +#define XLF_5LEVEL (1<<5) +#define XLF_5LEVEL_ENABLED (1<<6) #ifndef __ASSEMBLY__ From ee338b9ee2822e65a85750da6129946c14962410 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 24 May 2019 15:38:09 +0800 Subject: [PATCH 8/9] x86/kexec/64: Prevent kexec from 5-level paging to a 4-level only 
kernel If the running kernel has 5-level paging activated, the 5-level paging mode is preserved across kexec. If the kexec'ed kernel does not contain support for handling active 5-level paging mode in the decompressor, the decompressor will crash with #GP. Prevent this situation at load time. If 5-level paging is active, check the xloadflags whether the kexec kernel can handle 5-level paging at least in the decompressor. If not, reject the load attempt and print out an error message. Signed-off-by: Baoquan He Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dyoung@redhat.com Link: https://lkml.kernel.org/r/20190524073810.24298-3-bhe@redhat.com --- arch/x86/kernel/kexec-bzimage64.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 22f60dd26460..7f439739ea3d 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -321,6 +321,11 @@ static int bzImage64_probe(const char *buf, unsigned long len) return ret; } + if (!(header->xloadflags & XLF_5LEVEL) && pgtable_l5_enabled()) { + pr_err("bzImage cannot handle 5-level paging mode.\n"); + return ret; + } + /* I've got a bzImage */ pr_debug("It's a relocatable bzImage64\n"); ret = 0; From 8ff80fbe7e9870078b1cc3c2cdd8f3f223b333a9 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 24 May 2019 15:38:10 +0800 Subject: [PATCH 9/9] x86/kdump/64: Restrict kdump kernel reservation to <64TB Restrict kdump to only reserve crashkernel below 64TB. The reason is that the kdump may jump from a 5-level paging mode to a 4-level paging mode kernel. If a 4-level paging mode kdump kernel is put above 64TB, then the kdump kernel cannot start. The 1st kernel reserves the kdump kernel region during bootup. At that point it is not known whether the kdump kernel has 5-level or 4-level paging support. 
To support both restrict the kdump kernel reservation to the lower 64TB address space to ensure that a 4-level paging mode kdump kernel can be loaded and successfully started. [ tglx: Massaged changelog ] Signed-off-by: Baoquan He Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Acked-by: Dave Young Cc: bp@alien8.de Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/20190524073810.24298-4-bhe@redhat.com --- arch/x86/kernel/setup.c | 15 ++++++++++++--- include/linux/sizes.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 08a5f4a131f5..dcbdf54fb5c1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -453,15 +453,24 @@ static void __init memblock_x86_reserve_range_setup_data(void) #define CRASH_ALIGN SZ_16M /* - * Keep the crash kernel below this limit. On 32 bits earlier kernels - * would limit the kernel to the low 512 MiB due to mapping restrictions. + * Keep the crash kernel below this limit. + * + * On 32 bits earlier kernels would limit the kernel to the low 512 MiB + * due to mapping restrictions. + * + * On 64bit, kdump kernel need be restricted to be under 64TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jumping could be from 5-level to 4-level, the jumping will fail if + * kernel is put above 64TB, and there's no way to detect the paging mode + * of the kernel which will be loaded for dumping during the 1st kernel + * bootup. 
*/ #ifdef CONFIG_X86_32 # define CRASH_ADDR_LOW_MAX SZ_512M # define CRASH_ADDR_HIGH_MAX SZ_512M #else # define CRASH_ADDR_LOW_MAX SZ_4G -# define CRASH_ADDR_HIGH_MAX MAXMEM +# define CRASH_ADDR_HIGH_MAX SZ_64T #endif static int __init reserve_crashkernel_low(void) diff --git a/include/linux/sizes.h b/include/linux/sizes.h index fbde0bc7e882..8651269cb46c 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -47,5 +47,6 @@ #define SZ_2G 0x80000000 #define SZ_4G _AC(0x100000000, ULL) +#define SZ_64T _AC(0x400000000000, ULL) #endif /* __LINUX_SIZES_H__ */