arm64: Do not defer reserve_crashkernel() for platforms with no DMA memory zones

The following patches resulted in deferring crash kernel reservation to
mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU),
in particular Raspberry Pi 4.

commit 1a8e1cef76 ("arm64: use both ZONE_DMA and ZONE_DMA32")
commit 8424ecdde7 ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges")
commit 0a30c53573 ("arm64: mm: Move reserve_crashkernel() into mem_init()")
commit 2687275a58 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required")

Above changes introduced boot slowdown due to linear map creation for
all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1].  The proposed
changes restore crash kernel reservation to earlier behavior thus avoids
slow boot, particularly for platforms with IOMMU (no DMA memory zones).

Tested changes to confirm no ~150ms boot slowdown on our SoC with IOMMU
and 8GB memory.  Also tested with ZONE_DMA and/or ZONE_DMA32 configs to confirm
no regression to deferring scheme of crash kernel memory reservation.
In both cases successfully collected kernel crash dump.

[1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.microsoft.com/

Signed-off-by: Vijay Balakrishna <vijayb@linux.microsoft.com>
Cc: stable@vger.kernel.org
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Link: https://lore.kernel.org/r/1646242689-20744-1-git-send-email-vijayb@linux.microsoft.com
[will: Add #ifdef CONFIG_KEXEC_CORE guards to fix 'crashk_res' references in allnoconfig build]
Signed-off-by: Will Deacon <will@kernel.org>
This commit is contained in:
Vijay Balakrishna 2022-03-02 09:38:09 -08:00 committed by Will Deacon
parent 614c0b9fee
commit 031495635b
2 changed files with 63 additions and 5 deletions

View File

@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
* unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
* In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
* otherwise it is empty.
*
* Memory reservation for crash kernel either done early or deferred
* depending on DMA memory zones configs (ZONE_DMA) --
*
* In absence of ZONE_DMA configs arm64_dma_phys_limit initialized
* here instead of max_zone_phys(). This lets early reservation of
* crash kernel memory which has a dependency on arm64_dma_phys_limit.
* Reserving memory early for crash kernel allows linear creation of block
* mappings (greater than page-granularity) for all the memory bank rangs.
* In this scheme a comparatively quicker boot is observed.
*
* If ZONE_DMA configs are defined, crash kernel memory reservation
* is delayed until DMA zone memory range size initilazation performed in
* zone_sizes_init(). The defer is necessary to steer clear of DMA zone
* memory range to avoid overlap allocation. So crash kernel memory boundaries
* are not known when mapping all bank memory ranges, which otherwise means
* not possible to exclude crash kernel range from creating block mappings
* so page-granularity mappings are created for the entire memory range.
* Hence a slightly slower boot is observed.
*
* Note: Page-granularity mapppings are necessary for crash kernel memory
* range for shrinking its size via /sys/kernel/kexec_crash_size interface.
*/
phys_addr_t arm64_dma_phys_limit __ro_after_init;
#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
phys_addr_t __ro_after_init arm64_dma_phys_limit;
#else
const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1;
#endif
#ifdef CONFIG_KEXEC_CORE
/*
@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = dma32_phys_limit;
#endif
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = PHYS_MASK + 1;
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init(max_zone_pfns);
@ -315,6 +339,9 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
reserve_crashkernel();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}
@ -361,7 +388,8 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
reserve_crashkernel();
if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
reserve_crashkernel();
memblock_dump_all();
}

View File

@ -517,7 +517,7 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@ -528,6 +528,17 @@ static void __init map_mem(pgd_t *pgdp)
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE
if (crash_mem_map) {
if (IS_ENABLED(CONFIG_ZONE_DMA) ||
IS_ENABLED(CONFIG_ZONE_DMA32))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
else if (crashk_res.end)
memblock_mark_nomap(crashk_res.start,
resource_size(&crashk_res));
}
#endif
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@ -554,6 +565,25 @@ static void __init map_mem(pgd_t *pgdp)
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
/*
* Use page-level mappings here so that we can shrink the region
* in page granularity and put back unused memory to buddy system
* through /sys/kernel/kexec_crash_size interface.
*/
#ifdef CONFIG_KEXEC_CORE
if (crash_mem_map &&
!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
if (crashk_res.end) {
__map_memblock(pgdp, crashk_res.start,
crashk_res.end + 1,
PAGE_KERNEL,
NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
memblock_clear_nomap(crashk_res.start,
resource_size(&crashk_res));
}
}
#endif
}
void mark_rodata_ro(void)