Merge branch 'for-next/kexec' into for-next/core

* for-next/kexec:
  arm64: trans_pgd: remove trans_pgd_map_page()
  arm64: kexec: remove cpu-reset.h
  arm64: kexec: remove the pre-kexec PoC maintenance
  arm64: kexec: keep MMU enabled during kexec relocation
  arm64: kexec: install a copy of the linear-map
  arm64: kexec: use ld script for relocation function
  arm64: kexec: relocate in EL1 mode
  arm64: kexec: configure EL2 vectors for kexec
  arm64: kexec: pass kimage as the only argument to relocation function
  arm64: kexec: Use dcache ops macros instead of open-coding
  arm64: kexec: skip relocation code for inplace kexec
  arm64: kexec: flush image and lists during kexec load time
  arm64: hibernate: abstract ttbr0 setup function
  arm64: trans_pgd: hibernate: Add trans_pgd_copy_el2_vectors
  arm64: kernel: add helper for booted at EL2 and not VHE
Will Deacon 2021-10-29 12:24:47 +01:00
commit d8a2c0fba5
19 changed files with 357 additions and 340 deletions

arch/arm64/Kconfig

@ -1135,7 +1135,7 @@ config CRASH_DUMP
config TRANS_TABLE
def_bool y
depends on HIBERNATION
depends on HIBERNATION || KEXEC_CORE
config XEN_DOM0
def_bool y

arch/arm64/include/asm/assembler.h

@ -380,19 +380,19 @@ alternative_endif
/*
* Macro to perform a data cache maintenance for the interval
* [start, end)
* [start, end) with dcache line size explicitly provided.
*
* op: operation passed to dc instruction
* domain: domain used in dsb instruction
* start: starting virtual address of the region
* end: end virtual address of the region
* linesz: dcache line size
* fixup: optional label to branch to on user fault
* Corrupts: start, end, tmp1, tmp2
* Corrupts: start, end, tmp
*/
.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
dcache_line_size \tmp1, \tmp2
sub \tmp2, \tmp1, #1
bic \start, \start, \tmp2
.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
sub \tmp, \linesz, #1
bic \start, \start, \tmp
.Ldcache_op\@:
.ifc \op, cvau
__dcache_op_workaround_clean_cache \op, \start
@ -411,7 +411,7 @@ alternative_endif
.endif
.endif
.endif
add \start, \start, \tmp1
add \start, \start, \linesz
cmp \start, \end
b.lo .Ldcache_op\@
dsb \domain
@ -419,6 +419,22 @@ alternative_endif
_cond_extable .Ldcache_op\@, \fixup
.endm
/*
* Macro to perform a data cache maintenance for the interval
* [start, end)
*
* op: operation passed to dc instruction
* domain: domain used in dsb instruction
* start: starting virtual address of the region
* end: end virtual address of the region
* fixup: optional label to branch to on user fault
* Corrupts: start, end, tmp1, tmp2
*/
.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
dcache_line_size \tmp1, \tmp2
dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
.endm
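For reference, the loop this macro family implements is easier to see in C. A minimal sketch, illustrative only — the real code must be an assembly macro because it runs with the MMU off or without a usable stack, and do_dc_op merely stands in for a single dc instruction:

#include <stdint.h>

/*
 * Illustrative C rendering of dcache_by_myline_op: align start down to a
 * cache-line boundary, issue one maintenance op per line until the
 * (exclusive) end address is reached, then order with a barrier.
 */
static void dcache_by_line(void (*do_dc_op)(uintptr_t), uintptr_t start,
			   uintptr_t end, uintptr_t linesz)
{
	uintptr_t addr = start & ~(linesz - 1);	/* sub/bic: align down */

	do {
		do_dc_op(addr);			/* dc <op>, addr */
		addr += linesz;			/* add start, start, linesz */
	} while (addr < end);			/* cmp + b.lo */
	/* dsb \domain follows in the macro */
}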
/*
* Macro to perform an instruction cache maintenance for the interval
* [start, end)
@ -442,6 +458,25 @@ alternative_endif
_cond_extable .Licache_op\@, \fixup
.endm
/*
* To prevent the possibility of old and new partial table walks being visible
* in the tlb, switch the ttbr to a zero page when we invalidate the old
* records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
* Even switching to our copied tables will cause a changed output address at
* each stage of the walk.
*/
.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2
phys_to_ttbr \tmp, \zero_page
msr ttbr1_el1, \tmp
isb
tlbi vmalle1
dsb nsh
phys_to_ttbr \tmp, \page_table
offset_ttbr1 \tmp, \tmp2
msr ttbr1_el1, \tmp
isb
.endm
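In the kernel's C helper vocabulary, the sequence above corresponds roughly to the sketch below. Illustrative only: offset_ttbr1, which adjusts TTBR1 for 52-bit VA configurations, is omitted, and the real code has to be an assembly macro because it runs on paths where C cannot be used.

#include <linux/types.h>
#include <asm/barrier.h>	/* isb(), dsb() */
#include <asm/pgtable.h>	/* phys_to_ttbr() */
#include <asm/sysreg.h>		/* write_sysreg() */
#include <asm/tlbflush.h>	/* __tlbi() */

/* Break-before-make on TTBR1: "break" to the zero page, invalidate any
 * stale walks, then "make" the switch to the copied tables. */
static void bbm_ttbr1_switch(phys_addr_t zero_page, phys_addr_t page_table)
{
	write_sysreg(phys_to_ttbr(zero_page), ttbr1_el1);	/* break */
	isb();
	__tlbi(vmalle1);			/* drop stale translations */
	dsb(nsh);
	write_sysreg(phys_to_ttbr(page_table), ttbr1_el1);	/* make */
	isb();
}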
/*
* reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
*/

arch/arm64/include/asm/kexec.h

@ -90,12 +90,24 @@ static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
#if defined(CONFIG_KEXEC_CORE)
void cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
unsigned long arg0, unsigned long arg1,
unsigned long arg2);
#endif
#define ARCH_HAS_KIMAGE_ARCH
struct kimage_arch {
void *dtb;			/* virtual address of the saved dtb */
phys_addr_t dtb_mem;		/* physical address of the dtb */
phys_addr_t kern_reloc;		/* physical address of the relocation code copy */
phys_addr_t el2_vectors;	/* temporary EL2 vectors (nVHE only, else 0) */
phys_addr_t ttbr0;		/* idmap covering the relocation code */
phys_addr_t ttbr1;		/* copy of the linear map */
phys_addr_t zero_page;		/* zero page for break-before-make */
unsigned long phys_offset;	/* phys-to-virt delta used during relocation */
unsigned long t0sz;		/* t0sz matching the ttbr0 idmap */
};
#ifdef CONFIG_KEXEC_FILE

arch/arm64/include/asm/mmu_context.h

@ -115,6 +115,30 @@ static inline void cpu_install_idmap(void)
cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
* Load our new page tables. A strict BBM approach requires that we ensure that
* TLBs are free of any entries that may overlap with the global mappings we are
* about to install.
*
* For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime
* services), while for a userspace-driven test_resume cycle it points to
* userspace page tables (and we must point it at a zero page ourselves).
*
* We change T0SZ as part of installing the idmap. This is undone by
* cpu_uninstall_idmap() in __cpu_suspend_exit().
*/
static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
__cpu_set_tcr_t0sz(t0sz);
/* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */
write_sysreg(ttbr0, ttbr0_el1);
isb();
}
/*
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.

arch/arm64/include/asm/sections.h

@ -21,5 +21,6 @@ extern char __exittext_begin[], __exittext_end[];
extern char __irqentry_text_start[], __irqentry_text_end[];
extern char __mmuoff_data_start[], __mmuoff_data_end[];
extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
#endif /* __ASM_SECTIONS_H */

arch/arm64/include/asm/trans_pgd.h

@ -1,8 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2020, Microsoft Corporation.
* Pavel Tatashin <pasha.tatashin@soleen.com>
* Copyright (c) 2021, Microsoft Corporation.
* Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#ifndef _ASM_TRANS_TABLE_H
@ -15,7 +15,7 @@
/*
* trans_alloc_page
* - Allocator that should return exactly one zeroed page, if this
* allocator fails, trans_pgd_create_copy() and trans_pgd_map_page()
* allocator fails, trans_pgd_create_copy() and trans_pgd_idmap_page()
* return -ENOMEM error.
*
* trans_alloc_arg
@ -30,10 +30,12 @@ struct trans_pgd_info {
int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd,
unsigned long start, unsigned long end);
int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
void *page, unsigned long dst_addr, pgprot_t pgprot);
int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
unsigned long *t0sz, void *page);
int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info,
phys_addr_t *el2_vectors);
extern char trans_pgd_stub_vectors[];
#endif /* _ASM_TRANS_TABLE_H */
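The trans_alloc_page hook above is the whole contract a caller provides. hibernate.c in this series wires it up as below (get_safe_page is hibernate's allocator; kexec supplies kexec_page_alloc instead, shown later in machine_kexec_post_load):

#include <linux/gfp.h>
#include <linux/suspend.h>	/* get_safe_page() */
#include <asm/trans_pgd.h>

/* Must return exactly one zeroed page, or NULL (callers then get -ENOMEM). */
static void *hibernate_page_alloc(void *arg)
{
	return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
}

static struct trans_pgd_info trans_info = {
	.trans_alloc_page	= hibernate_page_alloc,
	.trans_alloc_arg	= (__force void *)GFP_ATOMIC,
};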

arch/arm64/include/asm/virt.h

@ -67,6 +67,8 @@
*/
extern u32 __boot_cpu_mode[2];
#define ARM64_VECTOR_TABLE_LEN SZ_2K
void __hyp_set_vectors(phys_addr_t phys_vector_base);
void __hyp_reset_vectors(void);
@ -128,6 +130,11 @@ static __always_inline bool is_protected_kvm_enabled(void)
return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
}
static inline bool is_hyp_nvhe(void)
{
return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
}
#endif /* __ASSEMBLY__ */
#endif /* ! __ASM__VIRT_H */

arch/arm64/kernel/asm-offsets.c

@ -9,6 +9,7 @@
#include <linux/arm_sdei.h>
#include <linux/sched.h>
#include <linux/kexec.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
@ -170,6 +171,16 @@ int main(void)
DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia));
#endif
BLANK();
#endif
#ifdef CONFIG_KEXEC_CORE
DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem));
DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors));
DEFINE(KIMAGE_ARCH_ZERO_PAGE, offsetof(struct kimage, arch.zero_page));
DEFINE(KIMAGE_ARCH_PHYS_OFFSET, offsetof(struct kimage, arch.phys_offset));
DEFINE(KIMAGE_ARCH_TTBR1, offsetof(struct kimage, arch.ttbr1));
DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head));
DEFINE(KIMAGE_START, offsetof(struct kimage, start));
BLANK();
#endif
return 0;
}

arch/arm64/kernel/cpu-reset.S

@ -16,8 +16,7 @@
.pushsection .idmap.text, "awx"
/*
* __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
* cpu_soft_restart.
* cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2)
*
* @el2_switch: Flag to indicate a switch to EL2 is needed.
* @entry: Location to jump to for soft reset.
@ -29,7 +28,7 @@
* branch to what would be the reset vector. It must be executed with the
* flat identity mapping.
*/
SYM_CODE_START(__cpu_soft_restart)
SYM_CODE_START(cpu_soft_restart)
mov_q x12, INIT_SCTLR_EL1_MMU_OFF
pre_disable_mmu_workaround
/*
@ -48,6 +47,6 @@ SYM_CODE_START(__cpu_soft_restart)
mov x1, x3 // arg1
mov x2, x4 // arg2
br x8
SYM_CODE_END(__cpu_soft_restart)
SYM_CODE_END(cpu_soft_restart)
.popsection

arch/arm64/kernel/cpu-reset.h (deleted)

@ -1,32 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* CPU reset routines
*
* Copyright (C) 2015 Huawei Futurewei Technologies.
*/
#ifndef _ARM64_CPU_RESET_H
#define _ARM64_CPU_RESET_H
#include <asm/virt.h>
void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
unsigned long arg0, unsigned long arg1, unsigned long arg2);
static inline void __noreturn __nocfi cpu_soft_restart(unsigned long entry,
unsigned long arg0,
unsigned long arg1,
unsigned long arg2)
{
typeof(__cpu_soft_restart) *restart;
unsigned long el2_switch = !is_kernel_in_hyp_mode() &&
is_hyp_mode_available();
restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart));
cpu_install_idmap();
restart(el2_switch, entry, arg0, arg1, arg2);
unreachable();
}
#endif

arch/arm64/kernel/hibernate-asm.S

@ -15,26 +15,6 @@
#include <asm/page.h>
#include <asm/virt.h>
/*
* To prevent the possibility of old and new partial table walks being visible
* in the tlb, switch the ttbr to a zero page when we invalidate the old
* records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
* Even switching to our copied tables will cause a changed output address at
* each stage of the walk.
*/
.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2
phys_to_ttbr \tmp, \zero_page
msr ttbr1_el1, \tmp
isb
tlbi vmalle1
dsb nsh
phys_to_ttbr \tmp, \page_table
offset_ttbr1 \tmp, \tmp2
msr ttbr1_el1, \tmp
isb
.endm
/*
* Resume from hibernate
*
@ -112,56 +92,4 @@ alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
hvc #0
3: ret
SYM_CODE_END(swsusp_arch_suspend_exit)
/*
* Restore the hyp stub.
* This must be done before the hibernate page is unmapped by _cpu_resume(),
* but happens before any of the hyp-stub's code is cleaned to PoC.
*
* x24: The physical address of __hyp_stub_vectors
*/
SYM_CODE_START_LOCAL(el1_sync)
msr vbar_el2, x24
eret
SYM_CODE_END(el1_sync)
.macro invalid_vector label
SYM_CODE_START_LOCAL(\label)
b \label
SYM_CODE_END(\label)
.endm
invalid_vector el2_sync_invalid
invalid_vector el2_irq_invalid
invalid_vector el2_fiq_invalid
invalid_vector el2_error_invalid
invalid_vector el1_sync_invalid
invalid_vector el1_irq_invalid
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid
/* el2 vectors - switch el2 here while we restore the memory image. */
.align 11
SYM_CODE_START(hibernate_el2_vectors)
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
ventry el2_error_invalid // Error EL2t
ventry el2_sync_invalid // Synchronous EL2h
ventry el2_irq_invalid // IRQ EL2h
ventry el2_fiq_invalid // FIQ EL2h
ventry el2_error_invalid // Error EL2h
ventry el1_sync // Synchronous 64-bit EL1
ventry el1_irq_invalid // IRQ 64-bit EL1
ventry el1_fiq_invalid // FIQ 64-bit EL1
ventry el1_error_invalid // Error 64-bit EL1
ventry el1_sync_invalid // Synchronous 32-bit EL1
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
SYM_CODE_END(hibernate_el2_vectors)
.popsection

arch/arm64/kernel/hibernate.c

@ -49,10 +49,7 @@
extern int in_suspend;
/* Do we need to reset el2? */
#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
/* temporary el2 vectors in the __hibernate_exit_text section. */
extern char hibernate_el2_vectors[];
#define el2_reset_needed() (is_hyp_nvhe())
/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
extern char __hyp_stub_vectors[];
@ -215,26 +212,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
if (rc)
return rc;
/*
* Load our new page tables. A strict BBM approach requires that we
* ensure that TLBs are free of any entries that may overlap with the
* global mappings we are about to install.
*
* For a real hibernate/resume cycle TTBR0 currently points to a zero
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
* runtime services), while for a userspace-driven test_resume cycle it
* points to userspace page tables (and we must point it at a zero page
* ourselves).
*
* We change T0SZ as part of installing the idmap. This is undone by
* cpu_uninstall_idmap() in __cpu_suspend_exit().
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
__cpu_set_tcr_t0sz(t0sz);
write_sysreg(trans_ttbr0, ttbr0_el1);
isb();
cpu_install_ttbr0(trans_ttbr0, t0sz);
*phys_dst_addr = virt_to_phys(page);
return 0;
@ -434,6 +412,7 @@ int swsusp_arch_resume(void)
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;
phys_addr_t el2_vectors;
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
@ -461,6 +440,14 @@ int swsusp_arch_resume(void)
return -ENOMEM;
}
if (el2_reset_needed()) {
rc = trans_pgd_copy_el2_vectors(&trans_info, &el2_vectors);
if (rc) {
pr_err("Failed to setup el2 vectors\n");
return rc;
}
}
exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
/*
* Copy swsusp_arch_suspend_exit() to a safe page. This will generate
@ -473,26 +460,14 @@ int swsusp_arch_resume(void)
return rc;
}
/*
* The hibernate exit text contains a set of el2 vectors, that will
* be executed at el2 with the mmu off in order to reload hyp-stub.
*/
dcache_clean_inval_poc((unsigned long)hibernate_exit,
(unsigned long)hibernate_exit + exit_size);
/*
* KASLR will cause the el2 vectors to be in a different location in
* the resumed kernel. Load hibernate's temporary copy into el2.
*
* We can skip this step if we booted at EL1, or are running with VHE.
*/
if (el2_reset_needed()) {
phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
el2_vectors += hibernate_el2_vectors -
__hibernate_exit_text_start; /* offset */
if (el2_reset_needed())
__hyp_set_vectors(el2_vectors);
}
hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
resume_hdr.reenter_kernel, restore_pblist,

arch/arm64/kernel/machine_kexec.c

@ -21,12 +21,8 @@
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include "cpu-reset.h"
/* Global variables for the arm64_relocate_new_kernel routine. */
extern const unsigned char arm64_relocate_new_kernel[];
extern const unsigned long arm64_relocate_new_kernel_size;
#include <asm/sections.h>
#include <asm/trans_pgd.h>
/**
* kexec_image_info - For debugging output.
@ -43,7 +39,9 @@ static void _kexec_image_info(const char *func, int line,
pr_debug(" start: %lx\n", kimage->start);
pr_debug(" head: %lx\n", kimage->head);
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem);
pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
@ -60,29 +58,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
/* Empty routine needed to avoid build errors. */
}
int machine_kexec_post_load(struct kimage *kimage)
{
void *reloc_code = page_to_virt(kimage->control_code_page);
memcpy(reloc_code, arm64_relocate_new_kernel,
arm64_relocate_new_kernel_size);
kimage->arch.kern_reloc = __pa(reloc_code);
kexec_image_info(kimage);
/*
* For execution with the MMU off, reloc_code needs to be cleaned to the
* PoC and invalidated from the I-cache.
*/
dcache_clean_inval_poc((unsigned long)reloc_code,
(unsigned long)reloc_code +
arm64_relocate_new_kernel_size);
icache_inval_pou((uintptr_t)reloc_code,
(uintptr_t)reloc_code +
arm64_relocate_new_kernel_size);
return 0;
}
/**
* machine_kexec_prepare - Prepare for a kexec reboot.
*
@ -100,45 +75,6 @@ int machine_kexec_prepare(struct kimage *kimage)
return 0;
}
/**
* kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
*/
static void kexec_list_flush(struct kimage *kimage)
{
kimage_entry_t *entry;
for (entry = &kimage->head; ; entry++) {
unsigned int flag;
unsigned long addr;
/* flush the list entries. */
dcache_clean_inval_poc((unsigned long)entry,
(unsigned long)entry +
sizeof(kimage_entry_t));
flag = *entry & IND_FLAGS;
if (flag == IND_DONE)
break;
addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);
switch (flag) {
case IND_INDIRECTION:
/* Set entry point just before the new list page. */
entry = (kimage_entry_t *)addr - 1;
break;
case IND_SOURCE:
/* flush the source pages. */
dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
break;
case IND_DESTINATION:
break;
default:
BUG();
}
}
}
/**
* kexec_segment_flush - Helper to flush the kimage segments to PoC.
*/
@ -163,6 +99,75 @@ static void kexec_segment_flush(const struct kimage *kimage)
}
}
/* Allocates pages for kexec page table */
static void *kexec_page_alloc(void *arg)
{
struct kimage *kimage = (struct kimage *)arg;
struct page *page = kimage_alloc_control_pages(kimage, 0);
if (!page)
return NULL;
memset(page_address(page), 0, PAGE_SIZE);
return page_address(page);
}
int machine_kexec_post_load(struct kimage *kimage)
{
int rc;
pgd_t *trans_pgd;
void *reloc_code = page_to_virt(kimage->control_code_page);
long reloc_size;
struct trans_pgd_info info = {
.trans_alloc_page = kexec_page_alloc,
.trans_alloc_arg = kimage,
};
/* If in place, relocation is not used, only flush next kernel */
if (kimage->head & IND_DONE) {
kexec_segment_flush(kimage);
kexec_image_info(kimage);
return 0;
}
kimage->arch.el2_vectors = 0;
if (is_hyp_nvhe()) {
rc = trans_pgd_copy_el2_vectors(&info,
&kimage->arch.el2_vectors);
if (rc)
return rc;
}
/* Create a copy of the linear map */
trans_pgd = kexec_page_alloc(kimage);
if (!trans_pgd)
return -ENOMEM;
rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END);
if (rc)
return rc;
kimage->arch.ttbr1 = __pa(trans_pgd);
kimage->arch.zero_page = __pa(empty_zero_page);
reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);
kimage->arch.kern_reloc = __pa(reloc_code);
rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0,
&kimage->arch.t0sz, reloc_code);
if (rc)
return rc;
kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage;
/* Flush the reloc_code in preparation for its execution. */
dcache_clean_inval_poc((unsigned long)reloc_code,
(unsigned long)reloc_code + reloc_size);
icache_inval_pou((uintptr_t)reloc_code,
(uintptr_t)reloc_code + reloc_size);
kexec_image_info(kimage);
return 0;
}
/**
* machine_kexec - Do the kexec reboot.
*
@ -180,31 +185,35 @@ void machine_kexec(struct kimage *kimage)
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
"Some CPUs may be stale, kdump will be unreliable.\n");
/* Flush the kimage list and its buffers. */
kexec_list_flush(kimage);
/* Flush the new image if already in place. */
if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
kexec_segment_flush(kimage);
pr_info("Bye!\n");
local_daif_mask();
/*
* cpu_soft_restart will shutdown the MMU, disable data caches, then
* transfer control to the kern_reloc which contains a copy of
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
* uses physical addressing to relocate the new image to its final
* position and transfers control to the image entry point when the
* relocation is complete.
* Both restart and kernel_reloc will shut down the MMU and disable data
* caches. However, restart will start the new kernel or purgatory directly,
* while kernel_reloc contains the body of arm64_relocate_new_kernel.
* In the kexec case, kimage->start points to purgatory, assuming that the
* kernel entry and dtb address are embedded in purgatory by userspace
* (kexec-tools).
* In the kexec_file case, the kernel starts directly without purgatory.
*/
cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
kimage->arch.dtb_mem);
if (kimage->head & IND_DONE) {
typeof(cpu_soft_restart) *restart;
cpu_install_idmap();
restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart));
restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem,
0, 0);
} else {
void (*kernel_reloc)(struct kimage *kimage);
if (is_hyp_nvhe())
__hyp_set_vectors(kimage->arch.el2_vectors);
cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz);
kernel_reloc = (void *)kimage->arch.kern_reloc;
kernel_reloc(kimage);
}
BUG(); /* Should never get here. */
}
@ -261,8 +270,6 @@ void arch_kexec_protect_crashkres(void)
{
int i;
kexec_segment_flush(kexec_crash_image);
for (i = 0; i < kexec_crash_image->nr_segments; i++)
set_memory_valid(
__phys_to_virt(kexec_crash_image->segment[i].mem),

arch/arm64/kernel/relocate_kernel.S

@ -4,6 +4,8 @@
*
* Copyright (C) Linaro.
* Copyright (C) Huawei Futurewei Technologies.
* Copyright (C) 2021, Microsoft Corporation.
* Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#include <linux/kexec.h>
@ -13,7 +15,16 @@
#include <asm/kexec.h>
#include <asm/page.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
.macro turn_off_mmu tmp1, tmp2
mov_q \tmp1, INIT_SCTLR_EL1_MMU_OFF
pre_disable_mmu_workaround
msr sctlr_el1, \tmp1
isb
.endm
.section ".kexec_relocate.text", "ax"
/*
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
*
@ -27,33 +38,24 @@
*/
SYM_CODE_START(arm64_relocate_new_kernel)
/* Setup the list loop variables. */
mov x18, x2 /* x18 = dtb address */
mov x17, x1 /* x17 = kimage_start */
mov x16, x0 /* x16 = kimage_head */
mov x14, xzr /* x14 = entry ptr */
mov x13, xzr /* x13 = copy dest */
/* Check if the new image needs relocation. */
tbnz x16, IND_DONE_BIT, .Ldone
ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */
ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */
ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */
ldr x22, [x0, #KIMAGE_ARCH_PHYS_OFFSET] /* x22 phys_offset */
raw_dcache_line_size x15, x1 /* x15 = dcache line size */
break_before_make_ttbr_switch x18, x17, x1, x2 /* set linear map */
.Lloop:
and x12, x16, PAGE_MASK /* x12 = addr */
sub x12, x12, x22 /* Convert x12 to virt */
/* Test the entry flags. */
.Ltest_source:
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
/* Invalidate dest page to PoC. */
mov x2, x13
add x20, x2, #PAGE_SIZE
sub x1, x15, #1
bic x2, x2, x1
2: dc ivac, x2
add x2, x2, x15
cmp x2, x20
b.lo 2b
dsb sy
mov x19, x13
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
add x1, x19, #PAGE_SIZE
dcache_by_myline_op civac, sy, x19, x1, x15, x20
b .Lnext
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
@ -65,31 +67,26 @@ SYM_CODE_START(arm64_relocate_new_kernel)
.Lnext:
ldr x16, [x14], #8 /* entry = *ptr++ */
tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */
.Ldone:
/* wait for writes from copy_page to finish */
dsb nsh
ic iallu
dsb nsh
isb
ldr x4, [x0, #KIMAGE_START] /* relocation start */
ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* el2 vectors */
ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */
turn_off_mmu x12, x13
/* Start new image. */
mov x0, x18
mov x1, xzr
cbz x1, .Lel1
mov x1, x4 /* relocation start */
mov x2, x0 /* dtb address */
mov x3, xzr
mov x4, xzr
mov x0, #HVC_SOFT_RESTART
hvc #0 /* Jumps from el2 */
.Lel1:
mov x2, xzr
mov x3, xzr
br x17
br x4 /* Jumps from el1 */
SYM_CODE_END(arm64_relocate_new_kernel)
.align 3 /* To keep the 64-bit values below naturally aligned. */
.Lcopy_end:
.org KEXEC_CONTROL_PAGE_SIZE
/*
* arm64_relocate_new_kernel_size - Number of bytes to copy to the
* control_code_page.
*/
.globl arm64_relocate_new_kernel_size
arm64_relocate_new_kernel_size:
.quad .Lcopy_end - arm64_relocate_new_kernel

arch/arm64/kernel/sdei.c

@ -202,7 +202,7 @@ unsigned long sdei_arch_get_entry_point(int conduit)
* dropped to EL1 because we don't support VHE, then we can't support
* SDEI.
*/
if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
if (is_hyp_nvhe()) {
pr_err("Not supported on this hardware/boot configuration\n");
goto out_err;
}

arch/arm64/kernel/vmlinux.lds.S

@ -63,6 +63,7 @@
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/kernel-pgtable.h>
#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/page.h>
@ -100,6 +101,16 @@ jiffies = jiffies_64;
#define HIBERNATE_TEXT
#endif
#ifdef CONFIG_KEXEC_CORE
#define KEXEC_TEXT \
. = ALIGN(SZ_4K); \
__relocate_new_kernel_start = .; \
*(.kexec_relocate.text) \
__relocate_new_kernel_end = .;
#else
#define KEXEC_TEXT
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
@ -160,6 +171,7 @@ SECTIONS
HYPERVISOR_TEXT
IDMAP_TEXT
HIBERNATE_TEXT
KEXEC_TEXT
TRAMP_TEXT
*(.gnu.warning)
. = ALIGN(16);
@ -347,3 +359,10 @@ ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET,
ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
"TRAMP_SWAPPER_OFFSET is wrong!")
#endif
#ifdef CONFIG_KEXEC_CORE
/* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */
ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1))
<= SZ_4K, "kexec relocation code is too big or misaligned")
ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken")
#endif

arch/arm64/mm/Makefile

@ -7,6 +7,7 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_ARM64_MTE) += mteswap.o
KASAN_SANITIZE_physaddr.o += n

arch/arm64/mm/trans_pgd-asm.S (new file)

@ -0,0 +1,65 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, Microsoft Corporation.
* Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/kvm_asm.h>
.macro invalid_vector label
SYM_CODE_START_LOCAL(\label)
.align 7
b \label
SYM_CODE_END(\label)
.endm
.macro el1_sync_vector
SYM_CODE_START_LOCAL(el1_sync)
.align 7
cmp x0, #HVC_SET_VECTORS /* Called from hibernate */
b.ne 1f
msr vbar_el2, x1
mov x0, xzr
eret
1: cmp x0, #HVC_SOFT_RESTART /* Called from kexec */
b.ne 2f
mov x0, x2
mov x2, x4
mov x4, x1
mov x1, x3
br x4
2: /* Unexpected argument, set an error */
mov_q x0, HVC_STUB_ERR
eret
SYM_CODE_END(el1_sync)
.endm
SYM_CODE_START(trans_pgd_stub_vectors)
invalid_vector hyp_stub_el2t_sync_invalid // Synchronous EL2t
invalid_vector hyp_stub_el2t_irq_invalid // IRQ EL2t
invalid_vector hyp_stub_el2t_fiq_invalid // FIQ EL2t
invalid_vector hyp_stub_el2t_error_invalid // Error EL2t
invalid_vector hyp_stub_el2h_sync_invalid // Synchronous EL2h
invalid_vector hyp_stub_el2h_irq_invalid // IRQ EL2h
invalid_vector hyp_stub_el2h_fiq_invalid // FIQ EL2h
invalid_vector hyp_stub_el2h_error_invalid // Error EL2h
el1_sync_vector // Synchronous 64-bit EL1
invalid_vector hyp_stub_el1_irq_invalid // IRQ 64-bit EL1
invalid_vector hyp_stub_el1_fiq_invalid // FIQ 64-bit EL1
invalid_vector hyp_stub_el1_error_invalid // Error 64-bit EL1
invalid_vector hyp_stub_32b_el1_sync_invalid // Synchronous 32-bit EL1
invalid_vector hyp_stub_32b_el1_irq_invalid // IRQ 32-bit EL1
invalid_vector hyp_stub_32b_el1_fiq_invalid // FIQ 32-bit EL1
invalid_vector hyp_stub_32b_el1_error_invalid // Error 32-bit EL1
.align 11
SYM_INNER_LABEL(__trans_pgd_stub_vectors_end, SYM_L_LOCAL)
SYM_CODE_END(trans_pgd_stub_vectors)
# Check that trans_pgd_stub_vectors didn't overflow
.org . - (__trans_pgd_stub_vectors_end - trans_pgd_stub_vectors) + SZ_2K
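The closing .org line is a build-time size check: the location counter may only move forward, so if the vector table grows past SZ_2K the expression moves it backwards and assembly fails. The same idea expressed in C, illustrative only, since sizes of assembly objects are not normally visible to C code:

#include <linux/build_bug.h>

/* Fail the build if an object outgrows the space reserved for it. */
#define ASSERT_FITS(size, reserved) \
	static_assert((size) <= (reserved), "object overflows its reservation")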

arch/arm64/mm/trans_pgd.c

@ -5,8 +5,8 @@
*
* This file derived from: arch/arm64/kernel/hibernate.c
*
* Copyright (c) 2020, Microsoft Corporation.
* Pavel Tatashin <pasha.tatashin@soleen.com>
* Copyright (c) 2021, Microsoft Corporation.
* Pasha Tatashin <pasha.tatashin@soleen.com>
*
*/
@ -217,63 +217,6 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp,
return rc;
}
/*
* Add map entry to trans_pgd for a base-size page at PTE level.
* info: contains allocator and its argument
* trans_pgd: page table in which new map is added.
* page: page to be mapped.
* dst_addr: new VA address for the page
* pgprot: protection for the page.
*
* Returns 0 on success, and -ENOMEM on failure.
*/
int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
void *page, unsigned long dst_addr, pgprot_t pgprot)
{
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
if (pgd_none(READ_ONCE(*pgdp))) {
p4dp = trans_alloc(info);
if (!pgdp)
return -ENOMEM;
pgd_populate(NULL, pgdp, p4dp);
}
p4dp = p4d_offset(pgdp, dst_addr);
if (p4d_none(READ_ONCE(*p4dp))) {
pudp = trans_alloc(info);
if (!pudp)
return -ENOMEM;
p4d_populate(NULL, p4dp, pudp);
}
pudp = pud_offset(p4dp, dst_addr);
if (pud_none(READ_ONCE(*pudp))) {
pmdp = trans_alloc(info);
if (!pmdp)
return -ENOMEM;
pud_populate(NULL, pudp, pmdp);
}
pmdp = pmd_offset(pudp, dst_addr);
if (pmd_none(READ_ONCE(*pmdp))) {
ptep = trans_alloc(info);
if (!ptep)
return -ENOMEM;
pmd_populate_kernel(NULL, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, dst_addr);
set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot));
return 0;
}
/*
* The page we want to idmap may be outside the range covered by VA_BITS that
* can be built using the kernel's p?d_populate() helpers. As a one off, for a
@ -322,3 +265,26 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
return 0;
}
/*
* Create a copy of the vector table so we can call HVC_SET_VECTORS or
* HVC_SOFT_RESTART from contexts where the table may be overwritten.
*/
int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info,
phys_addr_t *el2_vectors)
{
void *hyp_stub = trans_alloc(info);
if (!hyp_stub)
return -ENOMEM;
*el2_vectors = virt_to_phys(hyp_stub);
memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN);
caches_clean_inval_pou((unsigned long)hyp_stub,
(unsigned long)hyp_stub +
ARM64_VECTOR_TABLE_LEN);
dcache_clean_inval_poc((unsigned long)hyp_stub,
(unsigned long)hyp_stub +
ARM64_VECTOR_TABLE_LEN);
return 0;
}
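Condensed, both users in this series follow the same pattern around this helper. A sketch assembled from the hibernate and kexec hunks above — install_safe_el2_vectors is a hypothetical name; kexec issues __hyp_set_vectors from machine_kexec, while hibernate defers it until just before jumping to the safe page:

#include <asm/trans_pgd.h>	/* trans_pgd_copy_el2_vectors() */
#include <asm/virt.h>		/* is_hyp_nvhe(), __hyp_set_vectors() */

static int install_safe_el2_vectors(struct trans_pgd_info *info)
{
	phys_addr_t el2_vectors;
	int rc;

	/* Only needed when booted at EL2 without VHE: only then is EL2
	 * still running on hyp-stub vectors that may be overwritten. */
	if (!is_hyp_nvhe())
		return 0;

	rc = trans_pgd_copy_el2_vectors(info, &el2_vectors);
	if (rc)
		return rc;

	__hyp_set_vectors(el2_vectors);	/* EL2 traps now land in the copy */
	return 0;
}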