2019-06-03 05:44:50 +00:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* Low-level CPU initialisation
|
|
|
|
* Based on arch/arm/kernel/head.S
|
|
|
|
*
|
|
|
|
* Copyright (C) 1994-2002 Russell King
|
|
|
|
* Copyright (C) 2003-2012 ARM Ltd.
|
|
|
|
* Authors: Catalin Marinas <catalin.marinas@arm.com>
|
|
|
|
* Will Deacon <will.deacon@arm.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/linkage.h>
|
|
|
|
#include <linux/init.h>
|
2020-06-09 04:32:42 +00:00
|
|
|
#include <linux/pgtable.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
|
arm64: simplify ptrauth initialization
Currently __cpu_setup conditionally initializes the address
authentication keys and enables them in SCTLR_EL1, doing so differently
for the primary CPU and secondary CPUs, and skipping this work for CPUs
returning from an idle state. For the latter case, cpu_do_resume
restores the keys and SCTLR_EL1 value after the MMU has been enabled.
This flow is rather difficult to follow, so instead let's move the
primary and secondary CPU initialization into their respective boot
paths. By following the example of cpu_do_resume and doing so once the
MMU is enabled, we can always initialize the keys from the values in
thread_struct, and avoid the machinery necessary to pass the keys in
secondary_data or open-coding initialization for the boot CPU.
This means we perform an additional RMW of SCTLR_EL1, but we already do
this in the cpu_do_resume path, and for other features in cpufeature.c,
so this isn't a major concern in a bringup path. Note that even while
the enable bits are clear, the key registers are accessible.
As this now renders the argument to __cpu_setup redundant, let's also
remove that entirely. Future extensions can follow a similar approach to
initialize values that differ for primary/secondary CPUs.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Cc: Amit Daniel Kachhap <amit.kachhap@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20200423101606.37601-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2020-04-23 10:16:06 +00:00
|
|
|
#include <asm/asm_pointer_auth.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <asm/assembler.h>
|
2016-04-18 15:09:47 +00:00
|
|
|
#include <asm/boot.h>
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
#include <asm/bug.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <asm/ptrace.h>
|
|
|
|
#include <asm/asm-offsets.h>
|
2014-03-26 18:25:55 +00:00
|
|
|
#include <asm/cache.h>
|
2012-08-29 17:32:18 +00:00
|
|
|
#include <asm/cputype.h>
|
2020-12-02 18:41:04 +00:00
|
|
|
#include <asm/el2_setup.h>
|
2016-01-26 08:13:44 +00:00
|
|
|
#include <asm/elf.h>
|
2018-11-15 05:52:46 +00:00
|
|
|
#include <asm/image.h>
|
2015-10-19 13:19:27 +00:00
|
|
|
#include <asm/kernel-pgtable.h>
|
2014-02-19 09:33:14 +00:00
|
|
|
#include <asm/kvm_arm.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <asm/memory.h>
|
|
|
|
#include <asm/pgtable-hwdef.h>
|
|
|
|
#include <asm/page.h>
|
2020-04-27 16:00:16 +00:00
|
|
|
#include <asm/scs.h>
|
2016-02-23 10:31:42 +00:00
|
|
|
#include <asm/smp.h>
|
2015-10-19 13:19:35 +00:00
|
|
|
#include <asm/sysreg.h>
|
|
|
|
#include <asm/thread_info.h>
|
2012-10-26 14:40:05 +00:00
|
|
|
#include <asm/virt.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2017-03-23 19:00:46 +00:00
|
|
|
#include "efi-header.S"
|
|
|
|
|
2020-08-25 13:54:40 +00:00
|
|
|
#define __PHYS_OFFSET KERNEL_START
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2020-08-25 13:54:40 +00:00
|
|
|
#if (PAGE_OFFSET & 0x1fffff) != 0
|
2014-06-24 15:51:37 +00:00
|
|
|
#error PAGE_OFFSET must be at least 2MB aligned
|
2012-03-05 11:49:27 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Kernel startup entry point.
|
|
|
|
* ---------------------------
|
|
|
|
*
|
|
|
|
* The requirements are:
|
|
|
|
* MMU = off, D-cache = off, I-cache = on or off,
|
|
|
|
* x0 = physical address to the FDT blob.
|
|
|
|
*
|
|
|
|
* This code is mostly position independent so you call this at
|
2020-08-25 13:54:40 +00:00
|
|
|
* __pa(PAGE_OFFSET).
|
2012-03-05 11:49:27 +00:00
|
|
|
*
|
|
|
|
* Note that the callee-saved registers are used for storing variables
|
|
|
|
* that are useful before the MMU is enabled. The allocations are described
|
|
|
|
* in the entry routines.
|
|
|
|
*/
|
|
|
|
__HEAD
|
|
|
|
/*
|
|
|
|
* DO NOT MODIFY. Image header expected by Linux boot-loaders.
|
|
|
|
*/
|
2020-11-17 12:47:29 +00:00
|
|
|
efi_signature_nop // special NOP to identify as PE/COFF executable
|
2020-03-26 17:14:23 +00:00
|
|
|
b primary_entry // branch to kernel start, magic
|
2020-08-25 13:54:40 +00:00
|
|
|
.quad 0 // Image load offset from start of RAM, little-endian
|
2015-12-26 12:48:02 +00:00
|
|
|
le64sym _kernel_size_le // Effective size of kernel image, little-endian
|
|
|
|
le64sym _kernel_flags_le // Informative flags, little-endian
|
2013-08-14 23:10:00 +00:00
|
|
|
.quad 0 // reserved
|
|
|
|
.quad 0 // reserved
|
|
|
|
.quad 0 // reserved
|
2018-11-15 05:52:46 +00:00
|
|
|
.ascii ARM64_IMAGE_MAGIC // Magic number
|
2020-11-17 12:47:29 +00:00
|
|
|
.long .Lpe_header_offset // Offset to the PE header.
|
2014-04-16 02:47:52 +00:00
|
|
|
|
2017-03-23 19:00:46 +00:00
|
|
|
__EFI_PE_HEADER
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2016-03-30 15:43:07 +00:00
|
|
|
__INIT
|
|
|
|
|
2016-08-31 11:05:17 +00:00
|
|
|
/*
|
|
|
|
* The following callee saved general purpose registers are used on the
|
|
|
|
* primary lowlevel boot path:
|
|
|
|
*
|
|
|
|
* Register Scope Purpose
|
2020-03-26 17:14:23 +00:00
|
|
|
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
|
|
|
|
* x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
|
|
|
|
* x28 __create_page_tables() callee preserved temp register
|
|
|
|
* x19/x20 __primary_switch() callee preserved temp registers
|
|
|
|
* x24 __primary_switch() .. relocate_kernel() current RELR displacement
|
2016-08-31 11:05:17 +00:00
|
|
|
*/
|
2020-03-26 17:14:23 +00:00
|
|
|
SYM_CODE_START(primary_entry)
|
2015-03-17 09:55:12 +00:00
|
|
|
bl preserve_boot_args // stashes x0..x3 in boot_args, leaves the FDT pointer in x21
|
2020-11-13 12:49:23 +00:00
|
|
|
bl init_kernel_el // w0=cpu_boot_mode
|
2016-08-31 11:05:15 +00:00
|
|
|
adrp x23, __PHYS_OFFSET // PC-relative page address of __PHYS_OFFSET (KERNEL_START)
|
|
|
|
and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
|
2013-10-11 13:52:16 +00:00
|
|
|
bl set_cpu_boot_mode_flag // NOTE(review): presumably consumes w0 from init_kernel_el (x23 setup above leaves it intact) - confirm
|
2016-08-16 19:02:32 +00:00
|
|
|
bl __create_page_tables // sets up the initial page tables; it keeps its return address in x28
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
2015-03-18 14:55:20 +00:00
|
|
|
* The following calls CPU setup code, see arch/arm64/mm/proc.S for
|
|
|
|
* details.
|
2012-03-05 11:49:27 +00:00
|
|
|
* On return, the CPU will be ready for the MMU to be turned on and
|
|
|
|
* the TCR will have been set.
|
|
|
|
*/
|
2016-04-18 15:09:43 +00:00
|
|
|
bl __cpu_setup // initialise processor
|
2016-08-31 11:05:13 +00:00
|
|
|
b __primary_switch // plain branch (no link): control does not come back here
|
2020-03-26 17:14:23 +00:00
|
|
|
SYM_CODE_END(primary_entry)
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2015-03-17 09:55:12 +00:00
|
|
|
/*
|
|
|
|
* Preserve the arguments passed by the bootloader in x0 .. x3
|
|
|
|
*/
|
2020-02-18 19:58:34 +00:00
|
|
|
SYM_CODE_START_LOCAL(preserve_boot_args)
|
2015-03-17 09:55:12 +00:00
|
|
|
mov x21, x0 // x21=FDT
|
|
|
|
|
|
|
|
adr_l x0, boot_args // record the contents of
|
|
|
|
stp x21, x1, [x0] // x0 .. x3 at kernel entry
|
|
|
|
stp x2, x3, [x0, #16] // second pair goes 16 bytes in, right after the x0/x1 slots
|
|
|
|
|
|
|
|
dmb sy // needed before dc ivac with
|
|
|
|
// MMU off
|
|
|
|
|
2021-05-24 08:29:53 +00:00
|
|
|
add x1, x0, #0x20 // x1 = boot_args + 32 (4 x 8 bytes), i.e. just past the saved values
|
arm64: Rename arm64-internal cache maintenance functions
Although naming across the codebase isn't that consistent, it
tends to follow certain patterns. Moreover, the term "flush"
isn't defined in the Arm Architecture reference manual, and might
be interpreted to mean clean, invalidate, or both for a cache.
Rename arm64-internal functions to make the naming internally
consistent, as well as making it consistent with the Arm ARM, by
specifying whether it applies to the instruction, data, or both
caches, whether the operation is a clean, invalidate, or both.
Also specify which point the operation applies to, i.e., to the
point of unification (PoU), coherency (PoC), or persistence
(PoP).
This commit applies the following sed transformation to all files
under arch/arm64:
"s/\b__flush_cache_range\b/caches_clean_inval_pou_macro/g;"\
"s/\b__flush_icache_range\b/caches_clean_inval_pou/g;"\
"s/\binvalidate_icache_range\b/icache_inval_pou/g;"\
"s/\b__flush_dcache_area\b/dcache_clean_inval_poc/g;"\
"s/\b__inval_dcache_area\b/dcache_inval_poc/g;"\
"s/__clean_dcache_area_poc\b/dcache_clean_poc/g;"\
"s/\b__clean_dcache_area_pop\b/dcache_clean_pop/g;"\
"s/\b__clean_dcache_area_pou\b/dcache_clean_pou/g;"\
"s/\b__flush_cache_user_range\b/caches_clean_inval_user_pou/g;"\
"s/\b__flush_icache_all\b/icache_inval_all_pou/g;"
Note that __clean_dcache_area_poc is deliberately missing a word
boundary check at the beginning in order to match the efistub
symbols in image-vars.h.
Also note that, despite its name, __flush_icache_range operates
on both instruction and data caches. The name change here
reflects that.
No functional change intended.
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-19-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-24 08:30:01 +00:00
|
|
|
b dcache_inval_poc // tail call: lr is untouched here, so the callee returns to our caller
|
2020-02-18 19:58:34 +00:00
|
|
|
SYM_CODE_END(preserve_boot_args)
|
2015-03-17 09:55:12 +00:00
|
|
|
|
2014-11-21 21:50:41 +00:00
|
|
|
/*
|
|
|
|
* Macro to create a table entry to the next page.
|
|
|
|
*
|
|
|
|
* tbl: page table address
|
|
|
|
* virt: virtual address
|
|
|
|
* shift: #imm page table shift
|
|
|
|
* ptrs: #imm pointers per table page
|
|
|
|
*
|
|
|
|
* Preserves: virt
|
arm64: allow ID map to be extended to 52 bits
Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.
This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.
One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-12-13 17:07:24 +00:00
|
|
|
* Corrupts: ptrs, tmp1, tmp2
|
2014-11-21 21:50:41 +00:00
|
|
|
* Returns: tbl -> next level table page address
|
|
|
|
*/
|
|
|
|
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
|
2017-12-13 17:07:19 +00:00
|
|
|
add \tmp1, \tbl, #PAGE_SIZE // tmp1 = address of the page that follows tbl
|
2018-01-29 11:59:59 +00:00
|
|
|
phys_to_pte \tmp2, \tmp1 // tmp2 = tmp1 run through phys_to_pte (helper defined outside this file section)
|
2017-12-13 17:07:19 +00:00
|
|
|
orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
|
2014-11-21 21:50:41 +00:00
|
|
|
lsr \tmp1, \virt, #\shift // tmp1 = virt >> shift (tmp1 is reused; the entry now lives in tmp2)
|
arm64: allow ID map to be extended to 52 bits
Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.
This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.
One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-12-13 17:07:24 +00:00
|
|
|
sub \ptrs, \ptrs, #1 // ptrs becomes the index mask (ptrs - 1); this is why ptrs is listed as corrupted
|
|
|
|
and \tmp1, \tmp1, \ptrs // table index
|
2014-11-21 21:50:41 +00:00
|
|
|
str \tmp2, [\tbl, \tmp1, lsl #3] // store the entry at tbl + index * 8
|
|
|
|
add \tbl, \tbl, #PAGE_SIZE // next level table page
|
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
|
2018-01-11 10:11:59 +00:00
|
|
|
* Macro to populate page table entries, these entries can be pointers to the next level
|
|
|
|
* or last level entries pointing to physical memory.
|
2014-11-21 21:50:41 +00:00
|
|
|
*
|
2018-01-11 10:11:59 +00:00
|
|
|
* tbl: page table address
|
|
|
|
* rtbl: pointer to page table or physical memory
|
|
|
|
* index: start index to write
|
|
|
|
* eindex: end index to write - [index, eindex] written to
|
|
|
|
* flags: flags for pagetable entry to or in
|
|
|
|
* inc: increment to rtbl between each entry
|
|
|
|
* tmp1: temporary variable
|
|
|
|
*
|
|
|
|
* Preserves: tbl, eindex, flags, inc
|
|
|
|
* Corrupts: index, tmp1
|
|
|
|
* Returns: rtbl
|
2014-11-21 21:50:41 +00:00
|
|
|
*/
|
2018-01-11 10:11:59 +00:00
|
|
|
.macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
|
2018-01-29 11:59:59 +00:00
|
|
|
.Lpe\@: phys_to_pte \tmp1, \rtbl // loop head: tmp1 = rtbl run through phys_to_pte (helper defined elsewhere)
|
2018-01-11 10:11:59 +00:00
|
|
|
orr \tmp1, \tmp1, \flags // tmp1 = table entry
|
|
|
|
str \tmp1, [\tbl, \index, lsl #3] // write the entry at tbl + index * 8
|
|
|
|
add \rtbl, \rtbl, \inc // rtbl = pa next level
|
|
|
|
add \index, \index, #1 // move on to the next slot
|
|
|
|
cmp \index, \eindex
|
|
|
|
b.ls .Lpe\@ // unsigned index <= eindex: loop again, so eindex itself is written
|
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compute indices of table entries from virtual address range. If multiple entries
|
|
|
|
* were needed in the previous page table level then the next page table level is assumed
|
|
|
|
* to be composed of multiple pages. (This effectively scales the end index).
|
|
|
|
*
|
|
|
|
* vstart: virtual address of start of range
|
|
|
|
* vend: virtual address of end of range
|
|
|
|
* shift: shift used to transform virtual address into index
|
|
|
|
* ptrs: number of entries in page table
|
|
|
|
* istart: index in table corresponding to vstart
|
|
|
|
* iend: index in table corresponding to vend
|
|
|
|
* count: On entry: how many extra entries were required in previous level, scales
|
|
|
|
* our end index.
|
|
|
|
* On exit: returns how many extra entries required for next page table level
|
|
|
|
*
|
|
|
|
* Preserves: vstart, vend, shift, ptrs
|
|
|
|
* Returns: istart, iend, count
|
|
|
|
*/
|
|
|
|
.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
|
|
|
|
lsr \iend, \vend, \shift // iend = vend >> shift
|
|
|
|
mov \istart, \ptrs // istart is only scratch at this point
|
|
|
|
sub \istart, \istart, #1 // istart = ptrs - 1, the index mask
|
|
|
|
and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
|
|
|
|
mov \istart, \ptrs // reload ptrs into the scratch register
|
|
|
|
mul \istart, \istart, \count // istart = ptrs * count (count as passed in by the caller)
|
2021-05-18 10:14:03 +00:00
|
|
|
add \iend, \iend, \istart // iend += count * ptrs
|
2018-01-11 10:11:59 +00:00
|
|
|
// our entries span multiple tables
|
|
|
|
|
|
|
|
|
lsr \istart, \vstart, \shift // istart = vstart >> shift
|
|
|
|
mov \count, \ptrs // incoming count is no longer needed; reuse it for the mask
|
|
|
|
sub \count, \count, #1 // count = ptrs - 1
|
|
|
|
and \istart, \istart, \count // istart = (vstart >> shift) & (ptrs - 1)
|
|
|
|
|
|
|
|
sub \count, \iend, \istart // count out = iend - istart, handed on to the next level
|
2014-11-21 21:50:41 +00:00
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
|
2018-01-11 10:11:59 +00:00
|
|
|
* Map memory for specified virtual address range. Each level of page table needed supports
|
|
|
|
* multiple entries. If a level requires n entries the next page table level is assumed to be
|
|
|
|
* formed from n pages.
|
|
|
|
*
|
|
|
|
* tbl: location of page table
|
|
|
|
* rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
|
|
|
|
* vstart: start address to map
|
|
|
|
* vend: end address to map - we map [vstart, vend]
|
|
|
|
* flags: flags to use to map last level entries
|
|
|
|
* phys: physical address corresponding to vstart - physical memory is contiguous
|
|
|
|
* pgds: the number of pgd entries
|
2014-11-21 21:50:41 +00:00
|
|
|
*
|
2018-01-11 10:11:59 +00:00
|
|
|
* Temporaries: istart, iend, tmp, count, sv - these need to be different registers
|
|
|
|
* Preserves: vstart, vend, flags
|
|
|
|
* Corrupts: tbl, rtbl, istart, iend, tmp, count, sv
|
2014-11-21 21:50:41 +00:00
|
|
|
*/
|
2018-01-11 10:11:59 +00:00
|
|
|
.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
|
|
|
|
add \rtbl, \tbl, #PAGE_SIZE // rtbl = page right after tbl, used as the first next-level table
|
|
|
|
mov \sv, \rtbl // remember where the next level starts; it becomes tbl below
|
|
|
|
mov \count, #0 // top level: no extra entries carried over from a previous level
|
|
|
|
compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
|
|
|
|
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
|
|
|
|
mov \tbl, \sv // descend: tbl = first table page of the next level
|
|
|
|
mov \sv, \rtbl // populate_entries advanced rtbl past that level; save it as the start of the one after
|
|
|
|
|
|
|
|
#if SWAPPER_PGTABLE_LEVELS > 3
|
|
|
|
compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
|
|
|
|
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
|
|
|
|
mov \tbl, \sv // same descend step as above
|
|
|
|
mov \sv, \rtbl
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if SWAPPER_PGTABLE_LEVELS > 2
|
|
|
|
compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
|
|
|
|
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
|
|
|
|
mov \tbl, \sv // last descend; sv is not refreshed because no further table level follows
|
|
|
|
#endif
|
|
|
|
|
|
|
|
compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
|
|
|
|
bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 // count reused as scratch: phys rounded down to a SWAPPER_BLOCK_SIZE boundary
|
|
|
|
populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
|
2014-11-21 21:50:41 +00:00
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Setup the initial page tables. We only setup the barest amount which is
|
|
|
|
* required to get the kernel running. The following sections are required:
|
|
|
|
* - identity mapping to enable the MMU (low address, TTBR0)
|
|
|
|
* - first few MB of the kernel linear mapping to jump to once the MMU has
|
2015-06-01 11:40:32 +00:00
|
|
|
* been enabled
|
2014-11-21 21:50:41 +00:00
|
|
|
*/
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START_LOCAL(__create_page_tables)
|
arm64: add support for kernel ASLR
This adds support for KASLR, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 13:12:01 +00:00
|
|
|
mov x28, lr
|
2014-11-21 21:50:41 +00:00
|
|
|
|
|
|
|
/*
|
2018-09-24 16:56:18 +00:00
|
|
|
* Invalidate the init page tables to avoid potential dirty cache lines
|
|
|
|
* being evicted. Other page tables are allocated in rodata as part of
|
|
|
|
* the kernel image, and thus are clean to the PoC per the boot
|
|
|
|
* protocol.
|
2014-11-21 21:50:41 +00:00
|
|
|
*/
|
2018-09-24 16:56:18 +00:00
|
|
|
adrp x0, init_pg_dir
|
arm64/mm: Separate boot-time page tables from swapper_pg_dir
Since the address of swapper_pg_dir is fixed for a given kernel image,
it is an attractive target for manipulation via an arbitrary write. To
mitigate this we'd like to make it read-only by moving it into the
rodata section.
We require that swapper_pg_dir is at a fixed offset from tramp_pg_dir
and reserved_ttbr0, so these will also need to move into rodata.
However, swapper_pg_dir is allocated along with some transient page
tables used for boot which we do not want to move into rodata.
As a step towards this, this patch separates the boot-time page tables
into a new init_pg_dir, and reduces swapper_pg_dir to the single page it
needs to be. This allows us to retain the relationship between
swapper_pg_dir, tramp_pg_dir, and reserved_ttbr0, while cleanly
separating these from the boot-time page tables.
The init_pg_dir holds all of the pgd/pud/pmd/pte levels needed during
boot, and all of these levels will be freed when we switch to the
swapper_pg_dir, which is initialized by the existing code in
paging_init(). Since we start off on the init_pg_dir, we no longer need
to allocate a transient page table in paging_init() in order to ensure
that swapper_pg_dir isn't live while we initialize it.
There should be no functional change as a result of this patch.
Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
Reviewed-by: James Morse <james.morse@arm.com>
[Mark: place init_pg_dir after BSS, fold mm changes, commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-09-24 14:47:49 +00:00
|
|
|
adrp x1, init_pg_end
|
arm64: Rename arm64-internal cache maintenance functions
Although naming across the codebase isn't that consistent, it
tends to follow certain patterns. Moreover, the term "flush"
isn't defined in the Arm Architecture reference manual, and might
be interpreted to mean clean, invalidate, or both for a cache.
Rename arm64-internal functions to make the naming internally
consistent, as well as making it consistent with the Arm ARM, by
specifying whether it applies to the instruction, data, or both
caches, whether the operation is a clean, invalidate, or both.
Also specify which point the operation applies to, i.e., to the
point of unification (PoU), coherency (PoC), or persistence
(PoP).
This commit applies the following sed transformation to all files
under arch/arm64:
"s/\b__flush_cache_range\b/caches_clean_inval_pou_macro/g;"\
"s/\b__flush_icache_range\b/caches_clean_inval_pou/g;"\
"s/\binvalidate_icache_range\b/icache_inval_pou/g;"\
"s/\b__flush_dcache_area\b/dcache_clean_inval_poc/g;"\
"s/\b__inval_dcache_area\b/dcache_inval_poc/g;"\
"s/__clean_dcache_area_poc\b/dcache_clean_poc/g;"\
"s/\b__clean_dcache_area_pop\b/dcache_clean_pop/g;"\
"s/\b__clean_dcache_area_pou\b/dcache_clean_pou/g;"\
"s/\b__flush_cache_user_range\b/caches_clean_inval_user_pou/g;"\
"s/\b__flush_icache_all\b/icache_inval_all_pou/g;"
Note that __clean_dcache_area_poc is deliberately missing a word
boundary check at the beginning in order to match the efistub
symbols in image-vars.h.
Also note that, despite its name, __flush_icache_range operates
on both instruction and data caches. The name change here
reflects that.
No functional change intended.
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-19-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-24 08:30:01 +00:00
|
|
|
bl dcache_inval_poc
|
2014-11-21 21:50:41 +00:00
|
|
|
|
|
|
|
/*
|
2018-09-24 16:56:18 +00:00
|
|
|
* Clear the init page tables.
|
2014-11-21 21:50:41 +00:00
|
|
|
*/
|
2018-09-24 16:56:18 +00:00
|
|
|
adrp x0, init_pg_dir
|
arm64/mm: Separate boot-time page tables from swapper_pg_dir
Since the address of swapper_pg_dir is fixed for a given kernel image,
it is an attractive target for manipulation via an arbitrary write. To
mitigate this we'd like to make it read-only by moving it into the
rodata section.
We require that swapper_pg_dir is at a fixed offset from tramp_pg_dir
and reserved_ttbr0, so these will also need to move into rodata.
However, swapper_pg_dir is allocated along with some transient page
tables used for boot which we do not want to move into rodata.
As a step towards this, this patch separates the boot-time page tables
into a new init_pg_dir, and reduces swapper_pg_dir to the single page it
needs to be. This allows us to retain the relationship between
swapper_pg_dir, tramp_pg_dir, and reserved_ttbr0, while cleanly
separating these from the boot-time page tables.
The init_pg_dir holds all of the pgd/pud/pmd/pte levels needed during
boot, and all of these levels will be freed when we switch to the
swapper_pg_dir, which is initialized by the existing code in
paging_init(). Since we start off on the init_pg_dir, we no longer need
to allocate a transient page table in paging_init() in order to ensure
that swapper_pg_dir isn't live while we initialize it.
There should be no functional change as a result of this patch.
Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
Reviewed-by: James Morse <james.morse@arm.com>
[Mark: place init_pg_dir after BSS, fold mm changes, commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-09-24 14:47:49 +00:00
|
|
|
adrp x1, init_pg_end
|
2018-01-11 10:11:59 +00:00
|
|
|
sub x1, x1, x0
|
2014-11-21 21:50:41 +00:00
|
|
|
1: stp xzr, xzr, [x0], #16
|
|
|
|
stp xzr, xzr, [x0], #16
|
|
|
|
stp xzr, xzr, [x0], #16
|
|
|
|
stp xzr, xzr, [x0], #16
|
2017-07-25 10:55:39 +00:00
|
|
|
subs x1, x1, #64
|
|
|
|
b.ne 1b
|
2014-11-21 21:50:41 +00:00
|
|
|
|
2016-04-18 15:09:45 +00:00
|
|
|
mov x7, SWAPPER_MM_MMUFLAGS
|
2014-11-21 21:50:41 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create the identity mapping.
|
|
|
|
*/
|
2016-08-16 19:02:32 +00:00
|
|
|
adrp x0, idmap_pg_dir
|
2015-06-01 11:40:33 +00:00
|
|
|
adrp x3, __idmap_text_start // __pa(__idmap_text_start)
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
|
2019-08-07 15:55:22 +00:00
|
|
|
#ifdef CONFIG_ARM64_VA_BITS_52
|
2018-12-06 22:50:41 +00:00
|
|
|
mrs_s x6, SYS_ID_AA64MMFR2_EL1
|
|
|
|
and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
|
|
|
|
mov x5, #52
|
|
|
|
cbnz x6, 1f
|
|
|
|
#endif
|
2019-08-07 15:55:17 +00:00
|
|
|
mov x5, #VA_BITS_MIN
|
2018-12-06 22:50:41 +00:00
|
|
|
1:
|
2019-08-07 15:55:18 +00:00
|
|
|
adr_l x6, vabits_actual
|
2018-12-06 22:50:41 +00:00
|
|
|
str x5, [x6]
|
|
|
|
dmb sy
|
|
|
|
dc ivac, x6 // Invalidate potentially stale cache line
|
|
|
|
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
/*
|
arm64: allow ID map to be extended to 52 bits
Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.
This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.
One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-12-13 17:07:24 +00:00
|
|
|
* VA_BITS may be too small to allow for an ID mapping to be created
|
|
|
|
* that covers system RAM if that is located sufficiently high in the
|
|
|
|
* physical address space. So for the ID map, use an extended virtual
|
|
|
|
* range in that case, and configure an additional translation level
|
|
|
|
* if needed.
|
|
|
|
*
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
* Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
|
2015-06-01 11:40:33 +00:00
|
|
|
* entire ID map region can be mapped. As T0SZ == (64 - #bits used),
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
* this number conveniently equals the number of leading zeroes in
|
2015-06-01 11:40:33 +00:00
|
|
|
* the physical address of __idmap_text_end.
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
*/
|
2015-06-01 11:40:33 +00:00
|
|
|
adrp x5, __idmap_text_end
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
clz x5, x5
|
2021-03-10 17:15:11 +00:00
|
|
|
cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
|
arm64: allow ID map to be extended to 52 bits
Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.
This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.
One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-12-13 17:07:24 +00:00
|
|
|
b.ge 1f // .. then skip VA range extension
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
|
2015-03-24 15:10:21 +00:00
|
|
|
adr_l x6, idmap_t0sz
|
|
|
|
str x5, [x6]
|
|
|
|
dmb sy
|
|
|
|
dc ivac, x6 // Invalidate potentially stale cache line
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
|
arm64: allow ID map to be extended to 52 bits
Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.
This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.
One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-12-13 17:07:24 +00:00
|
|
|
#if (VA_BITS < 48)
|
|
|
|
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
|
|
|
|
#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If VA_BITS < 48, we have to configure an additional table level.
|
|
|
|
* First, we have to verify our assumption that the current value of
|
|
|
|
* VA_BITS was chosen such that all translation levels are fully
|
|
|
|
* utilised, and that lowering T0SZ will always result in an additional
|
|
|
|
* translation level to be configured.
|
|
|
|
*/
|
|
|
|
#if VA_BITS != EXTRA_SHIFT
|
|
|
|
#error "Mismatch between VA_BITS and page size/number of translation levels"
|
arm64: mm: increase VA range of identity map
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.
This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-03-19 16:42:27 +00:00
|
|
|
#endif
|
|
|
|
|
arm64: allow ID map to be extended to 52 bits
Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.
This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.
One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-12-13 17:07:24 +00:00
|
|
|
mov x4, EXTRA_PTRS
|
|
|
|
create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
|
|
|
|
#else
|
|
|
|
/*
|
|
|
|
* If VA_BITS == 48, we don't have to configure an additional
|
|
|
|
* translation level, but the top-level table has more entries.
|
|
|
|
*/
|
|
|
|
mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
|
|
|
|
str_l x4, idmap_ptrs_per_pgd, x5
|
|
|
|
#endif
|
|
|
|
1:
|
|
|
|
ldr_l x4, idmap_ptrs_per_pgd
|
2015-06-01 11:40:33 +00:00
|
|
|
adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
|
2018-01-11 10:11:59 +00:00
|
|
|
|
|
|
|
map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
|
2014-11-21 21:50:41 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Map the kernel image (starting with PHYS_OFFSET).
|
|
|
|
*/
|
arm64/mm: Separate boot-time page tables from swapper_pg_dir
Since the address of swapper_pg_dir is fixed for a given kernel image,
it is an attractive target for manipulation via an arbitrary write. To
mitigate this we'd like to make it read-only by moving it into the
rodata section.
We require that swapper_pg_dir is at a fixed offset from tramp_pg_dir
and reserved_ttbr0, so these will also need to move into rodata.
However, swapper_pg_dir is allocated along with some transient page
tables used for boot which we do not want to move into rodata.
As a step towards this, this patch separates the boot-time page tables
into a new init_pg_dir, and reduces swapper_pg_dir to the single page it
needs to be. This allows us to retain the relationship between
swapper_pg_dir, tramp_pg_dir, and reserved_ttbr0, while cleanly
separating these from the boot-time page tables.
The init_pg_dir holds all of the pgd/pud/pmd/pte levels needed during
boot, and all of these levels will be freed when we switch to the
swapper_pg_dir, which is initialized by the existing code in
paging_init(). Since we start off on the init_pg_dir, we no longer need
to allocate a transient page table in paging_init() in order to ensure
that swapper_pg_dir isn't live while we initialize it.
There should be no functional change as a result of this patch.
Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
Reviewed-by: James Morse <james.morse@arm.com>
[Mark: place init_pg_dir after BSS, fold mm changes, commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-09-24 14:47:49 +00:00
|
|
|
adrp x0, init_pg_dir
|
2020-08-25 13:54:40 +00:00
|
|
|
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
|
arm64: add support for kernel ASLR
This adds support for KASLR, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 13:12:01 +00:00
|
|
|
add x5, x5, x23 // add KASLR displacement
|
arm64: allow ID map to be extended to 52 bits
Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.
This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.
One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-12-13 17:07:24 +00:00
|
|
|
mov x4, PTRS_PER_PGD
|
arm64: don't map TEXT_OFFSET bytes below the kernel if we can avoid it
For historical reasons, the kernel Image must be loaded into physical
memory at a 512 KB offset above a 2 MB aligned base address. The region
between the base address and the start of the kernel Image has no
significance to the kernel itself, but it is currently mapped explicitly
into the early kernel VMA range for all translation granules.
In some cases (i.e., 4 KB granule), this is unavoidable, due to the 2 MB
granularity of the early kernel mappings. However, in other cases, e.g.,
when running with larger page sizes, or in the future, with more granular
KASLR, there is no reason to map it explicitly like we do currently.
So update the logic so that the region is mapped only if that happens as
a side effect of rounding the start address of the kernel to swapper block
size, and leave it unmapped otherwise.
Since the symbol kernel_img_size now simply resolves to the memory
footprint of the kernel Image, we can drop its definition from image.h
and opencode its calculation.
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-04-18 15:09:46 +00:00
|
|
|
adrp x6, _end // runtime __pa(_end)
|
|
|
|
adrp x3, _text // runtime __pa(_text)
|
|
|
|
sub x6, x6, x3 // _end - _text
|
|
|
|
add x6, x6, x5 // runtime __va(_end)
|
2018-01-11 10:11:59 +00:00
|
|
|
|
|
|
|
map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
|
2014-11-21 21:50:41 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Since the page tables have been populated with non-cacheable
|
arm64/kernel: Fix range on invalidating dcache for boot page tables
Prior to commit 8eb7e28d4c642c31 ("arm64/mm: move runtime pgds to
rodata"), idmap_pg_dir, tramp_pg_dir, reserved_ttbr0, swapper_pg_dir,
and init_pg_dir were contiguous at the end of the kernel image. The
maintenance at the end of __create_page_tables assumed these were
contiguous, and affected everything from the start of idmap_pg_dir
to the end of init_pg_dir.
That commit moved all but init_pg_dir into the .rodata section, with
other data placed between idmap_pg_dir and init_pg_dir, but did not
update the maintenance. Hence the maintenance is performed on much
more data than necessary (but as the bootloader previously made this
clean to the PoC there is no functional problem).
As we only alter idmap_pg_dir, and init_pg_dir, we only need to perform
maintenance for these. As the other dirs are in .rodata, the bootloader
will have initialised them as expected and cleaned them to the PoC. The
kernel will initialize them as necessary after enabling the MMU.
This patch reworks the maintenance to only cover the idmap_pg_dir and
init_pg_dir to avoid this unnecessary work.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20200427235700.112220-1-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
2020-04-27 23:57:00 +00:00
|
|
|
* accesses (MMU disabled), invalidate those tables again to
|
|
|
|
* remove any speculatively loaded cache lines.
|
2014-11-21 21:50:41 +00:00
|
|
|
*/
|
arm64/kernel: Fix range on invalidating dcache for boot page tables
Prior to commit 8eb7e28d4c642c31 ("arm64/mm: move runtime pgds to
rodata"), idmap_pg_dir, tramp_pg_dir, reserved_ttbr0, swapper_pg_dir,
and init_pg_dir were contiguous at the end of the kernel image. The
maintenance at the end of __create_page_tables assumed these were
contiguous, and affected everything from the start of idmap_pg_dir
to the end of init_pg_dir.
That commit moved all but init_pg_dir into the .rodata section, with
other data placed between idmap_pg_dir and init_pg_dir, but did not
update the maintenance. Hence the maintenance is performed on much
more data than necessary (but as the bootloader previously made this
clean to the PoC there is no functional problem).
As we only alter idmap_pg_dir, and init_pg_dir, we only need to perform
maintenance for these. As the other dirs are in .rodata, the bootloader
will have initialised them as expected and cleaned them to the PoC. The
kernel will initialize them as necessary after enabling the MMU.
This patch reworks the maintenance to only cover the idmap_pg_dir and
init_pg_dir to avoid this unnecessary work.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20200427235700.112220-1-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
2020-04-27 23:57:00 +00:00
|
|
|
dmb sy
|
|
|
|
|
2016-08-16 19:02:32 +00:00
|
|
|
adrp x0, idmap_pg_dir
|
arm64/kernel: Fix range on invalidating dcache for boot page tables
Prior to commit 8eb7e28d4c642c31 ("arm64/mm: move runtime pgds to
rodata"), idmap_pg_dir, tramp_pg_dir, reserved_ttbr0, swapper_pg_dir,
and init_pg_dir were contiguous at the end of the kernel image. The
maintenance at the end of __create_page_tables assumed these were
contiguous, and affected everything from the start of idmap_pg_dir
to the end of init_pg_dir.
That commit moved all but init_pg_dir into the .rodata section, with
other data placed between idmap_pg_dir and init_pg_dir, but did not
update the maintenance. Hence the maintenance is performed on much
more data than necessary (but as the bootloader previously made this
clean to the PoC there is no functional problem).
As we only alter idmap_pg_dir, and init_pg_dir, we only need to perform
maintenance for these. As the other dirs are in .rodata, the bootloader
will have initialised them as expected and cleaned them to the PoC. The
kernel will initialize them as necessary after enabling the MMU.
This patch reworks the maintenance to only cover the idmap_pg_dir and
init_pg_dir to avoid this unnecessary work.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20200427235700.112220-1-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
2020-04-27 23:57:00 +00:00
|
|
|
adrp x1, idmap_pg_end
|
arm64: Rename arm64-internal cache maintenance functions
Although naming across the codebase isn't that consistent, it
tends to follow certain patterns. Moreover, the term "flush"
isn't defined in the Arm Architecture reference manual, and might
be interpreted to mean clean, invalidate, or both for a cache.
Rename arm64-internal functions to make the naming internally
consistent, as well as making it consistent with the Arm ARM, by
specifying whether it applies to the instruction, data, or both
caches, whether the operation is a clean, invalidate, or both.
Also specify which point the operation applies to, i.e., to the
point of unification (PoU), coherency (PoC), or persistence
(PoP).
This commit applies the following sed transformation to all files
under arch/arm64:
"s/\b__flush_cache_range\b/caches_clean_inval_pou_macro/g;"\
"s/\b__flush_icache_range\b/caches_clean_inval_pou/g;"\
"s/\binvalidate_icache_range\b/icache_inval_pou/g;"\
"s/\b__flush_dcache_area\b/dcache_clean_inval_poc/g;"\
"s/\b__inval_dcache_area\b/dcache_inval_poc/g;"\
"s/__clean_dcache_area_poc\b/dcache_clean_poc/g;"\
"s/\b__clean_dcache_area_pop\b/dcache_clean_pop/g;"\
"s/\b__clean_dcache_area_pou\b/dcache_clean_pou/g;"\
"s/\b__flush_cache_user_range\b/caches_clean_inval_user_pou/g;"\
"s/\b__flush_icache_all\b/icache_inval_all_pou/g;"
Note that __clean_dcache_area_poc is deliberately missing a word
boundary check at the beginning in order to match the efistub
symbols in image-vars.h.
Also note that, despite its name, __flush_icache_range operates
on both instruction and data caches. The name change here
reflects that.
No functional change intended.
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-19-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-24 08:30:01 +00:00
|
|
|
bl dcache_inval_poc
|
arm64/kernel: Fix range on invalidating dcache for boot page tables
Prior to commit 8eb7e28d4c642c31 ("arm64/mm: move runtime pgds to
rodata"), idmap_pg_dir, tramp_pg_dir, reserved_ttbr0, swapper_pg_dir,
and init_pg_dir were contiguous at the end of the kernel image. The
maintenance at the end of __create_page_tables assumed these were
contiguous, and affected everything from the start of idmap_pg_dir
to the end of init_pg_dir.
That commit moved all but init_pg_dir into the .rodata section, with
other data placed between idmap_pg_dir and init_pg_dir, but did not
update the maintenance. Hence the maintenance is performed on much
more data than necessary (but as the bootloader previously made this
clean to the PoC there is no functional problem).
As we only alter idmap_pg_dir, and init_pg_dir, we only need to perform
maintenance for these. As the other dirs are in .rodata, the bootloader
will have initialised them as expected and cleaned them to the PoC. The
kernel will initialize them as necessary after enabling the MMU.
This patch reworks the maintenance to only cover the idmap_pg_dir and
init_pg_dir to avoid this unnecessary work.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20200427235700.112220-1-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
2020-04-27 23:57:00 +00:00
|
|
|
|
|
|
|
adrp x0, init_pg_dir
|
arm64/mm: Separate boot-time page tables from swapper_pg_dir
Since the address of swapper_pg_dir is fixed for a given kernel image,
it is an attractive target for manipulation via an arbitrary write. To
mitigate this we'd like to make it read-only by moving it into the
rodata section.
We require that swapper_pg_dir is at a fixed offset from tramp_pg_dir
and reserved_ttbr0, so these will also need to move into rodata.
However, swapper_pg_dir is allocated along with some transient page
tables used for boot which we do not want to move into rodata.
As a step towards this, this patch separates the boot-time page tables
into a new init_pg_dir, and reduces swapper_pg_dir to the single page it
needs to be. This allows us to retain the relationship between
swapper_pg_dir, tramp_pg_dir, and reserved_ttbr0, while cleanly
separating these from the boot-time page tables.
The init_pg_dir holds all of the pgd/pud/pmd/pte levels needed during
boot, and all of these levels will be freed when we switch to the
swapper_pg_dir, which is initialized by the existing code in
paging_init(). Since we start off on the init_pg_dir, we no longer need
to allocate a transient page table in paging_init() in order to ensure
that swapper_pg_dir isn't live while we initialize it.
There should be no functional change as a result of this patch.
Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
Reviewed-by: James Morse <james.morse@arm.com>
[Mark: place init_pg_dir after BSS, fold mm changes, commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-09-24 14:47:49 +00:00
|
|
|
adrp x1, init_pg_end
|
arm64: Rename arm64-internal cache maintenance functions
Although naming across the codebase isn't that consistent, it
tends to follow certain patterns. Moreover, the term "flush"
isn't defined in the Arm Architecture reference manual, and might
be interpreted to mean clean, invalidate, or both for a cache.
Rename arm64-internal functions to make the naming internally
consistent, as well as making it consistent with the Arm ARM, by
specifying whether it applies to the instruction, data, or both
caches, whether the operation is a clean, invalidate, or both.
Also specify which point the operation applies to, i.e., to the
point of unification (PoU), coherency (PoC), or persistence
(PoP).
This commit applies the following sed transformation to all files
under arch/arm64:
"s/\b__flush_cache_range\b/caches_clean_inval_pou_macro/g;"\
"s/\b__flush_icache_range\b/caches_clean_inval_pou/g;"\
"s/\binvalidate_icache_range\b/icache_inval_pou/g;"\
"s/\b__flush_dcache_area\b/dcache_clean_inval_poc/g;"\
"s/\b__inval_dcache_area\b/dcache_inval_poc/g;"\
"s/__clean_dcache_area_poc\b/dcache_clean_poc/g;"\
"s/\b__clean_dcache_area_pop\b/dcache_clean_pop/g;"\
"s/\b__clean_dcache_area_pou\b/dcache_clean_pou/g;"\
"s/\b__flush_cache_user_range\b/caches_clean_inval_user_pou/g;"\
"s/\b__flush_icache_all\b/icache_inval_all_pou/g;"
Note that __clean_dcache_area_poc is deliberately missing a word
boundary check at the beginning in order to match the efistub
symbols in image-vars.h.
Also note that, despite its name, __flush_icache_range operates
on both instruction and data caches. The name change here
reflects that.
No functional change intended.
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-19-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-24 08:30:01 +00:00
|
|
|
bl dcache_inval_poc
|
2014-11-21 21:50:41 +00:00
|
|
|
|
arm64: add support for kernel ASLR
This adds support for KASLR, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 13:12:01 +00:00
|
|
|
ret x28
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__create_page_tables)
|
2014-11-21 21:50:41 +00:00
|
|
|
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already set up upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
/*
|
2021-05-20 11:50:30 +00:00
|
|
|
* Initialize CPU registers with task-specific and cpu-specific context.
|
|
|
|
*
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already set up upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
* Create a final frame record at task_pt_regs(current)->stackframe, so
|
|
|
|
* that the unwinder can identify the final frame record of any task by
|
|
|
|
* its location in the task stack. We reserve the entire pt_regs space
|
|
|
|
* for consistency with user tasks and kthreads.
|
|
|
|
*/
|
2021-05-20 11:50:31 +00:00
|
|
|
/*
 * init_cpu_task tsk, tmp1, tmp2
 *   tsk  - register holding the task pointer; only read by the
 *          instructions visible in this macro
 *   tmp1 - scratch register, overwritten
 *   tmp2 - scratch register, overwritten
 * Writes sp_el0, sp and x29. scs_load and set_this_cpu_offset are defined
 * outside this chunk and may touch further state.
 */
.macro init_cpu_task tsk, tmp1, tmp2
|
2021-05-20 11:50:30 +00:00
|
|
|
/*
 * Copy the task pointer into SP_EL0.
 * NOTE(review): presumably so the current task can be read back from
 * SP_EL0 later - confirm against the users of sp_el0.
 */
msr sp_el0, \tsk
|
|
|
|
|
2021-05-20 11:50:31 +00:00
|
|
|
/*
 * The next three instructions compute
 *   sp = (value of the task's TSK_STACK field) + THREAD_SIZE - PT_REGS_SIZE
 * i.e. PT_REGS_SIZE bytes are reserved below stack + THREAD_SIZE.
 */
ldr \tmp1, [\tsk, #TSK_STACK]
|
|
|
|
add sp, \tmp1, #THREAD_SIZE
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
sub sp, sp, #PT_REGS_SIZE
|
2021-05-20 11:50:30 +00:00
|
|
|
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
/*
 * Final frame record: store two zero words at sp + S_STACKFRAME, then
 * point the frame pointer (x29) at that record.
 */
stp xzr, xzr, [sp, #S_STACKFRAME]
|
|
|
|
add x29, sp, #S_STACKFRAME
|
2021-05-20 11:50:30 +00:00
|
|
|
|
|
2021-05-27 10:55:29 +00:00
|
|
|
/*
 * scs_load is defined outside this chunk.
 * NOTE(review): presumably loads the shadow call stack pointer for this
 * task - confirm against its definition.
 */
scs_load \tsk
|
2021-05-20 11:50:31 +00:00
|
|
|
|
|
|
|
|
/* tmp1 = address of __per_cpu_offset */
adr_l \tmp1, __per_cpu_offset
|
|
|
|
/*
 * 32-bit load of the task's TSK_CPU field. The destination is spelled
 * "w" followed by the tmp2 argument, so with tmp2 = x6 (what
 * __primary_switched passes) it expands to wx6.
 * NOTE(review): relies on a wxN register alias defined outside this
 * chunk - confirm.
 */
ldr w\tmp2, [\tsk, #TSK_CPU]
|
|
|
|
/* tmp1 = 64-bit word at __per_cpu_offset + tmp2 * 8 */
ldr \tmp1, [\tmp1, \tmp2, lsl #3]
|
|
|
|
/* Hand that value to set_this_cpu_offset (defined outside this chunk). */
set_this_cpu_offset \tmp1
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
.endm
|
|
|
|
|
2014-11-21 21:50:41 +00:00
|
|
|
/*
|
2015-03-04 10:51:48 +00:00
|
|
|
* The following fragment of code is executed with the MMU enabled.
|
2016-08-31 11:05:15 +00:00
|
|
|
*
|
|
|
|
* x0 = __PHYS_OFFSET
|
2014-11-21 21:50:41 +00:00
|
|
|
*/
|
2020-02-18 19:58:33 +00:00
|
|
|
/*
 * __primary_switched
 *
 * Registers read on entry, as used by the code below:
 *   x0  - subtracted from kimage_vaddr; the difference is stored to
 *         kimage_voffset
 *   x21 - FDT pointer: stored to __fdt_pointer and later passed to
 *         early_fdt_map in x0
 *   x23 - tested and updated on the CONFIG_RANDOMIZE_BASE path
 *         (KASLR offset)
 *   x30 - return address; only used by the early "ret" taken when
 *         kaslr_early_init returns non-zero
 *
 * Otherwise the routine ends with "bl start_kernel" followed by
 * ASM_BUG(). NOTE(review): ASM_BUG() is only reached if start_kernel
 * returns, which is presumably never expected - confirm.
 */
SYM_FUNC_START_LOCAL(__primary_switched)
|
2021-05-20 11:50:30 +00:00
|
|
|
/* x4 = address of init_task; x5 and x6 are scratch for init_cpu_task */
adr_l x4, init_task
|
2021-05-20 11:50:31 +00:00
|
|
|
init_cpu_task x4, x5, x6
|
2016-08-31 11:05:16 +00:00
|
|
|
|
2015-12-26 11:46:40 +00:00
|
|
|
adr_l x8, vectors // load VBAR_EL1 with virtual
|
|
|
|
msr vbar_el1, x8 // vector table address
|
|
|
|
isb
|
|
|
|
|
2021-05-20 11:50:30 +00:00
|
|
|
/*
 * Push a frame record (x29, x30) so the return address survives the
 * bl instructions below; both exit paths pop it again.
 */
stp x29, x30, [sp, #-16]!
|
2016-08-31 11:05:16 +00:00
|
|
|
mov x29, sp
|
|
|
|
|
2016-08-31 11:05:15 +00:00
|
|
|
str_l x21, __fdt_pointer, x5 // Save FDT pointer
|
|
|
|
|
|
|
|
|
ldr_l x4, kimage_vaddr // Save the offset between
|
|
|
|
sub x4, x4, x0 // the kernel virtual and
|
|
|
|
str_l x4, kimage_voffset, x5 // physical mappings
|
|
|
|
|
2016-01-06 11:05:27 +00:00
|
|
|
// Clear BSS
|
|
|
|
adr_l x0, __bss_start
|
|
|
|
mov x1, xzr
|
|
|
|
adr_l x2, __bss_stop
|
|
|
|
sub x2, x2, x0
|
|
|
|
/* Call with x0 = __bss_start, x1 = 0, x2 = __bss_stop - __bss_start */
bl __pi_memset
|
arm64: mm: place empty_zero_page in bss
Currently the zero page is set up in paging_init, and thus we cannot use
the zero page earlier. We use the zero page as a reserved TTBR value
from which no TLB entries may be allocated (e.g. when uninstalling the
idmap). To enable such usage earlier (as may be required for invasive
changes to the kernel page tables), and to minimise the time that the
idmap is active, we need to be able to use the zero page before
paging_init.
This patch follows the example set by x86, by allocating the zero page
at compile time, in .bss. This means that the zero page itself is
available immediately upon entry to start_kernel (as we zero .bss before
this), and also means that the zero page takes up no space in the raw
Image binary. The associated struct page is allocated in bootmem_init,
and remains unavailable until this time.
Outside of arch code, the only users of empty_zero_page assume that the
empty_zero_page symbol refers to the zeroed memory itself, and that
ZERO_PAGE(x) must be used to acquire the associated struct page,
following the example of x86. This patch also brings arm64 inline with
these assumptions.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-25 11:44:57 +00:00
|
|
|
dsb ishst // Make zero page visible to PTW
|
2016-01-06 11:05:27 +00:00
|
|
|
|
|
2020-12-22 20:02:06 +00:00
|
|
|
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
2015-10-12 15:52:58 +00:00
|
|
|
bl kasan_early_init
|
arm64: add support for kernel ASLR
This adds support for KASLR is implemented, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 13:12:01 +00:00
|
|
|
#endif
|
2021-02-08 09:57:21 +00:00
|
|
|
mov x0, x21 // pass FDT address in x0
|
|
|
|
bl early_fdt_map // Try mapping the FDT early
|
2021-02-08 09:57:22 +00:00
|
|
|
bl init_feature_override // Parse cpu feature overrides
|
arm64: add support for kernel ASLR
This adds support for KASLR is implemented, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 13:12:01 +00:00
|
|
|
#ifdef CONFIG_RANDOMIZE_BASE
|
2016-04-18 15:09:47 +00:00
|
|
|
/*
 * tst sets the flags from x23 & ~(MIN_KIMG_ALIGN - 1); a non-zero result
 * skips the kaslr_early_init call and continues at 0: below.
 */
tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
|
|
|
|
b.ne 0f
|
arm64: add support for kernel ASLR
This adds support for KASLR is implemented, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 13:12:01 +00:00
|
|
|
bl kaslr_early_init // parse FDT for KASLR options
|
|
|
|
cbz x0, 0f // KASLR disabled? just proceed
|
2016-04-18 15:09:47 +00:00
|
|
|
/*
 * Non-zero return value: OR it into x23, pop the frame record pushed
 * earlier and return to the caller instead of continuing to start_kernel.
 */
orr x23, x23, x0 // record KASLR offset
|
2016-08-31 11:05:16 +00:00
|
|
|
ldp x29, x30, [sp], #16 // we must enable KASLR, return
|
|
|
|
ret // to __primary_switch()
|
arm64: add support for kernel ASLR
This adds support for KASLR is implemented, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 13:12:01 +00:00
|
|
|
0:
|
2015-10-12 15:52:58 +00:00
|
|
|
#endif
|
2021-02-08 09:57:14 +00:00
|
|
|
bl switch_to_vhe // Prefer VHE if possible
|
2021-05-20 11:50:30 +00:00
|
|
|
/*
 * Pop the frame record pushed earlier; x29 gets back the value it held
 * right after init_cpu_task (the final frame record).
 */
ldp x29, x30, [sp], #16
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
bl start_kernel
|
|
|
|
ASM_BUG()
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__primary_switched)
|
2014-11-21 21:50:41 +00:00
|
|
|
|
2020-03-12 09:40:02 +00:00
|
|
|
/*
 * kimage_vaddr: a single 64-bit word holding the address of the _text
 * symbol. It is emitted into .rodata (section flag "a" = allocatable, with
 * no write or execute flag) and exported with EXPORT_SYMBOL.
 * __primary_switched loads this word and subtracts the value it was passed
 * in x0 to derive kimage_voffset.
 * NOTE(review): _text is presumably the link-time virtual address of the
 * start of the kernel image - confirm against the linker script.
 */
.pushsection ".rodata", "a"
|
|
|
|
SYM_DATA_START(kimage_vaddr)
|
2020-08-25 13:54:40 +00:00
|
|
|
.quad _text
|
2020-03-12 09:40:02 +00:00
|
|
|
SYM_DATA_END(kimage_vaddr)
|
|
|
|
EXPORT_SYMBOL(kimage_vaddr)
|
|
|
|
/* Restore whichever section was active before the .pushsection above. */
.popsection
|
|
|
|
|
2014-11-21 21:50:41 +00:00
|
|
|
/*
|
|
|
|
* end early head section, begin head code that is also used for
|
|
|
|
* hotplug and needs to have the same protections as the text region
|
|
|
|
*/
|
2018-01-29 12:00:00 +00:00
|
|
|
.section ".idmap.text","awx"
|
arm64: add support for kernel ASLR
This adds support for KASLR, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 13:12:01 +00:00
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
2020-11-13 12:49:23 +00:00
|
|
|
* Starting from EL2 or EL1, configure the CPU to execute at the highest
|
|
|
|
* reachable EL supported by the kernel in a chosen default state. If dropping
|
|
|
|
* from EL2 to EL1, configure EL2 before configuring EL1.
|
2013-10-11 13:52:16 +00:00
|
|
|
*
|
2020-11-13 12:49:25 +00:00
|
|
|
* Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
|
|
|
|
* SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
|
2013-10-11 13:52:16 +00:00
|
|
|
*
|
2017-01-09 14:31:55 +00:00
|
|
|
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
|
2013-10-11 13:52:16 +00:00
|
|
|
* booted in EL1 or EL2 respectively.
|
2012-03-05 11:49:27 +00:00
|
|
|
*/
|
2020-11-13 12:49:23 +00:00
|
|
|
/*
 * init_kernel_el
 *
 * In:   lr = address to resume at once the exception level is set up
 * Out:  w0 = BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2
 * Clobbers (visible here): x0 and the condition flags, plus whatever the
 * init_el2_state macro (defined outside this chunk) touches.
 *
 * Three exits, all reached with an eret:
 *   - init_el1:  CurrentEL is not EL2; resume at lr with INIT_PSTATE_EL1.
 *   - label 1:   CurrentEL is EL2 and HCR_EL2.E2H reads back as clear;
 *                resume at lr via elr_el2.
 *   - otherwise: E2H reads back as set; eret into __cpu_stick_to_vhe,
 *                which issues an hvc and then returns with ret.
 */
SYM_FUNC_START(init_kernel_el)
|
2012-03-05 11:49:27 +00:00
|
|
|
mrs x0, CurrentEL
|
2014-06-06 13:16:21 +00:00
|
|
|
cmp x0, #CurrentEL_EL2
|
2020-11-13 12:49:25 +00:00
|
|
|
b.eq init_el2
|
|
|
|
|
|
|
|
|
/* Not at EL2: fall through into the EL1-only setup. */
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
|
2021-04-08 13:10:09 +00:00
|
|
|
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
|
|
|
|
msr sctlr_el1, x0
|
2013-10-11 13:52:17 +00:00
|
|
|
isb
|
2020-11-13 12:49:25 +00:00
|
|
|
/* eret target: PSTATE from INIT_PSTATE_EL1, PC from the caller's lr */
mov_q x0, INIT_PSTATE_EL1
|
|
|
|
msr spsr_el1, x0
|
|
|
|
msr elr_el1, lr
|
|
|
|
mov w0, #BOOT_CPU_MODE_EL1
|
|
|
|
eret
|
2012-03-05 11:49:27 +00:00
|
|
|
|
|
2020-11-13 12:49:25 +00:00
|
|
|
SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
|
2020-12-02 18:41:04 +00:00
|
|
|
/* Program HCR_EL2 with HCR_HOST_NVHE_FLAGS before anything else at EL2. */
mov_q x0, HCR_HOST_NVHE_FLAGS
|
|
|
|
msr hcr_el2, x0
|
2017-10-31 15:51:04 +00:00
|
|
|
isb
|
2020-12-02 18:41:04 +00:00
|
|
|
|
|
2021-02-08 09:57:17 +00:00
|
|
|
/*
 * init_el2_state is a macro defined outside this chunk.
 * NOTE(review): presumably performs the remaining EL2 register setup -
 * confirm against its definition.
 */
init_el2_state
|
2017-10-31 15:51:04 +00:00
|
|
|
|
|
2012-10-19 16:46:27 +00:00
|
|
|
/* Hypervisor stub */
|
2020-12-02 18:41:04 +00:00
|
|
|
adr_l x0, __hyp_stub_vectors
|
2012-10-19 16:46:27 +00:00
|
|
|
msr vbar_el2, x0
|
2020-11-13 12:49:25 +00:00
|
|
|
isb
|
2020-12-02 18:41:04 +00:00
|
|
|
|
|
2021-04-08 13:10:09 +00:00
|
|
|
/*
|
|
|
|
* Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
|
|
|
|
* making it impossible to start in nVHE mode. Is that
|
|
|
|
* compliant with the architecture? Absolutely not!
|
|
|
|
*/
|
|
|
|
/* Read HCR_EL2 back and isolate the E2H bit; clear means continue at 1: */
mrs x0, hcr_el2
|
|
|
|
and x0, x0, #HCR_E2H
|
|
|
|
cbz x0, 1f
|
|
|
|
|
|
|
|
|
/* Switching to VHE requires a sane SCTLR_EL1 as a start */
|
|
|
|
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
|
|
|
|
msr_s SYS_SCTLR_EL12, x0
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Force an eret into a helper "function", and let it return
|
|
|
|
* to our original caller... This makes sure that we have
|
|
|
|
* initialised the basic PSTATE state.
|
|
|
|
*/
|
|
|
|
/*
 * NOTE(review): the EL1 register names spsr_el1 and elr_el1 are written
 * here although the eret executes at EL2. Per the Arm ARM, with E2H set
 * such accesses from EL2 are redirected to the EL2 registers - confirm.
 */
mov x0, #INIT_PSTATE_EL2
|
|
|
|
msr spsr_el1, x0
|
|
|
|
adr x0, __cpu_stick_to_vhe
|
|
|
|
msr elr_el1, x0
|
|
|
|
eret
|
|
|
|
|
|
|
|
|
/* E2H read back as clear: set up SCTLR_EL1 and eret to the caller's lr. */
1:
|
|
|
|
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
|
|
|
|
msr sctlr_el1, x0
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
 * NOTE(review): spsr_el2 is not written anywhere in this routine's visible
 * code; presumably init_el2_state sets it - confirm.
 */
msr elr_el2, lr
|
2020-11-13 12:49:25 +00:00
|
|
|
mov w0, #BOOT_CPU_MODE_EL2
|
2012-03-05 11:49:27 +00:00
|
|
|
eret
|
2021-04-08 13:10:09 +00:00
|
|
|
|
|
|
|
|
|
/*
 * Entered through the eret on the E2H-set path. Issues hvc #0 with
 * x0 = HVC_VHE_RESTART, then returns through lr with
 * x0 = BOOT_CPU_MODE_EL2. lr is not modified by any instruction visible in
 * this routine, so ret goes back to init_kernel_el's caller.
 * NOTE(review): assumes the hvc handler preserves lr - confirm.
 */
__cpu_stick_to_vhe:
|
|
|
|
mov x0, #HVC_VHE_RESTART
|
|
|
|
hvc #0
|
|
|
|
mov x0, #BOOT_CPU_MODE_EL2
|
|
|
|
ret
|
2020-11-13 12:49:23 +00:00
|
|
|
SYM_FUNC_END(init_kernel_el)
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2013-10-11 13:52:16 +00:00
|
|
|
/*
|
|
|
|
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
|
2017-01-09 14:31:55 +00:00
|
|
|
* in w0. See arch/arm64/include/asm/virt.h for more info.
|
2013-10-11 13:52:16 +00:00
|
|
|
*/
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
|
2015-03-17 08:14:29 +00:00
|
|
|
adr_l x1, __boot_cpu_mode // x1 = address of the first __boot_cpu_mode word
|
2016-08-31 11:05:12 +00:00
|
|
|
cmp w0, #BOOT_CPU_MODE_EL2
|
2013-10-11 13:52:16 +00:00
|
|
|
b.ne 1f // w0 is not EL2: store into the first word
|
|
|
|
add x1, x1, #4 // w0 is EL2: store into the second word instead
|
2021-05-18 10:14:05 +00:00
|
|
|
1: str w0, [x1] // Save CPU boot mode
|
2014-05-02 15:24:13 +00:00
|
|
|
dmb sy // barrier between the store above and the invalidate below
|
|
|
|
dc ivac, x1 // Invalidate potentially stale cache line
|
2013-10-11 13:52:16 +00:00
|
|
|
ret
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(set_cpu_boot_mode_flag)
|
2013-10-11 13:52:16 +00:00
|
|
|
|
2016-08-24 17:27:29 +00:00
|
|
|
/*
|
|
|
|
* These values are written with the MMU off, but read with the MMU on.
|
|
|
|
* Writers will invalidate the corresponding address, discarding up to a
|
|
|
|
* 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
|
|
|
|
* sufficient alignment that the CWG doesn't overlap another section.
|
|
|
|
*/
|
|
|
|
.pushsection ".mmuoff.data.write", "aw"
|
2012-10-26 14:40:05 +00:00
|
|
|
/*
|
|
|
|
* We need to find out the CPU boot mode long after boot, so we need to
|
|
|
|
* store it in a writable variable.
|
|
|
|
*
|
|
|
|
* This is not in .bss, because we set it sufficiently early that the boot-time
|
|
|
|
* zeroing of .bss would clobber it.
|
|
|
|
*/
|
2020-02-18 19:58:35 +00:00
|
|
|
SYM_DATA_START(__boot_cpu_mode)
|
2012-10-26 14:40:05 +00:00
|
|
|
.long BOOT_CPU_MODE_EL2 // offset 0: set_cpu_boot_mode_flag stores w0 here when w0 != BOOT_CPU_MODE_EL2
|
2015-03-13 16:14:36 +00:00
|
|
|
.long BOOT_CPU_MODE_EL1 // offset 4: set_cpu_boot_mode_flag stores BOOT_CPU_MODE_EL2 here
|
2020-02-18 19:58:35 +00:00
|
|
|
SYM_DATA_END(__boot_cpu_mode)
|
2016-08-24 17:27:29 +00:00
|
|
|
/*
|
|
|
|
* The booting CPU updates the failed status @__early_cpu_boot_status,
|
|
|
|
* with MMU turned off.
|
|
|
|
*/
|
2020-02-18 19:58:35 +00:00
|
|
|
SYM_DATA_START(__early_cpu_boot_status)
|
2019-04-30 10:35:04 +00:00
|
|
|
.quad 0 // initial value; rewritten by the update_early_cpu_boot_status macro
|
2020-02-18 19:58:35 +00:00
|
|
|
SYM_DATA_END(__early_cpu_boot_status)
|
2016-08-24 17:27:29 +00:00
|
|
|
|
2012-10-26 14:40:05 +00:00
|
|
|
.popsection
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* This provides a "holding pen" for platforms to hold all secondary
|
|
|
|
* cores until we're ready for them to initialise.
|
|
|
|
*/
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START(secondary_holding_pen)
|
2020-11-13 12:49:23 +00:00
|
|
|
bl init_kernel_el // w0=cpu_boot_mode
|
2013-10-11 13:52:16 +00:00
|
|
|
bl set_cpu_boot_mode_flag // record the boot mode returned in w0
|
2012-03-05 11:49:27 +00:00
|
|
|
mrs x0, mpidr_el1
|
2016-04-18 15:09:45 +00:00
|
|
|
mov_q x1, MPIDR_HWID_BITMASK
|
2012-08-29 17:32:18 +00:00
|
|
|
and x0, x0, x1 // x0 = MPIDR_EL1 masked with MPIDR_HWID_BITMASK
|
2015-03-10 14:00:03 +00:00
|
|
|
adr_l x3, secondary_holding_pen_release
|
2012-03-05 11:49:27 +00:00
|
|
|
pen: ldr x4, [x3] // x4 = current secondary_holding_pen_release value
|
|
|
|
|
|
cmp x4, x0
|
|
|
|
|
|
b.eq secondary_startup // released once the value equals this CPU's masked MPIDR
|
|
|
|
|
|
wfe // otherwise wait for an event ...
|
|
|
|
|
|
b pen // ... and poll again
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(secondary_holding_pen)
|
arm64: factor out spin-table boot method
The arm64 kernel has an internal holding pen, which is necessary for
some systems where we can't bring CPUs online individually and must hold
multiple CPUs in a safe area until the kernel is able to handle them.
The current SMP infrastructure for arm64 is closely coupled to this
holding pen, and alternative boot methods must launch CPUs into the pen,
where they sit before they are launched into the kernel proper.
With PSCI (and possibly other future boot methods), we can bring CPUs
online individually, and need not perform the secondary_holding_pen
dance. Instead, this patch factors the holding pen management code out
to the spin-table boot method code, as it is the only boot method
requiring the pen.
A new entry point for secondaries, secondary_entry is added for other
boot methods to use, which bypasses the holding pen and its associated
overhead when bringing CPUs online. The smp.pen.text section is also
removed, as the pen can live in head.text without problem.
The cpu_operations structure is extended with two new functions,
cpu_boot and cpu_postboot, for bringing a cpu into the kernel and
performing any post-boot cleanup required by a bootmethod (e.g.
resetting the secondary_holding_pen_release to INVALID_HWID).
Documentation is added for cpu_operations.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2013-10-24 19:30:16 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Secondary entry point that jumps straight into the kernel. Only to
|
|
|
|
* be used where CPUs are brought online dynamically by the kernel.
|
|
|
|
*/
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START(secondary_entry)
|
2020-11-13 12:49:23 +00:00
|
|
|
bl init_kernel_el // w0=cpu_boot_mode
|
2013-11-18 18:56:42 +00:00
|
|
|
bl set_cpu_boot_mode_flag // record the boot mode returned in w0
|
arm64: factor out spin-table boot method
The arm64 kernel has an internal holding pen, which is necessary for
some systems where we can't bring CPUs online individually and must hold
multiple CPUs in a safe area until the kernel is able to handle them.
The current SMP infrastructure for arm64 is closely coupled to this
holding pen, and alternative boot methods must launch CPUs into the pen,
where they sit before they are launched into the kernel proper.
With PSCI (and possibly other future boot methods), we can bring CPUs
online individually, and need not perform the secondary_holding_pen
dance. Instead, this patch factors the holding pen management code out
to the spin-table boot method code, as it is the only boot method
requiring the pen.
A new entry point for secondaries, secondary_entry is added for other
boot methods to use, which bypasses the holding pen and its associated
overhead when bringing CPUs online. The smp.pen.text section is also
removed, as the pen can live in head.text without problem.
The cpu_operations structure is extended with two new functions,
cpu_boot and cpu_postboot, for bringing a cpu into the kernel and
performing any post-boot cleanup required by a bootmethod (e.g.
resetting the secondary_holding_pen_release to INVALID_HWID).
Documentation is added for cpu_operations.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2013-10-24 19:30:16 +00:00
|
|
|
b secondary_startup // no holding-pen wait on this path
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(secondary_entry)
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START_LOCAL(secondary_startup)
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* Common entry point for secondary CPUs.
|
|
|
|
*/
|
2021-02-08 09:57:14 +00:00
|
|
|
bl switch_to_vhe
|
2018-12-06 22:50:40 +00:00
|
|
|
bl __cpu_secondary_check52bitva // with CONFIG_ARM64_VA_BITS_52, parks the CPU if it fails the LVA check
|
2015-03-18 14:55:20 +00:00
|
|
|
bl __cpu_setup // initialise processor
|
2018-09-24 13:51:13 +00:00
|
|
|
adrp x1, swapper_pg_dir // x1 = TTBR1_EL1 argument for __enable_mmu
|
2016-08-31 11:05:14 +00:00
|
|
|
bl __enable_mmu // NOTE(review): x0 (SCTLR_EL1 value) is presumably left by __cpu_setup - confirm
|
|
|
|
|
|
ldr x8, =__secondary_switched
|
|
|
|
|
|
br x8 // plain branch (not a call): control does not come back here
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(secondary_startup)
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START_LOCAL(__secondary_switched)
|
2015-12-26 11:46:40 +00:00
|
|
|
adr_l x5, vectors
|
|
|
|
|
|
msr vbar_el1, x5 // EL1 vector base = vectors
|
|
|
|
|
|
isb
|
|
|
|
|
|
|
2016-02-23 10:31:42 +00:00
|
|
|
adr_l x0, secondary_data
|
arm64: split thread_info from task stack
This patch moves arm64's struct thread_info from the task stack into
task_struct. This protects thread_info from corruption in the case of
stack overflows, and makes its address harder to determine if stack
addresses are leaked, making a number of attacks more difficult. Precise
detection and handling of overflow is left for subsequent patches.
Largely, this involves changing code to store the task_struct in sp_el0,
and acquire the thread_info from the task struct. Core code now
implements current_thread_info(), and as noted in <linux/sched.h> this
relies on offsetof(task_struct, thread_info) == 0, enforced by core
code.
This change means that the 'tsk' register used in entry.S now points to
a task_struct, rather than a thread_info as it used to. To make this
clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets
appropriately updated to account for the structural change.
Userspace clobbers sp_el0, and we can no longer restore this from the
stack. Instead, the current task is cached in a per-cpu variable that we
can safely access from early assembly as interrupts are disabled (and we
are thus not preemptible).
Both secondary entry and idle are updated to stash the sp and task
pointer separately.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-11-03 20:23:13 +00:00
|
|
|
ldr x2, [x0, #CPU_BOOT_TASK] // x2 = value at secondary_data + CPU_BOOT_TASK
|
2019-08-27 13:36:38 +00:00
|
|
|
cbz x2, __secondary_too_slow // zero: nothing to run, park this CPU
|
2021-05-20 11:50:29 +00:00
|
|
|
|
2021-05-20 11:50:31 +00:00
|
|
|
init_cpu_task x2, x1, x3 // x2 = value loaded above; NOTE(review): x1/x3 presumably scratch - confirm against the macro
|
arm64: simplify ptrauth initialization
Currently __cpu_setup conditionally initializes the address
authentication keys and enables them in SCTLR_EL1, doing so differently
for the primary CPU and secondary CPUs, and skipping this work for CPUs
returning from an idle state. For the latter case, cpu_do_resume
restores the keys and SCTLR_EL1 value after the MMU has been enabled.
This flow is rather difficult to follow, so instead let's move the
primary and secondary CPU initialization into their respective boot
paths. By following the example of cpu_do_resume and doing so once the
MMU is enabled, we can always initialize the keys from the values in
thread_struct, and avoid the machinery necessary to pass the keys in
secondary_data or open-coding initialization for the boot CPU.
This means we perform an additional RMW of SCTLR_EL1, but we already do
this in the cpu_do_resume path, and for other features in cpufeature.c,
so this isn't a major concern in a bringup path. Note that even while
the enable bits are clear, the key registers are accessible.
As this now renders the argument to __cpu_setup redundant, let's also
remove that entirely. Future extensions can follow a similar approach to
initialize values that differ for primary/secondary CPUs.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Cc: Amit Daniel Kachhap <amit.kachhap@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20200423101606.37601-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2020-04-23 10:16:06 +00:00
|
|
|
|
|
|
|
|
#ifdef CONFIG_ARM64_PTR_AUTH
|
|
|
|
ptrauth_keys_init_cpu x2, x3, x4, x5 // NOTE(review): x3-x5 presumably scratch - confirm against the macro
|
|
|
|
#endif
|
|
|
|
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
bl secondary_start_kernel
|
|
|
|
ASM_BUG() // only reached if secondary_start_kernel returns
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__secondary_switched)
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START_LOCAL(__secondary_too_slow)
/*
 * Park the CPU: loop forever on wfe/wfi. Branched to from
 * __secondary_switched when the CPU_BOOT_TASK value it loads is zero.
 */
|
2019-08-27 13:36:38 +00:00
|
|
|
wfe
|
|
|
|
wfi
|
|
|
|
b __secondary_too_slow // never leaves this loop
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__secondary_too_slow)
|
2019-08-27 13:36:38 +00:00
|
|
|
|
2016-02-23 10:31:42 +00:00
|
|
|
/*
|
|
|
|
* The booting CPU updates the failed status @__early_cpu_boot_status,
|
|
|
|
* with MMU turned off.
|
|
|
|
*
|
|
|
|
* update_early_cpu_boot_status status, tmp1, tmp2
|
|
|
|
* - Corrupts tmp1, tmp2
|
|
|
|
* - Writes 'status' to __early_cpu_boot_status and makes sure
|
|
|
|
* it is committed to memory.
|
|
|
|
*/
|
|
|
|
|
|
|
|
.macro update_early_cpu_boot_status status, tmp1, tmp2
|
|
|
|
mov \tmp2, #\status // tmp2 = immediate status value to publish
|
arm64: fix invalidation of wrong __early_cpu_boot_status cacheline
In head.S, the str_l macro, which takes a source register, a symbol name
and a temp register, is used to store a status value to the variable
__early_cpu_boot_status. Subsequently, the value of the temp register is
reused to invalidate any cachelines covering this variable.
However, since str_l resolves to
adrp \tmp, \sym
str \src, [\tmp, :lo12:\sym]
the temp register never actually holds the address of the variable but
only of the 4 KB window that covers it, and reusing it leads to the
wrong cacheline being invalidated. So instead, take the address
explicitly before doing the store, and reuse that value to perform
the cache invalidation.
Fixes: bb9052744f4b ("arm64: Handle early CPU boot failures")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Suzuki K Poulose <Suzuki.Poulose@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-04-15 10:11:21 +00:00
|
|
|
adr_l \tmp1, __early_cpu_boot_status // tmp1 = full address of the variable, reused by dc ivac below
|
|
|
|
str \tmp2, [\tmp1]
|
2016-02-23 10:31:42 +00:00
|
|
|
dmb sy // barrier between the store above and the invalidate below
|
|
|
|
dc ivac, \tmp1 // Invalidate potentially stale cache line
|
|
|
|
.endm
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
2015-03-17 07:59:53 +00:00
|
|
|
* Enable the MMU.
|
2012-03-05 11:49:27 +00:00
|
|
|
*
|
2015-03-17 07:59:53 +00:00
|
|
|
* x0 = SCTLR_EL1 value for turning on the MMU.
|
2018-09-24 13:51:13 +00:00
|
|
|
* x1 = TTBR1_EL1 value
|
2015-03-17 07:59:53 +00:00
|
|
|
*
|
2016-08-31 11:05:14 +00:00
|
|
|
* Returns to the caller via x30/lr. This requires the caller to be covered
|
|
|
|
* by the .idmap.text section.
|
2015-10-19 13:19:35 +00:00
|
|
|
*
|
|
|
|
* Checks if the selected granule size is supported by the CPU.
|
|
|
|
* If it isn't, park the CPU
|
2012-03-05 11:49:27 +00:00
|
|
|
*/
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START(__enable_mmu)
|
2018-09-24 13:51:13 +00:00
|
|
|
mrs x2, ID_AA64MMFR0_EL1
|
|
|
|
|
|
ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 // x2 = 4-bit field at ID_AA64MMFR0_TGRAN_SHIFT
|
2021-03-10 05:53:10 +00:00
|
|
|
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
|
|
|
|
|
|
b.lt __no_granule_support // below the supported minimum: park the CPU
|
|
|
|
|
|
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
|
|
|
|
|
|
b.gt __no_granule_support // above the supported maximum: park the CPU
|
2018-09-24 13:51:13 +00:00
|
|
|
update_early_cpu_boot_status 0, x2, x3 // publish status 0; clobbers x2 and x3
|
|
|
|
|
|
adrp x2, idmap_pg_dir // x2 = page address of idmap_pg_dir, used for TTBR0 below
|
|
|
|
|
|
phys_to_ttbr x1, x1 // NOTE(review): presumably converts the address to TTBR format - confirm against the macro
|
|
|
|
|
|
phys_to_ttbr x2, x2
|
|
|
|
|
|
msr ttbr0_el1, x2 // load TTBR0
|
2019-08-07 15:55:19 +00:00
|
|
|
offset_ttbr1 x1, x3 // NOTE(review): x3 presumably scratch - confirm against the macro
|
2018-09-24 13:51:13 +00:00
|
|
|
msr ttbr1_el1, x1 // load TTBR1
|
2012-03-05 11:49:27 +00:00
|
|
|
isb
|
2021-02-08 09:57:12 +00:00
|
|
|
|
|
|
|
|
|
|
set_sctlr_el1 x0 // install the SCTLR_EL1 value passed by the caller in x0
|
|
|
|
|
2016-08-31 11:05:14 +00:00
|
|
|
ret
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__enable_mmu)
|
2015-10-19 13:19:35 +00:00
|
|
|
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START(__cpu_secondary_check52bitva)
/*
 * With CONFIG_ARM64_VA_BITS_52: if vabits_actual is 52 and the LVA field
 * of ID_AA64MMFR2_EL1 is zero, publish a CPU_STUCK_IN_KERNEL status and
 * park the CPU; otherwise return. Without that config this is just a ret.
 * Clobbers x0 and x1.
 */
|
2019-08-07 15:55:22 +00:00
|
|
|
#ifdef CONFIG_ARM64_VA_BITS_52
|
2019-08-07 15:55:23 +00:00
|
|
|
ldr_l x0, vabits_actual
|
2018-12-06 22:50:40 +00:00
|
|
|
cmp x0, #52
|
|
|
|
b.ne 2f // vabits_actual is not 52: nothing to check
|
|
|
|
|
|
|
|
mrs_s x0, SYS_ID_AA64MMFR2_EL1
|
|
|
|
and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT) // isolate the 4-bit LVA field
|
|
|
|
cbnz x0, 2f // non-zero LVA field: check passed
|
|
|
|
|
2018-12-10 14:21:13 +00:00
|
|
|
update_early_cpu_boot_status \
|
|
|
|
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
|
2018-12-06 22:50:40 +00:00
|
|
|
1: wfe
|
|
|
|
wfi
|
|
|
|
b 1b // park forever
|
|
|
|
|
|
|
|
#endif
|
|
|
|
2: ret
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__cpu_secondary_check52bitva)
|
2018-12-06 22:50:40 +00:00
|
|
|
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START_LOCAL(__no_granule_support)
|
2016-02-23 10:31:42 +00:00
|
|
|
/* Indicate that this CPU can't boot and is stuck in the kernel */
|
2018-12-10 14:21:13 +00:00
|
|
|
update_early_cpu_boot_status \
|
|
|
|
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_NO_GRAN, x1, x2
|
2016-02-23 10:31:42 +00:00
|
|
|
1:
|
2015-10-19 13:19:35 +00:00
|
|
|
wfe
|
2016-02-23 10:31:42 +00:00
|
|
|
wfi
|
2016-08-31 11:05:13 +00:00
|
|
|
b 1b // park forever; this routine never returns
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__no_granule_support)
|
2016-04-18 15:09:42 +00:00
|
|
|
|
2016-04-18 15:09:43 +00:00
|
|
|
#ifdef CONFIG_RELOCATABLE
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START_LOCAL(__relocate_kernel)
|
2016-04-18 15:09:43 +00:00
|
|
|
/*
|
|
|
|
* Iterate over each entry in the relocation table, and apply the
|
|
|
|
* relocations in place.
|
|
|
|
*/
|
|
|
|
ldr w9, =__rela_offset // offset to reloc table
|
|
|
|
ldr w10, =__rela_size // size of reloc table
|
|
|
|
|
2016-04-18 15:09:45 +00:00
|
|
|
mov_q x11, KIMAGE_VADDR // default virtual offset
|
2016-04-18 15:09:43 +00:00
|
|
|
add x11, x11, x23 // actual virtual offset
|
|
|
|
add x9, x9, x11 // __va(.rela)
|
|
|
|
add x10, x9, x10 // __va(.rela) + sizeof(.rela)
|
|
|
|
|
|
|
|
0: cmp x9, x10
|
arm64: relocatable: suppress R_AARCH64_ABS64 relocations in vmlinux
The linker routines that we rely on to produce a relocatable PIE binary
treat it as a shared ELF object in some ways, i.e., it emits symbol based
R_AARCH64_ABS64 relocations into the final binary since doing so would be
appropriate when linking a shared library that is subject to symbol
preemption. (This means that an executable can override certain symbols
that are exported by a shared library it is linked with, and that the
shared library *must* update all its internal references as well, and point
them to the version provided by the executable.)
Symbol preemption does not occur for OS hosted PIE executables, let alone
for vmlinux, and so we would prefer to get rid of these symbol based
relocations. This would allow us to simplify the relocation routines, and
to strip the .dynsym, .dynstr and .hash sections from the binary. (Note
that these are tiny, and are placed in the .init segment, but they clutter
up the vmlinux binary.)
Note that these R_AARCH64_ABS64 relocations are only emitted for absolute
references to symbols defined in the linker script, all other relocatable
quantities are covered by anonymous R_AARCH64_RELATIVE relocations that
simply list the offsets to all 64-bit values in the binary that need to be
fixed up based on the offset between the link time and run time addresses.
Fortunately, GNU ld has a -Bsymbolic option, which is intended for shared
libraries to allow them to ignore symbol preemption, and unconditionally
bind all internal symbol references to its own definitions. So set it for
our PIE binary as well, and get rid of the associated sections and the
relocation code that processes them.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: fixed conflict with __dynsym_offset linker script entry]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-07-24 12:00:13 +00:00
|
|
|
b.hs 1f
|
2019-08-01 01:18:42 +00:00
|
|
|
ldp x12, x13, [x9], #24
|
|
|
|
ldr x14, [x9, #-8]
|
|
|
|
cmp w13, #R_AARCH64_RELATIVE
|
arm64: relocatable: suppress R_AARCH64_ABS64 relocations in vmlinux
The linker routines that we rely on to produce a relocatable PIE binary
treat it as a shared ELF object in some ways, i.e., it emits symbol based
R_AARCH64_ABS64 relocations into the final binary since doing so would be
appropriate when linking a shared library that is subject to symbol
preemption. (This means that an executable can override certain symbols
that are exported by a shared library it is linked with, and that the
shared library *must* update all its internal references as well, and point
them to the version provided by the executable.)
Symbol preemption does not occur for OS hosted PIE executables, let alone
for vmlinux, and so we would prefer to get rid of these symbol based
relocations. This would allow us to simplify the relocation routines, and
to strip the .dynsym, .dynstr and .hash sections from the binary. (Note
that these are tiny, and are placed in the .init segment, but they clutter
up the vmlinux binary.)
Note that these R_AARCH64_ABS64 relocations are only emitted for absolute
references to symbols defined in the linker script, all other relocatable
quantities are covered by anonymous R_AARCH64_RELATIVE relocations that
simply list the offsets to all 64-bit values in the binary that need to be
fixed up based on the offset between the link time and run time addresses.
Fortunately, GNU ld has a -Bsymbolic option, which is intended for shared
libraries to allow them to ignore symbol preemption, and unconditionally
bind all internal symbol references to its own definitions. So set it for
our PIE binary as well, and get rid of the associated sections and the
relocation code that processes them.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: fixed conflict with __dynsym_offset linker script entry]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-07-24 12:00:13 +00:00
|
|
|
b.ne 0b
|
2019-08-01 01:18:42 +00:00
|
|
|
add x14, x14, x23 // relocate
|
|
|
|
str x14, [x12, x23]
|
2016-04-18 15:09:43 +00:00
|
|
|
b 0b
|
2019-08-01 01:18:42 +00:00
|
|
|
|
|
|
|
1:
|
|
|
|
#ifdef CONFIG_RELR
|
|
|
|
/*
|
|
|
|
* Apply RELR relocations.
|
|
|
|
*
|
|
|
|
* RELR is a compressed format for storing relative relocations. The
|
|
|
|
* encoded sequence of entries looks like:
|
|
|
|
* [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBBB1 ... ]
|
|
|
|
*
|
|
|
|
* i.e. start with an address, followed by any number of bitmaps. The
|
|
|
|
* address entry encodes 1 relocation. The subsequent bitmap entries
|
|
|
|
* encode up to 63 relocations each, at subsequent offsets following
|
|
|
|
* the last address entry.
|
|
|
|
*
|
|
|
|
* The bitmap entries must have 1 in the least significant bit. The
|
|
|
|
* assumption here is that an address cannot have 1 in lsb. Odd
|
|
|
|
* addresses are not supported. Any odd addresses are stored in the RELA
|
|
|
|
* section, which is handled above.
|
|
|
|
*
|
|
|
|
* Excluding the least significant bit in the bitmap, each non-zero
|
|
|
|
* bit in the bitmap represents a relocation to be applied to
|
|
|
|
* a corresponding machine word that follows the base address
|
|
|
|
* word. The second least significant bit represents the machine
|
|
|
|
* word immediately following the initial address, and each bit
|
|
|
|
* that follows represents the next word, in linear order. As such,
|
|
|
|
* a single bitmap can encode up to 63 relocations in a 64-bit object.
|
|
|
|
*
|
|
|
|
* In this implementation we store the address of the next RELR table
|
|
|
|
* entry in x9, the address being relocated by the current address or
|
|
|
|
* bitmap entry in x13 and the address being relocated by the current
|
|
|
|
* bit in x14.
|
|
|
|
*
|
|
|
|
* Because addends are stored in place in the binary, RELR relocations
|
|
|
|
* cannot be applied idempotently. We use x24 to keep track of the
|
|
|
|
* currently applied displacement so that we can correctly relocate if
|
|
|
|
* __relocate_kernel is called twice with non-zero displacements (i.e.
|
|
|
|
* if there is both a physical misalignment and a KASLR displacement).
|
|
|
|
*/
|
|
|
|
ldr w9, =__relr_offset // offset to reloc table
|
|
|
|
ldr w10, =__relr_size // size of reloc table
|
|
|
|
add x9, x9, x11 // __va(.relr)
|
|
|
|
add x10, x9, x10 // __va(.relr) + sizeof(.relr)
|
|
|
|
|
|
|
|
sub x15, x23, x24 // delta from previous offset
|
|
|
|
cbz x15, 7f // nothing to do if unchanged
|
|
|
|
mov x24, x23 // save new offset
|
|
|
|
|
|
|
|
2: cmp x9, x10
|
|
|
|
b.hs 7f
|
|
|
|
ldr x11, [x9], #8
|
|
|
|
tbnz x11, #0, 3f // branch to handle bitmaps
|
|
|
|
add x13, x11, x23
|
|
|
|
ldr x12, [x13] // relocate address entry
|
|
|
|
add x12, x12, x15
|
|
|
|
str x12, [x13], #8 // adjust to start of bitmap
|
|
|
|
b 2b
|
|
|
|
|
|
|
|
3: mov x14, x13
|
|
|
|
4: lsr x11, x11, #1
|
|
|
|
cbz x11, 6f
|
|
|
|
tbz x11, #0, 5f // skip bit if not set
|
|
|
|
ldr x12, [x14] // relocate bit
|
|
|
|
add x12, x12, x15
|
|
|
|
str x12, [x14]
|
|
|
|
|
|
|
|
5: add x14, x14, #8 // move to next bit's address
|
|
|
|
b 4b
|
|
|
|
|
|
|
|
6: /*
|
|
|
|
* Move to the next bitmap's address. 8 is the word size, and 63 is the
|
|
|
|
* number of significant bits in a bitmap entry.
|
|
|
|
*/
|
|
|
|
add x13, x13, #(8 * 63)
|
|
|
|
b 2b
|
|
|
|
|
|
|
|
7:
|
|
|
|
#endif
|
|
|
|
ret
|
|
|
|
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__relocate_kernel)
|
2016-08-31 11:05:13 +00:00
|
|
|
#endif
|
2016-04-18 15:09:43 +00:00
|
|
|
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_START_LOCAL(__primary_switch)
/*
 * Enable the MMU on init_pg_dir, apply relocations when CONFIG_RELOCATABLE
 * is set, then branch to __primary_switched. x0 on entry is passed straight
 * through to __enable_mmu as the SCTLR_EL1 value.
 */
|
2016-08-31 11:05:13 +00:00
|
|
|
#ifdef CONFIG_RANDOMIZE_BASE
|
|
|
|
mov x19, x0 // preserve new SCTLR_EL1 value
|
|
|
|
mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
|
|
|
|
#endif
|
|
|
|
|
arm64/mm: Separate boot-time page tables from swapper_pg_dir
Since the address of swapper_pg_dir is fixed for a given kernel image,
it is an attractive target for manipulation via an arbitrary write. To
mitigate this we'd like to make it read-only by moving it into the
rodata section.
We require that swapper_pg_dir is at a fixed offset from tramp_pg_dir
and reserved_ttbr0, so these will also need to move into rodata.
However, swapper_pg_dir is allocated along with some transient page
tables used for boot which we do not want to move into rodata.
As a step towards this, this patch separates the boot-time page tables
into a new init_pg_dir, and reduces swapper_pg_dir to the single page it
needs to be. This allows us to retain the relationship between
swapper_pg_dir, tramp_pg_dir, and swapper_pg_dir, while cleanly
separating these from the boot-time page tables.
The init_pg_dir holds all of the pgd/pud/pmd/pte levels needed during
boot, and all of these levels will be freed when we switch to the
swapper_pg_dir, which is initialized by the existing code in
paging_init(). Since we start off on the init_pg_dir, we no longer need
to allocate a transient page table in paging_init() in order to ensure
that swapper_pg_dir isn't live while we initialize it.
There should be no functional change as a result of this patch.
Signed-off-by: Jun Yao <yaojun8558363@gmail.com>
Reviewed-by: James Morse <james.morse@arm.com>
[Mark: place init_pg_dir after BSS, fold mm changes, commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-09-24 14:47:49 +00:00
|
|
|
adrp x1, init_pg_dir // x1 = TTBR1_EL1 argument for __enable_mmu
|
2016-08-31 11:05:14 +00:00
|
|
|
bl __enable_mmu
|
2016-08-31 11:05:13 +00:00
|
|
|
#ifdef CONFIG_RELOCATABLE
|
2019-08-01 01:18:42 +00:00
|
|
|
#ifdef CONFIG_RELR
|
|
|
|
mov x24, #0 // no RELR displacement yet
|
|
|
|
#endif
|
2016-08-31 11:05:13 +00:00
|
|
|
bl __relocate_kernel // first pass, using the current displacement in x23
|
|
|
|
#ifdef CONFIG_RANDOMIZE_BASE
|
|
|
|
ldr x8, =__primary_switched
|
2016-08-31 11:05:15 +00:00
|
|
|
adrp x0, __PHYS_OFFSET
|
2016-08-31 11:05:13 +00:00
|
|
|
blr x8 // a call, not a branch: __primary_switched may return here (see below)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we return here, we have a KASLR displacement in x23 which we need
|
|
|
|
* to take into account by discarding the current kernel mapping and
|
|
|
|
* creating a new one.
|
|
|
|
*/
|
2018-01-29 11:59:52 +00:00
|
|
|
pre_disable_mmu_workaround
|
2016-08-31 11:05:13 +00:00
|
|
|
msr sctlr_el1, x20 // disable the MMU
|
|
|
|
isb
|
|
|
|
bl __create_page_tables // recreate kernel mapping
|
|
|
|
|
|
|
|
tlbi vmalle1 // Remove any stale TLB entries
|
|
|
|
dsb nsh
|
2021-02-24 09:37:37 +00:00
|
|
|
isb
|
2016-08-31 11:05:13 +00:00
|
|
|
|
2021-02-08 09:57:12 +00:00
|
|
|
set_sctlr_el1 x19 // re-enable the MMU
|
2016-08-31 11:05:13 +00:00
|
|
|
|
|
|
|
|
bl __relocate_kernel // second pass, now with the KASLR displacement in x23
|
|
|
|
#endif
|
2016-04-18 15:09:43 +00:00
|
|
|
#endif
|
|
|
|
ldr x8, =__primary_switched
|
2016-08-31 11:05:15 +00:00
|
|
|
adrp x0, __PHYS_OFFSET
|
2016-04-18 15:09:43 +00:00
|
|
|
br x8 // plain branch: control does not come back here
|
2020-02-18 19:58:33 +00:00
|
|
|
SYM_FUNC_END(__primary_switch)
|