Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git (synced 2024-10-31 16:38:12 +00:00)

commit 88f0912253
Merge branch 'for-next/stage1-lpa2' into for-next/core

* for-next/stage1-lpa2: (48 commits)
  : Add support for LPA2 and WXN and stage 1
  arm64/mm: Avoid ID mapping of kpti flag if it is no longer needed
  arm64/mm: Use generic __pud_free() helper in pud_free() implementation
  arm64: gitignore: ignore relacheck
  arm64: Use Signed/Unsigned enums for TGRAN{4,16,64} and VARange
  arm64: mm: Make PUD folding check in set_pud() a runtime check
  arm64: mm: add support for WXN memory translation attribute
  mm: add arch hook to validate mmap() prot flags
  arm64: defconfig: Enable LPA2 support
  arm64: Enable 52-bit virtual addressing for 4k and 16k granule configs
  arm64: kvm: avoid CONFIG_PGTABLE_LEVELS for runtime levels
  arm64: ptdump: Deal with translation levels folded at runtime
  arm64: ptdump: Disregard unaddressable VA space
  arm64: mm: Add support for folding PUDs at runtime
  arm64: kasan: Reduce minimum shadow alignment and enable 5 level paging
  arm64: mm: Add 5 level paging support to fixmap and swapper handling
  arm64: Enable LPA2 at boot if supported by the system
  arm64: mm: add LPA2 and 5 level paging support to G-to-nG conversion
  arm64: mm: Add definitions to support 5 levels of paging
  arm64: mm: Add LPA2 support to phys<->pte conversion routines
  arm64: mm: Wire up TCR.DS bit to PTE shareability fields
  ...

55 changed files with 1954 additions and 1129 deletions
@@ -165,7 +165,7 @@ config ARM64
	select HAVE_ARCH_HUGE_VMAP
	select HAVE_ARCH_JUMP_LABEL
	select HAVE_ARCH_JUMP_LABEL_RELATIVE
	select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
	select HAVE_ARCH_KASAN
	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
	select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
	select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
@@ -370,7 +370,9 @@ config PGTABLE_LEVELS
	default 3 if ARM64_64K_PAGES && (ARM64_VA_BITS_48 || ARM64_VA_BITS_52)
	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
	default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
	default 4 if ARM64_16K_PAGES && (ARM64_VA_BITS_48 || ARM64_VA_BITS_52)
	default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
	default 5 if ARM64_4K_PAGES && ARM64_VA_BITS_52

config ARCH_SUPPORTS_UPROBES
	def_bool y
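The new "default 5" line is just the translation-level arithmetic applied to a 52-bit VA with 4K pages; the same relation, which appears later in this diff as ARM64_HW_PGTABLE_LEVELS in pgtable-hwdef.h, explains every default in this list. A short sketch of the arithmetic, for illustration only:

/* levels = DIV_ROUND_UP(va_bits - PAGE_SHIFT, PAGE_SHIFT - 3), folded into one expression */
#define ARM64_HW_PGTABLE_LEVELS(va_bits)	(((va_bits) - 4) / (PAGE_SHIFT - 3))
/*
 *  4K pages (PAGE_SHIFT = 12): 39-bit VA -> 3, 48-bit VA -> 4, 52-bit VA -> 5 levels
 * 16K pages (PAGE_SHIFT = 14): 47-bit VA -> 3, 48/52-bit VA -> 4 levels
 * 64K pages (PAGE_SHIFT = 16): 42-bit VA -> 2, 48/52-bit VA -> 3 levels
 */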
@@ -398,13 +400,13 @@ config BUILTIN_RETURN_ADDRESS_STRIPS_PAC
config KASAN_SHADOW_OFFSET
	hex
	depends on KASAN_GENERIC || KASAN_SW_TAGS
	default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
	default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
	default 0xdfff800000000000 if (ARM64_VA_BITS_48 || (ARM64_VA_BITS_52 && !ARM64_16K_PAGES)) && !KASAN_SW_TAGS
	default 0xdfffc00000000000 if (ARM64_VA_BITS_47 || ARM64_VA_BITS_52) && ARM64_16K_PAGES && !KASAN_SW_TAGS
	default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
	default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
	default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
	default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
	default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
	default 0xefff800000000000 if (ARM64_VA_BITS_48 || (ARM64_VA_BITS_52 && !ARM64_16K_PAGES)) && KASAN_SW_TAGS
	default 0xefffc00000000000 if (ARM64_VA_BITS_47 || ARM64_VA_BITS_52) && ARM64_16K_PAGES && KASAN_SW_TAGS
	default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
	default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
	default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
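These constants only make sense together with the KASAN shadow mapping rule, which is standard KASAN behaviour rather than something introduced here. A reminder sketch, mirroring kasan_mem_to_shadow() from include/linux/kasan.h:

/* Sketch: one shadow byte covers 8 bytes (generic KASAN, scale shift 3) or
 * 16 bytes (SW tags, scale shift 4), which is why each VA_BITS/granule
 * combination above needs its own offset constant. */
static inline void *kasan_mem_to_shadow_sketch(const void *addr)
{
	return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
		+ KASAN_SHADOW_OFFSET;
}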
@@ -1280,9 +1282,7 @@ endchoice

choice
	prompt "Virtual address space size"
	default ARM64_VA_BITS_39 if ARM64_4K_PAGES
	default ARM64_VA_BITS_47 if ARM64_16K_PAGES
	default ARM64_VA_BITS_42 if ARM64_64K_PAGES
	default ARM64_VA_BITS_52
	help
	  Allows choosing one of multiple possible virtual address
	  space sizes. The level of translation table is determined by
@@ -1309,7 +1309,7 @@ config ARM64_VA_BITS_48

config ARM64_VA_BITS_52
	bool "52-bit"
	depends on ARM64_64K_PAGES && (ARM64_PAN || !ARM64_SW_TTBR0_PAN)
	depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
	help
	  Enable 52-bit virtual addressing for userspace when explicitly
	  requested via a hint to mmap(). The kernel will also use 52-bit
@@ -1356,10 +1356,11 @@ choice

config ARM64_PA_BITS_48
	bool "48-bit"
	depends on ARM64_64K_PAGES || !ARM64_VA_BITS_52

config ARM64_PA_BITS_52
	bool "52-bit (ARMv8.2)"
	depends on ARM64_64K_PAGES
	bool "52-bit"
	depends on ARM64_64K_PAGES || ARM64_VA_BITS_52
	depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
	help
	  Enable support for a 52-bit physical address space, introduced as
@@ -1376,6 +1377,10 @@ config ARM64_PA_BITS
	default 48 if ARM64_PA_BITS_48
	default 52 if ARM64_PA_BITS_52

config ARM64_LPA2
	def_bool y
	depends on ARM64_PA_BITS_52 && !ARM64_64K_PAGES

choice
	prompt "Endianness"
	default CPU_LITTLE_ENDIAN
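Taken together, the options above make 52-bit configurations possible on the smaller granules. An illustrative, hand-written .config fragment (not part of this diff) for a 4K-page, LPA2-capable build:

# Illustrative fragment only; option names taken from the Kconfig text above
CONFIG_ARM64_4K_PAGES=y
CONFIG_ARM64_VA_BITS_52=y
CONFIG_ARM64_VA_BITS=52
CONFIG_ARM64_PA_BITS_52=y
CONFIG_ARM64_PA_BITS=52
CONFIG_ARM64_LPA2=y
CONFIG_PGTABLE_LEVELS=5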
@@ -1602,6 +1607,17 @@ config RODATA_FULL_DEFAULT_ENABLED
	  This requires the linear region to be mapped down to pages,
	  which may adversely affect performance in some cases.

config ARM64_WXN
	bool "Enable WXN attribute so all writable mappings are non-exec"
	help
	  Set the WXN bit in the SCTLR system register so that all writable
	  mappings are treated as if the PXN/UXN bit is set as well.
	  If this is set to Y, it can still be disabled at runtime by
	  passing 'arm64.nowxn' on the kernel command line.

	  This should only be set if no software needs to be supported that
	  relies on being able to execute from writable mappings.

config ARM64_SW_TTBR0_PAN
	bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
	help
|
@@ -76,7 +76,6 @@ CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_VISCONTI=y
CONFIG_ARCH_XGENE=y
CONFIG_ARCH_ZYNQMP=y
CONFIG_ARM64_VA_BITS_48=y
CONFIG_SCHED_MC=y
CONFIG_SCHED_SMT=y
CONFIG_NUMA=y
|
@@ -129,6 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void)
	return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
}

u64 kaslr_early_init(void *fdt);

#endif /* _ASM_ARCHRANDOM_H */
|
@@ -341,20 +341,6 @@ alternative_cb_end
	bfi	\valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm

/*
 * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
 *
 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
 * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
 * this number conveniently equals the number of leading zeroes in
 * the physical address of _end.
 */
	.macro	idmap_get_t0sz, reg
	adrp	\reg, _end
	orr	\reg, \reg, #(1 << VA_BITS_MIN) - 1
	clz	\reg, \reg
	.endm

/*
 * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
 * ID_AA64MMFR0_EL1.PARange value
@@ -586,18 +572,27 @@ alternative_endif
	.endm

/*
 * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
 * If the kernel is built for 52-bit virtual addressing but the hardware only
 * supports 48 bits, we cannot program the pgdir address into TTBR1 directly,
 * but we have to add an offset so that the TTBR1 address corresponds with the
 * pgdir entry that covers the lowest 48-bit addressable VA.
 *
 * Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an
 * additional paging level, and on LPA2/16k pages, we would end up with a root
 * level table with only 2 entries, which is suboptimal in terms of TLB
 * utilization, so there we fall back to 47 bits of translation if LPA2 is not
 * supported.
 *
 * orr is used as it can cover the immediate value (and is idempotent).
 * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
 * ttbr: Value of ttbr to set, modified.
 */
	.macro	offset_ttbr1, ttbr, tmp
#ifdef CONFIG_ARM64_VA_BITS_52
	mrs_s	\tmp, SYS_ID_AA64MMFR2_EL1
	and	\tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
	cbnz	\tmp, .Lskipoffs_\@
	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
.Lskipoffs_\@ :
#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
	mrs	\tmp, tcr_el1
	and	\tmp, \tmp, #TCR_T1SZ_MASK
	cmp	\tmp, #TCR_T1SZ(VA_BITS_MIN)
	orr	\tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET
	csel	\ttbr, \tmp, \ttbr, eq
#endif
	.endm
|
@@ -619,25 +614,13 @@ alternative_endif

	.macro	phys_to_pte, pte, phys
#ifdef CONFIG_ARM64_PA_BITS_52
	/*
	 * We assume \phys is 64K aligned and this is guaranteed by only
	 * supporting this configuration with 64K pages.
	 */
	orr	\pte, \phys, \phys, lsr #36
	and	\pte, \pte, #PTE_ADDR_MASK
	orr	\pte, \phys, \phys, lsr #PTE_ADDR_HIGH_SHIFT
	and	\pte, \pte, #PHYS_TO_PTE_ADDR_MASK
#else
	mov	\pte, \phys
#endif
	.endm

	.macro	pte_to_phys, phys, pte
	and	\phys, \pte, #PTE_ADDR_MASK
#ifdef CONFIG_ARM64_PA_BITS_52
	orr	\phys, \phys, \phys, lsl #PTE_ADDR_HIGH_SHIFT
	and	\phys, \phys, GENMASK_ULL(PHYS_MASK_SHIFT - 1, PAGE_SHIFT)
#endif
	.endm

/*
 * tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU.
 */
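The same 52-bit address packing reappears later in this diff as C helpers (__pte_to_phys()/__phys_to_pte_val() in pgtable.h). A stand-alone sketch of the bit manipulation, using the PTE_ADDR_LOW/PTE_ADDR_HIGH/PTE_ADDR_HIGH_SHIFT definitions introduced elsewhere in this series as assumptions:

/* Sketch: pack/unpack a 52-bit physical address into the descriptor layout.
 * The high PA bits live in low descriptor bits and are moved into place by
 * PTE_ADDR_HIGH_SHIFT (36 for 64K pages, 42 for 4K/16K pages with LPA2). */
static inline u64 sketch_pte_to_phys(u64 pte)
{
	return (pte & PTE_ADDR_LOW) | ((pte & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
}

static inline u64 sketch_phys_to_pte(u64 phys)
{
	return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}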
|
@@ -17,6 +17,8 @@

#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR	0
#define ARM64_SW_FEATURE_OVERRIDE_HVHE		4
#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF	8
#define ARM64_SW_FEATURE_OVERRIDE_NOWXN		12

#ifndef __ASSEMBLY__
@@ -910,7 +912,9 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);

extern struct arm64_ftr_override id_aa64mmfr0_override;
extern struct arm64_ftr_override id_aa64mmfr1_override;
extern struct arm64_ftr_override id_aa64mmfr2_override;
extern struct arm64_ftr_override id_aa64pfr0_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
extern struct arm64_ftr_override id_aa64zfr0_override;
@@ -920,9 +924,121 @@ extern struct arm64_ftr_override id_aa64isar2_override;

extern struct arm64_ftr_override arm64_sw_feature_override;

static inline
u64 arm64_apply_feature_override(u64 val, int feat, int width,
				 const struct arm64_ftr_override *override)
{
	u64 oval = override->val;

	/*
	 * When it encounters an invalid override (e.g., an override that
	 * cannot be honoured due to a missing CPU feature), the early idreg
	 * override code will set the mask to 0x0 and the value to non-zero for
	 * the field in question. In order to determine whether the override is
	 * valid or not for the field we are interested in, we first need to
	 * disregard bits belonging to other fields.
	 */
	oval &= GENMASK_ULL(feat + width - 1, feat);

	/*
	 * The override is valid if all value bits are accounted for in the
	 * mask. If so, replace the masked bits with the override value.
	 */
	if (oval == (oval & override->mask)) {
		val &= ~override->mask;
		val |= oval;
	}

	/* Extract the field from the updated value */
	return cpuid_feature_extract_unsigned_field(val, feat);
}

static inline bool arm64_test_sw_feature_override(int feat)
{
	/*
	 * Software features are pseudo CPU features that have no underlying
	 * CPUID system register value to apply the override to.
	 */
	return arm64_apply_feature_override(0, feat, 4,
					    &arm64_sw_feature_override);
}

static inline bool kaslr_disabled_cmdline(void)
{
	return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOKASLR);
}

static inline bool arm64_wxn_enabled(void)
{
	if (!IS_ENABLED(CONFIG_ARM64_WXN))
		return false;
	return !arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOWXN);
}

u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);

static inline bool cpu_has_bti(void)
{
	if (!IS_ENABLED(CONFIG_ARM64_BTI))
		return false;

	return arm64_apply_feature_override(read_cpuid(ID_AA64PFR1_EL1),
					    ID_AA64PFR1_EL1_BT_SHIFT, 4,
					    &id_aa64pfr1_override);
}

static inline bool cpu_has_pac(void)
{
	u64 isar1, isar2;

	if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
		return false;

	isar1 = read_cpuid(ID_AA64ISAR1_EL1);
	isar2 = read_cpuid(ID_AA64ISAR2_EL1);

	if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_APA_SHIFT, 4,
					 &id_aa64isar1_override))
		return true;

	if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_API_SHIFT, 4,
					 &id_aa64isar1_override))
		return true;

	return arm64_apply_feature_override(isar2, ID_AA64ISAR2_EL1_APA3_SHIFT, 4,
					    &id_aa64isar2_override);
}

static inline bool cpu_has_lva(void)
{
	u64 mmfr2;

	mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
	mmfr2 &= ~id_aa64mmfr2_override.mask;
	mmfr2 |= id_aa64mmfr2_override.val;
	return cpuid_feature_extract_unsigned_field(mmfr2,
						    ID_AA64MMFR2_EL1_VARange_SHIFT);
}

static inline bool cpu_has_lpa2(void)
{
#ifdef CONFIG_ARM64_LPA2
	u64 mmfr0;
	int feat;

	mmfr0 = read_sysreg(id_aa64mmfr0_el1);
	mmfr0 &= ~id_aa64mmfr0_override.mask;
	mmfr0 |= id_aa64mmfr0_override.val;
	feat = cpuid_feature_extract_signed_field(mmfr0,
						  ID_AA64MMFR0_EL1_TGRAN_SHIFT);

	return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
#else
	return false;
#endif
}

#endif /* __ASSEMBLY__ */

#endif
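arm64_apply_feature_override() is the pattern the early boot code uses to read an ID register field with any command-line override already folded in. A hypothetical caller (the helper name is invented for illustration; the register, field and override variable are the ones declared above) might look like:

/* Sketch: query a 4-bit unsigned ID register field with its override applied. */
static inline bool example_cpu_has_e0pd(void)
{
	return arm64_apply_feature_override(read_sysreg_s(SYS_ID_AA64MMFR2_EL1),
					    ID_AA64MMFR2_EL1_E0PD_SHIFT, 4,
					    &id_aa64mmfr2_override);
}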
|
@@ -117,15 +117,9 @@
#define ESR_ELx_FSC_ACCESS	(0x08)
#define ESR_ELx_FSC_FAULT	(0x04)
#define ESR_ELx_FSC_PERM	(0x0C)
#define ESR_ELx_FSC_SEA_TTW0	(0x14)
#define ESR_ELx_FSC_SEA_TTW1	(0x15)
#define ESR_ELx_FSC_SEA_TTW2	(0x16)
#define ESR_ELx_FSC_SEA_TTW3	(0x17)
#define ESR_ELx_FSC_SEA_TTW(n)	(0x14 + (n))
#define ESR_ELx_FSC_SECC	(0x18)
#define ESR_ELx_FSC_SECC_TTW0	(0x1c)
#define ESR_ELx_FSC_SECC_TTW1	(0x1d)
#define ESR_ELx_FSC_SECC_TTW2	(0x1e)
#define ESR_ELx_FSC_SECC_TTW3	(0x1f)
#define ESR_ELx_FSC_SECC_TTW(n)	(0x1c + (n))

/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT	(24)
@@ -394,6 +388,9 @@ static inline bool esr_is_data_abort(unsigned long esr)

static inline bool esr_fsc_is_translation_fault(unsigned long esr)
{
	/* Translation fault, level -1 */
	if ((esr & ESR_ELx_FSC) == 0b101011)
		return true;
	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
}
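The parameterised ESR_ELx_FSC_SEA_TTW(n)/ESR_ELx_FSC_SECC_TTW(n) forms exist so that the extra LPA2 walk level can be expressed as level -1. A trivial sketch of what the encoding yields (illustration only, helper name invented):

/* Sketch: form the external-abort-on-table-walk fault code for a given level.
 * With the macro above, levels -1..3 map to 0x13..0x17. */
static inline unsigned int example_sea_ttw_fsc(int level)
{
	return ESR_ELx_FSC_SEA_TTW(level);
}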
|
@ -87,6 +87,7 @@ enum fixed_addresses {
|
|||
FIX_PTE,
|
||||
FIX_PMD,
|
||||
FIX_PUD,
|
||||
FIX_P4D,
|
||||
FIX_PGD,
|
||||
|
||||
__end_of_fixed_addresses
|
||||
|
@ -100,7 +101,6 @@ enum fixed_addresses {
|
|||
#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
|
||||
|
||||
void __init early_fixmap_init(void);
|
||||
void __init fixmap_copy(pgd_t *pgdir);
|
||||
|
||||
#define __early_set_fixmap __set_fixmap
|
||||
|
||||
|
|
|
@ -17,11 +17,9 @@
|
|||
|
||||
asmlinkage void kasan_early_init(void);
|
||||
void kasan_init(void);
|
||||
void kasan_copy_shadow(pgd_t *pgdir);
|
||||
|
||||
#else
|
||||
static inline void kasan_init(void) { }
|
||||
static inline void kasan_copy_shadow(pgd_t *pgdir) { }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -13,28 +13,27 @@
|
|||
#include <asm/sparsemem.h>
|
||||
|
||||
/*
|
||||
* The linear mapping and the start of memory are both 2M aligned (per
|
||||
* the arm64 booting.txt requirements). Hence we can use section mapping
|
||||
* with 4K (section size = 2M) but not with 16K (section size = 32M) or
|
||||
* 64K (section size = 512M).
|
||||
* The physical and virtual addresses of the start of the kernel image are
|
||||
* equal modulo 2 MiB (per the arm64 booting.txt requirements). Hence we can
|
||||
* use section mapping with 4K (section size = 2M) but not with 16K (section
|
||||
* size = 32M) or 64K (section size = 512M).
|
||||
*/
|
||||
|
||||
/*
|
||||
* The idmap and swapper page tables need some space reserved in the kernel
|
||||
* image. Both require pgd, pud (4 levels only) and pmd tables to (section)
|
||||
* map the kernel. With the 64K page configuration, swapper and idmap need to
|
||||
* map to pte level. The swapper also maps the FDT (see __create_page_tables
|
||||
* for more information). Note that the number of ID map translation levels
|
||||
* could be increased on the fly if system RAM is out of reach for the default
|
||||
* VA range, so pages required to map highest possible PA are reserved in all
|
||||
* cases.
|
||||
*/
|
||||
#ifdef CONFIG_ARM64_4K_PAGES
|
||||
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
|
||||
#if defined(PMD_SIZE) && PMD_SIZE <= MIN_KIMG_ALIGN
|
||||
#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
|
||||
#define SWAPPER_SKIP_LEVEL 1
|
||||
#else
|
||||
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
|
||||
#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
|
||||
#define SWAPPER_SKIP_LEVEL 0
|
||||
#endif
|
||||
#define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT)
|
||||
#define SWAPPER_TABLE_SHIFT (SWAPPER_BLOCK_SHIFT + PAGE_SHIFT - 3)
|
||||
|
||||
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL)
|
||||
#define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL)
|
||||
|
||||
#define IDMAP_VA_BITS 48
|
||||
#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS)
|
||||
#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS)
|
||||
|
||||
/*
|
||||
* A relocatable kernel may execute from an address that differs from the one at
|
||||
|
@ -50,57 +49,39 @@
|
|||
#define EARLY_ENTRIES(vstart, vend, shift, add) \
|
||||
(SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
|
||||
|
||||
#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add))
|
||||
#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
|
||||
(lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0)
|
||||
|
||||
#if SWAPPER_PGTABLE_LEVELS > 3
|
||||
#define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add))
|
||||
#else
|
||||
#define EARLY_PUDS(vstart, vend, add) (0)
|
||||
#endif
|
||||
#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
|
||||
+ EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
|
||||
+ EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
|
||||
+ EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */
|
||||
#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
|
||||
+ EARLY_SEGMENT_EXTRA_PAGES))
|
||||
|
||||
#if SWAPPER_PGTABLE_LEVELS > 2
|
||||
#define EARLY_PMDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT, add))
|
||||
#else
|
||||
#define EARLY_PMDS(vstart, vend, add) (0)
|
||||
#endif
|
||||
#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1))
|
||||
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
|
||||
|
||||
#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \
|
||||
+ EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \
|
||||
+ EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \
|
||||
+ EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */
|
||||
#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE))
|
||||
#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
|
||||
#define INIT_IDMAP_FDT_SIZE ((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE)
|
||||
|
||||
/* the initial ID map may need two extra pages if it needs to be extended */
|
||||
#if VA_BITS < 48
|
||||
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
|
||||
#else
|
||||
#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
|
||||
#endif
|
||||
#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1)
|
||||
|
||||
/* Initial memory map size */
|
||||
#ifdef CONFIG_ARM64_4K_PAGES
|
||||
#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
|
||||
#define SWAPPER_BLOCK_SIZE PMD_SIZE
|
||||
#define SWAPPER_TABLE_SHIFT PUD_SHIFT
|
||||
#else
|
||||
#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
|
||||
#define SWAPPER_BLOCK_SIZE PAGE_SIZE
|
||||
#define SWAPPER_TABLE_SHIFT PMD_SHIFT
|
||||
#endif
|
||||
/* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */
|
||||
#define KERNEL_SEGMENT_COUNT 5
|
||||
|
||||
#if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN
|
||||
#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1)
|
||||
/*
|
||||
* Initial memory map attributes.
|
||||
* The initial ID map consists of the kernel image, mapped as two separate
|
||||
* segments, and may appear misaligned wrt the swapper block size. This means
|
||||
* we need 3 additional pages. The DT could straddle a swapper block boundary,
|
||||
* so it may need 2.
|
||||
*/
|
||||
#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
|
||||
#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN)
|
||||
|
||||
#ifdef CONFIG_ARM64_4K_PAGES
|
||||
#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE)
|
||||
#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
|
||||
#define EARLY_IDMAP_EXTRA_PAGES 3
|
||||
#define EARLY_IDMAP_EXTRA_FDT_PAGES 2
|
||||
#else
|
||||
#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
|
||||
#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
|
||||
#define EARLY_SEGMENT_EXTRA_PAGES 0
|
||||
#define EARLY_IDMAP_EXTRA_PAGES 0
|
||||
#define EARLY_IDMAP_EXTRA_FDT_PAGES 0
|
||||
#endif
|
||||
|
||||
#endif /* __ASM_KERNEL_PGTABLE_H */
|
||||
|
|
|
@@ -425,15 +425,9 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
	switch (kvm_vcpu_trap_get_fault(vcpu)) {
	case ESR_ELx_FSC_EXTABT:
	case ESR_ELx_FSC_SEA_TTW0:
	case ESR_ELx_FSC_SEA_TTW1:
	case ESR_ELx_FSC_SEA_TTW2:
	case ESR_ELx_FSC_SEA_TTW3:
	case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3):
	case ESR_ELx_FSC_SECC:
	case ESR_ELx_FSC_SECC_TTW0:
	case ESR_ELx_FSC_SECC_TTW1:
	case ESR_ELx_FSC_SECC_TTW2:
	case ESR_ELx_FSC_SECC_TTW3:
	case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3):
		return true;
	default:
		return false;
|
@@ -54,7 +54,11 @@
#define FIXADDR_TOP		(-UL(SZ_8M))

#if VA_BITS > 48
#ifdef CONFIG_ARM64_16K_PAGES
#define VA_BITS_MIN		(47)
#else
#define VA_BITS_MIN		(48)
#endif
#else
#define VA_BITS_MIN		(VA_BITS)
#endif
@@ -209,9 +213,20 @@
#include <asm/boot.h>
#include <asm/bug.h>
#include <asm/sections.h>
#include <asm/sysreg.h>

static inline u64 __pure read_tcr(void)
{
	u64 tcr;

	// read_sysreg() uses asm volatile, so avoid it here
	asm("mrs %0, tcr_el1" : "=r"(tcr));
	return tcr;
}

#if VA_BITS > 48
extern u64 vabits_actual;
// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here
#define vabits_actual	(64 - ((read_tcr() >> 16) & 63))
#else
#define vabits_actual	((u64)VA_BITS)
#endif
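vabits_actual is now derived on the fly from TCR_EL1 rather than kept in a variable: bits [21:16] of TCR_EL1 hold T1SZ, and the addressable kernel VA size is 64 - T1SZ. A small worked sketch of that arithmetic (values for illustration only):

/* Sketch: recover the runtime VA size from a TCR_EL1 value.
 * Example: T1SZ = 16 -> 48-bit VAs; T1SZ = 12 -> 52-bit VAs. */
static inline u64 example_vabits_from_tcr(u64 tcr)
{
	u64 t1sz = (tcr >> 16) & 63;	/* TCR_EL1.T1SZ, bits [21:16] */

	return 64 - t1sz;
}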
|
@@ -35,11 +35,40 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
}
#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)

static inline bool arm64_check_wx_prot(unsigned long prot,
				       struct task_struct *tsk)
{
	/*
	 * When we are running with SCTLR_ELx.WXN==1, writable mappings are
	 * implicitly non-executable. This means we should reject such mappings
	 * when user space attempts to create them using mmap() or mprotect().
	 */
	if (arm64_wxn_enabled() &&
	    ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))) {
		/*
		 * User space libraries such as libffi carry elaborate
		 * heuristics to decide whether it is worth it to even attempt
		 * to create writable executable mappings, as PaX or selinux
		 * enabled systems will outright reject it. They will usually
		 * fall back to something else (e.g., two separate shared
		 * mmap()s of a temporary file) on failure.
		 */
		pr_info_ratelimited(
			"process %s (%d) attempted to create PROT_WRITE+PROT_EXEC mapping\n",
			tsk->comm, tsk->pid);
		return false;
	}
	return true;
}

static inline bool arch_validate_prot(unsigned long prot,
				      unsigned long addr __always_unused)
{
	unsigned long supported = PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM;

	if (!arm64_check_wx_prot(prot, current))
		return false;

	if (system_supports_bti())
		supported |= PROT_BTI;
@@ -50,6 +79,13 @@ static inline bool arch_validate_prot(unsigned long prot,
}
#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)

static inline bool arch_validate_mmap_prot(unsigned long prot,
					   unsigned long addr)
{
	return arm64_check_wx_prot(prot, current);
}
#define arch_validate_mmap_prot arch_validate_mmap_prot

static inline bool arch_validate_flags(unsigned long vm_flags)
{
	if (!system_supports_mte())
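From user space, the effect of the new hook is simply that a writable+executable anonymous mapping fails on a WXN-enabled kernel. A minimal sketch of what a caller would observe (ordinary POSIX mmap(), nothing arm64-specific in the code itself):

/* Sketch: on a kernel built with ARM64_WXN and booted without arm64.nowxn,
 * this mmap() is expected to be rejected by the arch_validate_mmap_prot()
 * hook above; the exact errno is not guaranteed by this sketch. */
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		perror("mmap(PROT_WRITE|PROT_EXEC) rejected");
	else
		puts("writable+executable mapping allowed");
	return 0;
}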
|
@ -71,10 +71,46 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
|
|||
pgprot_t prot, bool page_mappings_only);
|
||||
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
|
||||
extern void mark_linear_text_alias_ro(void);
|
||||
extern bool kaslr_requires_kpti(void);
|
||||
|
||||
/*
|
||||
* This check is triggered during the early boot before the cpufeature
|
||||
* is initialised. Checking the status on the local CPU allows the boot
|
||||
* CPU to detect the need for non-global mappings and thus avoiding a
|
||||
* pagetable re-write after all the CPUs are booted. This check will be
|
||||
* anyway run on individual CPUs, allowing us to get the consistent
|
||||
* state once the SMP CPUs are up and thus make the switch to non-global
|
||||
* mappings if required.
|
||||
*/
|
||||
static inline bool kaslr_requires_kpti(void)
|
||||
{
|
||||
/*
|
||||
* E0PD does a similar job to KPTI so can be used instead
|
||||
* where available.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
|
||||
u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
|
||||
if (cpuid_feature_extract_unsigned_field(mmfr2,
|
||||
ID_AA64MMFR2_EL1_E0PD_SHIFT))
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Systems affected by Cavium erratum 24756 are incompatible
|
||||
* with KPTI.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
|
||||
extern const struct midr_range cavium_erratum_27456_cpus[];
|
||||
|
||||
if (is_midr_in_range_list(read_cpuid_id(),
|
||||
cavium_erratum_27456_cpus))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#define INIT_MM_CONTEXT(name) \
|
||||
.pgd = init_pg_dir,
|
||||
.pgd = swapper_pg_dir,
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#endif
|
||||
|
|
|
@ -20,13 +20,41 @@
|
|||
#include <asm/cpufeature.h>
|
||||
#include <asm/daifflags.h>
|
||||
#include <asm/proc-fns.h>
|
||||
#include <asm-generic/mm_hooks.h>
|
||||
#include <asm/cputype.h>
|
||||
#include <asm/sysreg.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
extern bool rodata_full;
|
||||
|
||||
static inline int arch_dup_mmap(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void arch_exit_mmap(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void arch_unmap(struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
||||
bool write, bool execute, bool foreign)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_ARM64_WXN) && execute &&
|
||||
(vma->vm_flags & (VM_WRITE | VM_EXEC)) == (VM_WRITE | VM_EXEC)) {
|
||||
pr_warn_ratelimited(
|
||||
"process %s (%d) attempted to execute from writable memory\n",
|
||||
current->comm, current->pid);
|
||||
/* disallow unless the nowxn override is set */
|
||||
return !arm64_wxn_enabled();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void contextidr_thread_switch(struct task_struct *next)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
|
||||
|
@ -61,11 +89,9 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
|
|||
}
|
||||
|
||||
/*
|
||||
* TCR.T0SZ value to use when the ID map is active. Usually equals
|
||||
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
|
||||
* physical memory, in which case it will be smaller.
|
||||
* TCR.T0SZ value to use when the ID map is active.
|
||||
*/
|
||||
extern int idmap_t0sz;
|
||||
#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS)
|
||||
|
||||
/*
|
||||
* Ensure TCR.T0SZ is set to the provided value.
|
||||
|
@ -110,18 +136,13 @@ static inline void cpu_uninstall_idmap(void)
|
|||
cpu_switch_mm(mm->pgd, mm);
|
||||
}
|
||||
|
||||
static inline void __cpu_install_idmap(pgd_t *idmap)
|
||||
static inline void cpu_install_idmap(void)
|
||||
{
|
||||
cpu_set_reserved_ttbr0();
|
||||
local_flush_tlb_all();
|
||||
cpu_set_idmap_tcr_t0sz();
|
||||
|
||||
cpu_switch_mm(lm_alias(idmap), &init_mm);
|
||||
}
|
||||
|
||||
static inline void cpu_install_idmap(void)
|
||||
{
|
||||
__cpu_install_idmap(idmap_pg_dir);
|
||||
cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -148,51 +169,21 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
|
|||
isb();
|
||||
}
|
||||
|
||||
/*
|
||||
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
|
||||
* avoiding the possibility of conflicting TLB entries being allocated.
|
||||
*/
|
||||
static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp)
|
||||
{
|
||||
typedef void (ttbr_replace_func)(phys_addr_t);
|
||||
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
|
||||
ttbr_replace_func *replace_phys;
|
||||
unsigned long daif;
|
||||
|
||||
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
|
||||
phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
|
||||
|
||||
if (cnp)
|
||||
ttbr1 |= TTBR_CNP_BIT;
|
||||
|
||||
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
|
||||
|
||||
__cpu_install_idmap(idmap);
|
||||
|
||||
/*
|
||||
* We really don't want to take *any* exceptions while TTBR1 is
|
||||
* in the process of being replaced so mask everything.
|
||||
*/
|
||||
daif = local_daif_save();
|
||||
replace_phys(ttbr1);
|
||||
local_daif_restore(daif);
|
||||
|
||||
cpu_uninstall_idmap();
|
||||
}
|
||||
void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp);
|
||||
|
||||
static inline void cpu_enable_swapper_cnp(void)
|
||||
{
|
||||
__cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir, true);
|
||||
__cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true);
|
||||
}
|
||||
|
||||
static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
|
||||
static inline void cpu_replace_ttbr1(pgd_t *pgdp)
|
||||
{
|
||||
/*
|
||||
* Only for early TTBR1 replacement before cpucaps are finalized and
|
||||
* before we've decided whether to use CNP.
|
||||
*/
|
||||
WARN_ON(system_capabilities_finalized());
|
||||
__cpu_replace_ttbr1(pgdp, idmap, false);
|
||||
__cpu_replace_ttbr1(pgdp, false);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <asm/tlbflush.h>
|
||||
|
||||
#define __HAVE_ARCH_PGD_FREE
|
||||
#define __HAVE_ARCH_PUD_FREE
|
||||
#include <asm-generic/pgalloc.h>
|
||||
|
||||
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
|
||||
|
@ -43,7 +44,8 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
|
|||
|
||||
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
|
||||
{
|
||||
set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
|
||||
if (pgtable_l4_enabled())
|
||||
set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
|
||||
}
|
||||
|
||||
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
|
||||
|
@ -53,6 +55,13 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
|
|||
p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
|
||||
__p4d_populate(p4dp, __pa(pudp), p4dval);
|
||||
}
|
||||
|
||||
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
|
||||
{
|
||||
if (!pgtable_l4_enabled())
|
||||
return;
|
||||
__pud_free(mm, pud);
|
||||
}
|
||||
#else
|
||||
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
|
||||
{
|
||||
|
@ -60,6 +69,47 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
|
|||
}
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
|
||||
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
|
||||
{
|
||||
if (pgtable_l5_enabled())
|
||||
set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot));
|
||||
}
|
||||
|
||||
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp)
|
||||
{
|
||||
pgdval_t pgdval = PGD_TYPE_TABLE;
|
||||
|
||||
pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN;
|
||||
__pgd_populate(pgdp, __pa(p4dp), pgdval);
|
||||
}
|
||||
|
||||
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
gfp_t gfp = GFP_PGTABLE_USER;
|
||||
|
||||
if (mm == &init_mm)
|
||||
gfp = GFP_PGTABLE_KERNEL;
|
||||
return (p4d_t *)get_zeroed_page(gfp);
|
||||
}
|
||||
|
||||
static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
|
||||
{
|
||||
if (!pgtable_l5_enabled())
|
||||
return;
|
||||
BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
|
||||
free_page((unsigned long)p4d);
|
||||
}
|
||||
|
||||
#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
|
||||
#else
|
||||
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
|
||||
{
|
||||
BUILD_BUG();
|
||||
}
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
|
||||
|
||||
extern pgd_t *pgd_alloc(struct mm_struct *mm);
|
||||
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
|
||||
|
||||
|
|
|
@ -26,10 +26,10 @@
|
|||
#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
|
||||
|
||||
/*
|
||||
* Size mapped by an entry at level n ( 0 <= n <= 3)
|
||||
* Size mapped by an entry at level n ( -1 <= n <= 3)
|
||||
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
|
||||
* in the final page. The maximum number of translation levels supported by
|
||||
* the architecture is 4. Hence, starting at level n, we have further
|
||||
* the architecture is 5. Hence, starting at level n, we have further
|
||||
* ((4 - n) - 1) levels of translation excluding the offset within the page.
|
||||
* So, the total number of bits mapped by an entry at level n is :
|
||||
*
|
||||
|
@ -62,9 +62,16 @@
|
|||
#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
|
||||
#endif
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
|
||||
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
|
||||
#define P4D_MASK (~(P4D_SIZE-1))
|
||||
#define PTRS_PER_P4D (1 << (PAGE_SHIFT - 3))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* PGDIR_SHIFT determines the size a top-level page table entry can map
|
||||
* (depending on the configuration, this level can be 0, 1 or 2).
|
||||
* (depending on the configuration, this level can be -1, 0, 1 or 2).
|
||||
*/
|
||||
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
|
||||
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
|
||||
|
@ -87,6 +94,15 @@
|
|||
/*
|
||||
* Hardware page table definitions.
|
||||
*
|
||||
* Level -1 descriptor (PGD).
|
||||
*/
|
||||
#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0)
|
||||
#define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
|
||||
#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
|
||||
#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59)
|
||||
#define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60)
|
||||
|
||||
/*
|
||||
* Level 0 descriptor (P4D).
|
||||
*/
|
||||
#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
|
||||
|
@ -155,13 +171,17 @@
|
|||
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
|
||||
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
|
||||
|
||||
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
|
||||
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
|
||||
#ifdef CONFIG_ARM64_PA_BITS_52
|
||||
#ifdef CONFIG_ARM64_64K_PAGES
|
||||
#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
|
||||
#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
|
||||
#define PTE_ADDR_HIGH_SHIFT 36
|
||||
#define PHYS_TO_PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
|
||||
#else
|
||||
#define PTE_ADDR_MASK PTE_ADDR_LOW
|
||||
#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8)
|
||||
#define PTE_ADDR_HIGH_SHIFT 42
|
||||
#define PHYS_TO_PTE_ADDR_MASK GENMASK_ULL(49, 8)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -284,6 +304,7 @@
|
|||
#define TCR_E0PD1 (UL(1) << 56)
|
||||
#define TCR_TCMA0 (UL(1) << 57)
|
||||
#define TCR_TCMA1 (UL(1) << 58)
|
||||
#define TCR_DS (UL(1) << 59)
|
||||
|
||||
/*
|
||||
* TTBR.
|
||||
|
|
|
@ -30,8 +30,8 @@
|
|||
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
|
||||
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
|
||||
|
||||
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
|
||||
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
|
||||
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF)
|
||||
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF)
|
||||
|
||||
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
|
||||
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
|
||||
|
@ -57,10 +57,6 @@
|
|||
#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
|
||||
#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
#define PTE_MAYBE_NG 0
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
|
@ -71,7 +67,19 @@ extern bool arm64_use_ng_mappings;
|
|||
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
|
||||
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
|
||||
|
||||
#ifndef CONFIG_ARM64_LPA2
|
||||
#define lpa2_is_enabled() false
|
||||
#define PTE_MAYBE_SHARED PTE_SHARED
|
||||
#define PMD_MAYBE_SHARED PMD_SECT_S
|
||||
#else
|
||||
static inline bool __pure lpa2_is_enabled(void)
|
||||
{
|
||||
return read_tcr() & TCR_DS;
|
||||
}
|
||||
|
||||
#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
|
||||
#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If we have userspace only BTI we don't want to mark kernel pages
|
||||
|
|
|
@ -36,6 +36,12 @@ typedef struct { pudval_t pud; } pud_t;
|
|||
#define __pud(x) ((pud_t) { (x) } )
|
||||
#endif
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
typedef struct { p4dval_t p4d; } p4d_t;
|
||||
#define p4d_val(x) ((x).p4d)
|
||||
#define __p4d(x) ((p4d_t) { (x) } )
|
||||
#endif
|
||||
|
||||
typedef struct { pgdval_t pgd; } pgd_t;
|
||||
#define pgd_val(x) ((x).pgd)
|
||||
#define __pgd(x) ((pgd_t) { (x) } )
|
||||
|
|
|
@ -80,15 +80,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|||
#ifdef CONFIG_ARM64_PA_BITS_52
|
||||
static inline phys_addr_t __pte_to_phys(pte_t pte)
|
||||
{
|
||||
pte_val(pte) &= ~PTE_MAYBE_SHARED;
|
||||
return (pte_val(pte) & PTE_ADDR_LOW) |
|
||||
((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
|
||||
}
|
||||
static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
|
||||
{
|
||||
return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK;
|
||||
return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
|
||||
}
|
||||
#else
|
||||
#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
|
||||
#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW)
|
||||
#define __phys_to_pte_val(phys) (phys)
|
||||
#endif
|
||||
|
||||
|
@ -620,12 +621,12 @@ static inline bool pud_table(pud_t pud) { return true; }
|
|||
PUD_TYPE_TABLE)
|
||||
#endif
|
||||
|
||||
extern pgd_t init_pg_dir[PTRS_PER_PGD];
|
||||
extern pgd_t init_pg_dir[];
|
||||
extern pgd_t init_pg_end[];
|
||||
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
|
||||
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
|
||||
extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
|
||||
extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
|
||||
extern pgd_t swapper_pg_dir[];
|
||||
extern pgd_t idmap_pg_dir[];
|
||||
extern pgd_t tramp_pg_dir[];
|
||||
extern pgd_t reserved_pg_dir[];
|
||||
|
||||
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
|
||||
|
||||
|
@ -698,14 +699,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
|
|||
#define pud_user(pud) pte_user(pud_pte(pud))
|
||||
#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
|
||||
|
||||
static inline bool pgtable_l4_enabled(void);
|
||||
|
||||
static inline void set_pud(pud_t *pudp, pud_t pud)
|
||||
{
|
||||
#ifdef __PAGETABLE_PUD_FOLDED
|
||||
if (in_swapper_pgdir(pudp)) {
|
||||
if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
|
||||
set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
|
||||
return;
|
||||
}
|
||||
#endif /* __PAGETABLE_PUD_FOLDED */
|
||||
|
||||
WRITE_ONCE(*pudp, pud);
|
||||
|
||||
|
@ -758,12 +759,27 @@ static inline pmd_t *pud_pgtable(pud_t pud)
|
|||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
|
||||
static __always_inline bool pgtable_l4_enabled(void)
|
||||
{
|
||||
if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
|
||||
return true;
|
||||
if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
|
||||
return vabits_actual == VA_BITS;
|
||||
return alternative_has_cap_unlikely(ARM64_HAS_VA52);
|
||||
}
|
||||
|
||||
static inline bool mm_pud_folded(const struct mm_struct *mm)
|
||||
{
|
||||
return !pgtable_l4_enabled();
|
||||
}
|
||||
#define mm_pud_folded mm_pud_folded
|
||||
|
||||
#define pud_ERROR(e) \
|
||||
pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
|
||||
|
||||
#define p4d_none(p4d) (!p4d_val(p4d))
|
||||
#define p4d_bad(p4d) (!(p4d_val(p4d) & 2))
|
||||
#define p4d_present(p4d) (p4d_val(p4d))
|
||||
#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
|
||||
#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & 2))
|
||||
#define p4d_present(p4d) (!p4d_none(p4d))
|
||||
|
||||
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
|
||||
{
|
||||
|
@ -779,7 +795,8 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
|
|||
|
||||
static inline void p4d_clear(p4d_t *p4dp)
|
||||
{
|
||||
set_p4d(p4dp, __p4d(0));
|
||||
if (pgtable_l4_enabled())
|
||||
set_p4d(p4dp, __p4d(0));
|
||||
}
|
||||
|
||||
static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
|
||||
|
@ -787,27 +804,75 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
|
|||
return __p4d_to_phys(p4d);
|
||||
}
|
||||
|
||||
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
|
||||
|
||||
static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
|
||||
{
|
||||
return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
|
||||
}
|
||||
|
||||
static inline pud_t *p4d_pgtable(p4d_t p4d)
|
||||
{
|
||||
return (pud_t *)__va(p4d_page_paddr(p4d));
|
||||
}
|
||||
|
||||
/* Find an entry in the first-level page table. */
|
||||
#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
|
||||
static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
|
||||
{
|
||||
BUG_ON(!pgtable_l4_enabled());
|
||||
|
||||
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
|
||||
#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr))
|
||||
#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
|
||||
return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
|
||||
}
|
||||
|
||||
static inline
|
||||
pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
|
||||
{
|
||||
if (!pgtable_l4_enabled())
|
||||
return p4d_to_folded_pud(p4dp, addr);
|
||||
return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
|
||||
}
|
||||
#define pud_offset_lockless pud_offset_lockless
|
||||
|
||||
static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
|
||||
{
|
||||
return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
|
||||
}
|
||||
#define pud_offset pud_offset
|
||||
|
||||
static inline pud_t *pud_set_fixmap(unsigned long addr)
|
||||
{
|
||||
if (!pgtable_l4_enabled())
|
||||
return NULL;
|
||||
return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
|
||||
}
|
||||
|
||||
static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
|
||||
{
|
||||
if (!pgtable_l4_enabled())
|
||||
return p4d_to_folded_pud(p4dp, addr);
|
||||
return pud_set_fixmap(pud_offset_phys(p4dp, addr));
|
||||
}
|
||||
|
||||
static inline void pud_clear_fixmap(void)
|
||||
{
|
||||
if (pgtable_l4_enabled())
|
||||
clear_fixmap(FIX_PUD);
|
||||
}
|
||||
|
||||
/* use ONLY for statically allocated translation tables */
|
||||
static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
|
||||
{
|
||||
if (!pgtable_l4_enabled())
|
||||
return p4d_to_folded_pud(p4dp, addr);
|
||||
return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
|
||||
}
|
||||
|
||||
#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
|
||||
|
||||
/* use ONLY for statically allocated translation tables */
|
||||
#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
|
||||
|
||||
#else
|
||||
|
||||
static inline bool pgtable_l4_enabled(void) { return false; }
|
||||
|
||||
#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;})
|
||||
#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
|
||||
|
||||
/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
|
||||
#define pud_set_fixmap(addr) NULL
|
||||
|
@ -818,6 +883,122 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
|
|||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
|
||||
static __always_inline bool pgtable_l5_enabled(void)
|
||||
{
|
||||
if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
|
||||
return vabits_actual == VA_BITS;
|
||||
return alternative_has_cap_unlikely(ARM64_HAS_VA52);
|
||||
}
|
||||
|
||||
static inline bool mm_p4d_folded(const struct mm_struct *mm)
|
||||
{
|
||||
return !pgtable_l5_enabled();
|
||||
}
|
||||
#define mm_p4d_folded mm_p4d_folded
|
||||
|
||||
#define p4d_ERROR(e) \
|
||||
pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
|
||||
|
||||
#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
|
||||
#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & 2))
|
||||
#define pgd_present(pgd) (!pgd_none(pgd))
|
||||
|
||||
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
{
|
||||
if (in_swapper_pgdir(pgdp)) {
|
||||
set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
|
||||
return;
|
||||
}
|
||||
|
||||
WRITE_ONCE(*pgdp, pgd);
|
||||
dsb(ishst);
|
||||
isb();
|
||||
}
|
||||
|
||||
static inline void pgd_clear(pgd_t *pgdp)
|
||||
{
|
||||
if (pgtable_l5_enabled())
|
||||
set_pgd(pgdp, __pgd(0));
|
||||
}
|
||||
|
||||
static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
|
||||
{
|
||||
return __pgd_to_phys(pgd);
|
||||
}
|
||||
|
||||
#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
|
||||
|
||||
static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
|
||||
{
|
||||
return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
|
||||
}
|
||||
|
||||
static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
|
||||
{
|
||||
BUG_ON(!pgtable_l5_enabled());
|
||||
|
||||
return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
|
||||
}
|
||||
|
||||
static inline
|
||||
p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
|
||||
{
|
||||
if (!pgtable_l5_enabled())
|
||||
return pgd_to_folded_p4d(pgdp, addr);
|
||||
return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
|
||||
}
|
||||
#define p4d_offset_lockless p4d_offset_lockless
|
||||
|
||||
static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
|
||||
{
|
||||
return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
|
||||
}
|
||||
|
||||
static inline p4d_t *p4d_set_fixmap(unsigned long addr)
|
||||
{
|
||||
if (!pgtable_l5_enabled())
|
||||
return NULL;
|
||||
return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
|
||||
}
|
||||
|
||||
static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
|
||||
{
|
||||
if (!pgtable_l5_enabled())
|
||||
return pgd_to_folded_p4d(pgdp, addr);
|
||||
return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
|
||||
}
|
||||
|
||||
static inline void p4d_clear_fixmap(void)
|
||||
{
|
||||
if (pgtable_l5_enabled())
|
||||
clear_fixmap(FIX_P4D);
|
||||
}
|
||||
|
||||
/* use ONLY for statically allocated translation tables */
|
||||
static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
|
||||
{
|
||||
if (!pgtable_l5_enabled())
|
||||
return pgd_to_folded_p4d(pgdp, addr);
|
||||
return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
|
||||
}
|
||||
|
||||
#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
|
||||
|
||||
#else
|
||||
|
||||
static inline bool pgtable_l5_enabled(void) { return false; }
|
||||
|
||||
/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
|
||||
#define p4d_set_fixmap(addr) NULL
|
||||
#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp)
|
||||
#define p4d_clear_fixmap()
|
||||
|
||||
#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir)
|
||||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
|
||||
|
||||
#define pgd_ERROR(e) \
|
||||
pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
|
||||
|
||||
|
|
|
@ -33,37 +33,11 @@
|
|||
#include <asm/cpufeature.h>
|
||||
|
||||
#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
|
||||
static inline bool should_patch_pac_into_scs(void)
|
||||
{
|
||||
u64 reg;
|
||||
|
||||
/*
|
||||
* We only enable the shadow call stack dynamically if we are running
|
||||
* on a system that does not implement PAC or BTI. PAC and SCS provide
|
||||
* roughly the same level of protection, and BTI relies on the PACIASP
|
||||
* instructions serving as landing pads, preventing us from patching
|
||||
* those instructions into something else.
|
||||
*/
|
||||
reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1);
|
||||
if (SYS_FIELD_GET(ID_AA64ISAR1_EL1, APA, reg) |
|
||||
SYS_FIELD_GET(ID_AA64ISAR1_EL1, API, reg))
|
||||
return false;
|
||||
|
||||
reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
|
||||
if (SYS_FIELD_GET(ID_AA64ISAR2_EL1, APA3, reg))
|
||||
return false;
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
|
||||
reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
|
||||
if (reg & (0xf << ID_AA64PFR1_EL1_BT_SHIFT))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void dynamic_scs_init(void)
|
||||
{
|
||||
if (should_patch_pac_into_scs()) {
|
||||
extern bool __pi_dynamic_scs_is_enabled;
|
||||
|
||||
        if (__pi_dynamic_scs_is_enabled) {
                pr_info("Enabling dynamic shadow call stack\n");
                static_branch_enable(&dynamic_scs_enabled);
        }

@@ -72,8 +46,8 @@ static inline void dynamic_scs_init(void)
static inline void dynamic_scs_init(void) {}
#endif

int scs_patch(const u8 eh_frame[], int size);
asmlinkage void scs_patch_vmlinux(void);
int __pi_scs_patch(const u8 eh_frame[], int size);
asmlinkage void __pi_scs_patch_vmlinux(void);

#endif /* __ASSEMBLY__ */

@@ -7,9 +7,6 @@
#include <uapi/asm/setup.h>

void *get_early_fdt_ptr(void);
void early_fdt_map(u64 dt_phys);

/*
 * These two variables are used in the head.S file.
 */

@@ -103,6 +103,9 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
{
        struct ptdesc *ptdesc = virt_to_ptdesc(pudp);

        if (!pgtable_l4_enabled())
                return;

        pagetable_pud_dtor(ptdesc);
        tlb_remove_ptdesc(tlb, ptdesc);
}

@@ -33,8 +33,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
                        return_address.o cpuinfo.o cpu_errata.o \
                        cpufeature.o alternative.o cacheinfo.o \
                        smp.o smp_spin_table.o topology.o smccc-call.o \
                        syscall.o proton-pack.o idreg-override.o idle.o \
                        patching.o
                        syscall.o proton-pack.o idle.o patching.o pi/

obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
                        sys_compat.o

@@ -57,7 +56,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \

@@ -72,14 +71,6 @@ obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o

# We need to prevent the SCS patching code from patching itself. Using
# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
# generated triggers an issue with full LTO on Clang, which stops emitting PAC
# instructions altogether. So disable LTO as well for the compilation unit.
CFLAGS_patch-scs.o += -mbranch-protection=none
CFLAGS_REMOVE_patch-scs.o += $(CC_FLAGS_LTO)

# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so

@@ -688,13 +688,15 @@ static const struct arm64_ftr_bits ftr_raz[] = {
#define ARM64_FTR_REG(id, table) \
        __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)

struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
struct arm64_ftr_override id_aa64mmfr0_override;
struct arm64_ftr_override id_aa64mmfr1_override;
struct arm64_ftr_override id_aa64mmfr2_override;
struct arm64_ftr_override id_aa64pfr0_override;
struct arm64_ftr_override id_aa64pfr1_override;
struct arm64_ftr_override id_aa64zfr0_override;
struct arm64_ftr_override id_aa64smfr0_override;
struct arm64_ftr_override id_aa64isar1_override;
struct arm64_ftr_override id_aa64isar2_override;

struct arm64_ftr_override arm64_sw_feature_override;

@@ -755,10 +757,12 @@ static const struct __ftr_reg_entry {
        ARM64_FTR_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3),

        /* Op1 = 0, CRn = 0, CRm = 7 */
        ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0,
                               &id_aa64mmfr0_override),
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
                               &id_aa64mmfr1_override),
        ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2,
                               &id_aa64mmfr2_override),
        ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),

        /* Op1 = 1, CRn = 0, CRm = 0 */

@@ -1669,46 +1673,6 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
        return has_cpuid_feature(entry, scope);
}

/*
 * This check is triggered during the early boot before the cpufeature
 * is initialised. Checking the status on the local CPU allows the boot
 * CPU to detect the need for non-global mappings and thus avoiding a
 * pagetable re-write after all the CPUs are booted. This check will be
 * anyway run on individual CPUs, allowing us to get the consistent
 * state once the SMP CPUs are up and thus make the switch to non-global
 * mappings if required.
 */
bool kaslr_requires_kpti(void)
{
        if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
                return false;

        /*
         * E0PD does a similar job to KPTI so can be used instead
         * where available.
         */
        if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
                u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
                if (cpuid_feature_extract_unsigned_field(mmfr2,
                                                ID_AA64MMFR2_EL1_E0PD_SHIFT))
                        return false;
        }

        /*
         * Systems affected by Cavium erratum 24756 are incompatible
         * with KPTI.
         */
        if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
                extern const struct midr_range cavium_erratum_27456_cpus[];

                if (is_midr_in_range_list(read_cpuid_id(),
                                          cavium_erratum_27456_cpus))
                        return false;
        }

        return kaslr_enabled();
}

static bool __meltdown_safe = true;
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */

@@ -1761,7 +1725,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
        }

        /* Useful for KASLR robustness */
        if (kaslr_requires_kpti()) {
        if (kaslr_enabled() && kaslr_requires_kpti()) {
                if (!__kpti_forced) {
                        str = "KASLR";
                        __kpti_forced = 1;

@@ -1850,6 +1814,11 @@ static int __init __kpti_install_ng_mappings(void *__unused)
        pgd_t *kpti_ng_temp_pgd;
        u64 alloc = 0;

        if (levels == 5 && !pgtable_l5_enabled())
                levels = 4;
        else if (levels == 4 && !pgtable_l4_enabled())
                levels = 3;

        remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);

        if (!cpu) {

@@ -1863,9 +1832,9 @@ static int __init __kpti_install_ng_mappings(void *__unused)
        //
        // The physical pages are laid out as follows:
        //
        // +--------+-/-------+-/------ +-\\--------+
        // : PTE[] : | PMD[] : | PUD[] : || PGD[] :
        // +--------+-\-------+-\------ +-//--------+
        // +--------+-/-------+-/------ +-/------ +-\\\--------+
        // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] :
        // +--------+-\-------+-\------ +-\------ +-///--------+
        //                                                   ^
        // The first page is mapped into this hierarchy at a PMD_SHIFT
        // aligned virtual address, so that we can manipulate the PTE

@@ -2091,14 +2060,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
                          int __unused)
{
        u64 val;

        val = read_sysreg(id_aa64mmfr1_el1);
        if (!cpuid_feature_extract_unsigned_field(val, ID_AA64MMFR1_EL1_VH_SHIFT))
                return false;

        val = arm64_sw_feature_override.val & arm64_sw_feature_override.mask;
        return cpuid_feature_extract_unsigned_field(val, ARM64_SW_FEATURE_OVERRIDE_HVHE);
        return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}

#ifdef CONFIG_ARM64_PAN

@@ -2796,6 +2758,24 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .cpu_enable = cpu_enable_fpmr,
                ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP)
        },
#ifdef CONFIG_ARM64_VA_BITS_52
        {
                .capability = ARM64_HAS_VA52,
                .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
                .matches = has_cpuid_feature,
#ifdef CONFIG_ARM64_64K_PAGES
                .desc = "52-bit Virtual Addressing (LVA)",
                ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, VARange, 52)
#else
                .desc = "52-bit Virtual Addressing (LPA2)",
#ifdef CONFIG_ARM64_4K_PAGES
                ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)
#else
                ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)
#endif
#endif
        },
#endif
        {},
};

@ -80,28 +80,42 @@
|
|||
* x19 primary_entry() .. start_kernel() whether we entered with the MMU on
|
||||
* x20 primary_entry() .. __primary_switch() CPU boot mode
|
||||
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
|
||||
* x22 create_idmap() .. start_kernel() ID map VA of the DT blob
|
||||
* x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
|
||||
* x24 __primary_switch() linear map KASLR seed
|
||||
* x25 primary_entry() .. start_kernel() supported VA size
|
||||
* x28 create_idmap() callee preserved temp register
|
||||
*/
|
||||
SYM_CODE_START(primary_entry)
|
||||
bl record_mmu_state
|
||||
bl preserve_boot_args
|
||||
bl create_idmap
|
||||
|
||||
adrp x1, early_init_stack
|
||||
mov sp, x1
|
||||
mov x29, xzr
|
||||
adrp x0, init_idmap_pg_dir
|
||||
mov x1, xzr
|
||||
bl __pi_create_init_idmap
|
||||
|
||||
/*
|
||||
* If the page tables have been populated with non-cacheable
|
||||
* accesses (MMU disabled), invalidate those tables again to
|
||||
* remove any speculatively loaded cache lines.
|
||||
*/
|
||||
cbnz x19, 0f
|
||||
dmb sy
|
||||
mov x1, x0 // end of used region
|
||||
adrp x0, init_idmap_pg_dir
|
||||
adr_l x2, dcache_inval_poc
|
||||
blr x2
|
||||
b 1f
|
||||
|
||||
/*
|
||||
* If we entered with the MMU and caches on, clean the ID mapped part
|
||||
* of the primary boot code to the PoC so we can safely execute it with
|
||||
* the MMU off.
|
||||
*/
|
||||
cbz x19, 0f
|
||||
adrp x0, __idmap_text_start
|
||||
0: adrp x0, __idmap_text_start
|
||||
adr_l x1, __idmap_text_end
|
||||
adr_l x2, dcache_clean_poc
|
||||
blr x2
|
||||
0: mov x0, x19
|
||||
|
||||
1: mov x0, x19
|
||||
bl init_kernel_el // w0=cpu_boot_mode
|
||||
mov x20, x0
|
||||
|
||||
|
@ -111,14 +125,6 @@ SYM_CODE_START(primary_entry)
|
|||
* On return, the CPU will be ready for the MMU to be turned on and
|
||||
* the TCR will have been set.
|
||||
*/
|
||||
#if VA_BITS > 48
|
||||
mrs_s x0, SYS_ID_AA64MMFR2_EL1
|
||||
tst x0, ID_AA64MMFR2_EL1_VARange_MASK
|
||||
mov x0, #VA_BITS
|
||||
mov x25, #VA_BITS_MIN
|
||||
csel x25, x25, x0, eq
|
||||
mov x0, x25
|
||||
#endif
|
||||
bl __cpu_setup // initialise processor
|
||||
b __primary_switch
|
||||
SYM_CODE_END(primary_entry)
|
||||
|
@ -177,267 +183,6 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
|
|||
ret
|
||||
SYM_CODE_END(preserve_boot_args)
|
||||
|
||||
SYM_FUNC_START_LOCAL(clear_page_tables)
|
||||
/*
|
||||
* Clear the init page tables.
|
||||
*/
|
||||
adrp x0, init_pg_dir
|
||||
adrp x1, init_pg_end
|
||||
sub x2, x1, x0
|
||||
mov x1, xzr
|
||||
b __pi_memset // tail call
|
||||
SYM_FUNC_END(clear_page_tables)
|
||||
|
||||
/*
|
||||
* Macro to populate page table entries, these entries can be pointers to the next level
|
||||
* or last level entries pointing to physical memory.
|
||||
*
|
||||
* tbl: page table address
|
||||
* rtbl: pointer to page table or physical memory
|
||||
* index: start index to write
|
||||
* eindex: end index to write - [index, eindex] written to
|
||||
* flags: flags for pagetable entry to or in
|
||||
* inc: increment to rtbl between each entry
|
||||
* tmp1: temporary variable
|
||||
*
|
||||
* Preserves: tbl, eindex, flags, inc
|
||||
* Corrupts: index, tmp1
|
||||
* Returns: rtbl
|
||||
*/
|
||||
.macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
|
||||
.Lpe\@: phys_to_pte \tmp1, \rtbl
|
||||
orr \tmp1, \tmp1, \flags // tmp1 = table entry
|
||||
str \tmp1, [\tbl, \index, lsl #3]
|
||||
add \rtbl, \rtbl, \inc // rtbl = pa next level
|
||||
add \index, \index, #1
|
||||
cmp \index, \eindex
|
||||
b.ls .Lpe\@
|
||||
.endm
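
As a reading aid, the loop implemented by the populate_entries macro above can be expressed in a few lines of C. This is an illustrative model only, not kernel code: the standalone function name is hypothetical and phys_to_pte is reduced to using the physical address unchanged.

#include <stdint.h>

/* C model of populate_entries: fill entries [index, eindex] of one table. */
uint64_t populate_entries(uint64_t *tbl, uint64_t rtbl,
                          uint64_t index, uint64_t eindex,
                          uint64_t flags, uint64_t inc)
{
        for (; index <= eindex; index++) {      /* b.ls: loop while index <= eindex */
                tbl[index] = rtbl | flags;      /* str tmp1, [tbl, index, lsl #3] */
                rtbl += inc;                    /* rtbl = pa of the next level table or page */
        }
        return rtbl;                            /* the macro leaves the updated rtbl behind */
}
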
|
||||
|
||||
/*
|
||||
* Compute indices of table entries from virtual address range. If multiple entries
|
||||
* were needed in the previous page table level then the next page table level is assumed
|
||||
* to be composed of multiple pages. (This effectively scales the end index).
|
||||
*
|
||||
* vstart: virtual address of start of range
|
||||
* vend: virtual address of end of range - we map [vstart, vend]
|
||||
* shift: shift used to transform virtual address into index
|
||||
* order: #imm 2log(number of entries in page table)
|
||||
* istart: index in table corresponding to vstart
|
||||
* iend: index in table corresponding to vend
|
||||
* count: On entry: how many extra entries were required in previous level, scales
|
||||
* our end index.
|
||||
* On exit: returns how many extra entries required for next page table level
|
||||
*
|
||||
* Preserves: vstart, vend
|
||||
* Returns: istart, iend, count
|
||||
*/
|
||||
.macro compute_indices, vstart, vend, shift, order, istart, iend, count
|
||||
ubfx \istart, \vstart, \shift, \order
|
||||
ubfx \iend, \vend, \shift, \order
|
||||
add \iend, \iend, \count, lsl \order
|
||||
sub \count, \iend, \istart
|
||||
.endm
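
The index arithmetic performed by compute_indices can likewise be sketched in C. Again this is an illustrative model (names and the standalone form are not part of the kernel sources), but it mirrors the ubfx/add/sub sequence of the macro above.

#include <stdint.h>

struct indices { uint64_t istart, iend, count; };

/* C model of compute_indices for the VA range [vstart, vend]. */
struct indices compute_indices(uint64_t vstart, uint64_t vend,
                               unsigned int shift, unsigned int order,
                               uint64_t count)
{
        struct indices r;
        uint64_t mask = (1ULL << order) - 1;            /* table index is 'order' bits wide */

        r.istart = (vstart >> shift) & mask;            /* ubfx istart, vstart, shift, order */
        r.iend   = (vend   >> shift) & mask;            /* ubfx iend, vend, shift, order */
        r.iend  += count << order;                      /* scale by extra entries in the previous level */
        r.count  = r.iend - r.istart;                   /* extra entries needed at the next level */
        return r;
}
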
|
||||
|
||||
/*
|
||||
* Map memory for specified virtual address range. Each level of page table needed supports
|
||||
* multiple entries. If a level requires n entries the next page table level is assumed to be
|
||||
* formed from n pages.
|
||||
*
|
||||
* tbl: location of page table
|
||||
* rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
|
||||
* vstart: virtual address of start of range
|
||||
* vend: virtual address of end of range - we map [vstart, vend - 1]
|
||||
* flags: flags to use to map last level entries
|
||||
* phys: physical address corresponding to vstart - physical memory is contiguous
|
||||
* order: #imm 2log(number of entries in PGD table)
|
||||
*
|
||||
* If extra_shift is set, an extra level will be populated if the end address does
|
||||
* not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
|
||||
*
|
||||
* Temporaries: istart, iend, tmp, count, sv - these need to be different registers
|
||||
* Preserves: vstart, flags
|
||||
* Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
|
||||
*/
|
||||
.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
|
||||
sub \vend, \vend, #1
|
||||
add \rtbl, \tbl, #PAGE_SIZE
|
||||
mov \count, #0
|
||||
|
||||
.ifnb \extra_shift
|
||||
tst \vend, #~((1 << (\extra_shift)) - 1)
|
||||
b.eq .L_\@
|
||||
compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
|
||||
mov \sv, \rtbl
|
||||
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
|
||||
mov \tbl, \sv
|
||||
.endif
|
||||
.L_\@:
|
||||
compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
|
||||
mov \sv, \rtbl
|
||||
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
|
||||
mov \tbl, \sv
|
||||
|
||||
#if SWAPPER_PGTABLE_LEVELS > 3
|
||||
compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
|
||||
mov \sv, \rtbl
|
||||
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
|
||||
mov \tbl, \sv
|
||||
#endif
|
||||
|
||||
#if SWAPPER_PGTABLE_LEVELS > 2
|
||||
compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
|
||||
mov \sv, \rtbl
|
||||
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
|
||||
mov \tbl, \sv
|
||||
#endif
|
||||
|
||||
compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
|
||||
bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
|
||||
populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Remap a subregion created with the map_memory macro with modified attributes
|
||||
* or output address. The entire remapped region must have been covered in the
|
||||
* invocation of map_memory.
|
||||
*
|
||||
* x0: last level table address (returned in first argument to map_memory)
|
||||
* x1: start VA of the existing mapping
|
||||
* x2: start VA of the region to update
|
||||
* x3: end VA of the region to update (exclusive)
|
||||
* x4: start PA associated with the region to update
|
||||
* x5: attributes to set on the updated region
|
||||
* x6: order of the last level mappings
|
||||
*/
|
||||
SYM_FUNC_START_LOCAL(remap_region)
|
||||
sub x3, x3, #1 // make end inclusive
|
||||
|
||||
// Get the index offset for the start of the last level table
|
||||
lsr x1, x1, x6
|
||||
bfi x1, xzr, #0, #PAGE_SHIFT - 3
|
||||
|
||||
// Derive the start and end indexes into the last level table
|
||||
// associated with the provided region
|
||||
lsr x2, x2, x6
|
||||
lsr x3, x3, x6
|
||||
sub x2, x2, x1
|
||||
sub x3, x3, x1
|
||||
|
||||
mov x1, #1
|
||||
lsl x6, x1, x6 // block size at this level
|
||||
|
||||
populate_entries x0, x4, x2, x3, x5, x6, x7
|
||||
ret
|
||||
SYM_FUNC_END(remap_region)
|
||||
|
||||
SYM_FUNC_START_LOCAL(create_idmap)
|
||||
mov x28, lr
|
||||
/*
|
||||
* The ID map carries a 1:1 mapping of the physical address range
|
||||
* covered by the loaded image, which could be anywhere in DRAM. This
|
||||
* means that the required size of the VA (== PA) space is decided at
|
||||
* boot time, and could be more than the configured size of the VA
|
||||
* space for ordinary kernel and user space mappings.
|
||||
*
|
||||
* There are three cases to consider here:
|
||||
* - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
|
||||
* the placement of the image. In this case, we configure one extra
|
||||
* level of translation on the fly for the ID map only. (This case
|
||||
* also covers 42-bit VA/52-bit PA on 64k pages).
|
||||
*
|
||||
* - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
|
||||
* only happen when using 64k pages, in which case we need to extend
|
||||
* the root level table rather than add a level. Note that we can
|
||||
* treat this case as 'always extended' as long as we take care not
|
||||
* to program an unsupported T0SZ value into the TCR register.
|
||||
*
|
||||
* - Combinations that would require two additional levels of
|
||||
* translation are not supported, e.g., VA_BITS==36 on 16k pages, or
|
||||
* VA_BITS==39/4k pages with 5-level paging, where the input address
|
||||
* requires more than 47 or 48 bits, respectively.
|
||||
*/
|
||||
#if (VA_BITS < 48)
|
||||
#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
|
||||
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
|
||||
|
||||
/*
|
||||
* If VA_BITS < 48, we have to configure an additional table level.
|
||||
* First, we have to verify our assumption that the current value of
|
||||
* VA_BITS was chosen such that all translation levels are fully
|
||||
* utilised, and that lowering T0SZ will always result in an additional
|
||||
* translation level to be configured.
|
||||
*/
|
||||
#if VA_BITS != EXTRA_SHIFT
|
||||
#error "Mismatch between VA_BITS and page size/number of translation levels"
|
||||
#endif
|
||||
#else
|
||||
#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
|
||||
#define EXTRA_SHIFT
|
||||
/*
|
||||
* If VA_BITS == 48, we don't have to configure an additional
|
||||
* translation level, but the top-level table has more entries.
|
||||
*/
|
||||
#endif
|
||||
adrp x0, init_idmap_pg_dir
|
||||
adrp x3, _text
|
||||
adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
|
||||
mov_q x7, SWAPPER_RX_MMUFLAGS
|
||||
|
||||
map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
|
||||
|
||||
/* Remap the kernel page tables r/w in the ID map */
|
||||
adrp x1, _text
|
||||
adrp x2, init_pg_dir
|
||||
adrp x3, init_pg_end
|
||||
bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
|
||||
mov_q x5, SWAPPER_RW_MMUFLAGS
|
||||
mov x6, #SWAPPER_BLOCK_SHIFT
|
||||
bl remap_region
|
||||
|
||||
/* Remap the FDT after the kernel image */
|
||||
adrp x1, _text
|
||||
adrp x22, _end + SWAPPER_BLOCK_SIZE
|
||||
bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
|
||||
bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
|
||||
add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
|
||||
bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
|
||||
mov_q x5, SWAPPER_RW_MMUFLAGS
|
||||
mov x6, #SWAPPER_BLOCK_SHIFT
|
||||
bl remap_region
|
||||
|
||||
/*
|
||||
* Since the page tables have been populated with non-cacheable
|
||||
* accesses (MMU disabled), invalidate those tables again to
|
||||
* remove any speculatively loaded cache lines.
|
||||
*/
|
||||
cbnz x19, 0f // skip cache invalidation if MMU is on
|
||||
dmb sy
|
||||
|
||||
adrp x0, init_idmap_pg_dir
|
||||
adrp x1, init_idmap_pg_end
|
||||
bl dcache_inval_poc
|
||||
0: ret x28
|
||||
SYM_FUNC_END(create_idmap)
|
||||
|
||||
SYM_FUNC_START_LOCAL(create_kernel_mapping)
|
||||
adrp x0, init_pg_dir
|
||||
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
add x5, x5, x23 // add KASLR displacement
|
||||
#endif
|
||||
adrp x6, _end // runtime __pa(_end)
|
||||
adrp x3, _text // runtime __pa(_text)
|
||||
sub x6, x6, x3 // _end - _text
|
||||
add x6, x6, x5 // runtime __va(_end)
|
||||
mov_q x7, SWAPPER_RW_MMUFLAGS
|
||||
|
||||
map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
|
||||
|
||||
dsb ishst // sync with page table walker
|
||||
ret
|
||||
SYM_FUNC_END(create_kernel_mapping)
|
||||
|
||||
/*
|
||||
* Initialize CPU registers with task-specific and cpu-specific context.
|
||||
*
|
||||
|
@ -489,33 +234,8 @@ SYM_FUNC_START_LOCAL(__primary_switched)
|
|||
mov x0, x20
|
||||
bl set_cpu_boot_mode_flag
|
||||
|
||||
// Clear BSS
|
||||
adr_l x0, __bss_start
|
||||
mov x1, xzr
|
||||
adr_l x2, __bss_stop
|
||||
sub x2, x2, x0
|
||||
bl __pi_memset
|
||||
dsb ishst // Make zero page visible to PTW
|
||||
|
||||
#if VA_BITS > 48
|
||||
adr_l x8, vabits_actual // Set this early so KASAN early init
|
||||
str x25, [x8] // ... observes the correct value
|
||||
dc civac, x8 // Make visible to booting secondaries
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_BASE
|
||||
adrp x5, memstart_offset_seed // Save KASLR linear map seed
|
||||
strh w24, [x5, :lo12:memstart_offset_seed]
|
||||
#endif
|
||||
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
||||
bl kasan_early_init
|
||||
#endif
|
||||
mov x0, x21 // pass FDT address in x0
|
||||
bl early_fdt_map // Try mapping the FDT early
|
||||
mov x0, x20 // pass the full boot status
|
||||
bl init_feature_override // Parse cpu feature overrides
|
||||
#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
|
||||
bl scs_patch_vmlinux
|
||||
#endif
|
||||
mov x0, x20
|
||||
bl finalise_el2 // Prefer VHE if possible
|
||||
|
@ -643,10 +363,13 @@ SYM_FUNC_START_LOCAL(secondary_startup)
|
|||
* Common entry point for secondary CPUs.
|
||||
*/
|
||||
mov x20, x0 // preserve boot mode
|
||||
|
||||
#ifdef CONFIG_ARM64_VA_BITS_52
|
||||
alternative_if ARM64_HAS_VA52
|
||||
bl __cpu_secondary_check52bitva
|
||||
#if VA_BITS > 48
|
||||
ldr_l x0, vabits_actual
|
||||
alternative_else_nop_endif
|
||||
#endif
|
||||
|
||||
bl __cpu_setup // initialise processor
|
||||
adrp x1, swapper_pg_dir
|
||||
adrp x2, idmap_pg_dir
|
||||
|
@ -749,15 +472,18 @@ SYM_FUNC_START(__enable_mmu)
|
|||
ret
|
||||
SYM_FUNC_END(__enable_mmu)
|
||||
|
||||
#ifdef CONFIG_ARM64_VA_BITS_52
|
||||
SYM_FUNC_START(__cpu_secondary_check52bitva)
|
||||
#if VA_BITS > 48
|
||||
ldr_l x0, vabits_actual
|
||||
cmp x0, #52
|
||||
b.ne 2f
|
||||
|
||||
#ifndef CONFIG_ARM64_LPA2
|
||||
mrs_s x0, SYS_ID_AA64MMFR2_EL1
|
||||
and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
|
||||
cbnz x0, 2f
|
||||
#else
|
||||
mrs x0, id_aa64mmfr0_el1
|
||||
sbfx x0, x0, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
|
||||
cmp x0, #ID_AA64MMFR0_EL1_TGRAN_LPA2
|
||||
b.ge 2f
|
||||
#endif
|
||||
|
||||
update_early_cpu_boot_status \
|
||||
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
|
||||
|
@ -765,9 +491,9 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
|
|||
wfi
|
||||
b 1b
|
||||
|
||||
#endif
|
||||
2: ret
|
||||
SYM_FUNC_END(__cpu_secondary_check52bitva)
|
||||
#endif
|
||||
|
||||
SYM_FUNC_START_LOCAL(__no_granule_support)
|
||||
/* Indicate that this CPU can't boot and is stuck in the kernel */
|
||||
|
@ -779,123 +505,18 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
|
|||
b 1b
|
||||
SYM_FUNC_END(__no_granule_support)
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
SYM_FUNC_START_LOCAL(__relocate_kernel)
|
||||
/*
|
||||
* Iterate over each entry in the relocation table, and apply the
|
||||
* relocations in place.
|
||||
*/
|
||||
adr_l x9, __rela_start
|
||||
adr_l x10, __rela_end
|
||||
mov_q x11, KIMAGE_VADDR // default virtual offset
|
||||
add x11, x11, x23 // actual virtual offset
|
||||
|
||||
0: cmp x9, x10
|
||||
b.hs 1f
|
||||
ldp x12, x13, [x9], #24
|
||||
ldr x14, [x9, #-8]
|
||||
cmp w13, #R_AARCH64_RELATIVE
|
||||
b.ne 0b
|
||||
add x14, x14, x23 // relocate
|
||||
str x14, [x12, x23]
|
||||
b 0b
|
||||
|
||||
1:
|
||||
#ifdef CONFIG_RELR
|
||||
/*
|
||||
* Apply RELR relocations.
|
||||
*
|
||||
* RELR is a compressed format for storing relative relocations. The
|
||||
* encoded sequence of entries looks like:
|
||||
* [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
|
||||
*
|
||||
* i.e. start with an address, followed by any number of bitmaps. The
|
||||
* address entry encodes 1 relocation. The subsequent bitmap entries
|
||||
* encode up to 63 relocations each, at subsequent offsets following
|
||||
* the last address entry.
|
||||
*
|
||||
* The bitmap entries must have 1 in the least significant bit. The
|
||||
* assumption here is that an address cannot have 1 in lsb. Odd
|
||||
* addresses are not supported. Any odd addresses are stored in the RELA
|
||||
* section, which is handled above.
|
||||
*
|
||||
* Excluding the least significant bit in the bitmap, each non-zero
|
||||
* bit in the bitmap represents a relocation to be applied to
|
||||
* a corresponding machine word that follows the base address
|
||||
* word. The second least significant bit represents the machine
|
||||
* word immediately following the initial address, and each bit
|
||||
* that follows represents the next word, in linear order. As such,
|
||||
* a single bitmap can encode up to 63 relocations in a 64-bit object.
|
||||
*
|
||||
* In this implementation we store the address of the next RELR table
|
||||
* entry in x9, the address being relocated by the current address or
|
||||
* bitmap entry in x13 and the address being relocated by the current
|
||||
* bit in x14.
|
||||
*/
|
||||
adr_l x9, __relr_start
|
||||
adr_l x10, __relr_end
|
||||
|
||||
2: cmp x9, x10
|
||||
b.hs 7f
|
||||
ldr x11, [x9], #8
|
||||
tbnz x11, #0, 3f // branch to handle bitmaps
|
||||
add x13, x11, x23
|
||||
ldr x12, [x13] // relocate address entry
|
||||
add x12, x12, x23
|
||||
str x12, [x13], #8 // adjust to start of bitmap
|
||||
b 2b
|
||||
|
||||
3: mov x14, x13
|
||||
4: lsr x11, x11, #1
|
||||
cbz x11, 6f
|
||||
tbz x11, #0, 5f // skip bit if not set
|
||||
ldr x12, [x14] // relocate bit
|
||||
add x12, x12, x23
|
||||
str x12, [x14]
|
||||
|
||||
5: add x14, x14, #8 // move to next bit's address
|
||||
b 4b
|
||||
|
||||
6: /*
|
||||
* Move to the next bitmap's address. 8 is the word size, and 63 is the
|
||||
* number of significant bits in a bitmap entry.
|
||||
*/
|
||||
add x13, x13, #(8 * 63)
|
||||
b 2b
|
||||
|
||||
7:
|
||||
#endif
|
||||
ret
|
||||
|
||||
SYM_FUNC_END(__relocate_kernel)
|
||||
#endif
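
The RELR handling removed above follows the compressed relocation format described in its comment: an even entry is the address of a single word to relocate, and odd entries are bitmaps covering the 63 words that follow. A plain C decoder for the same scheme might look like the sketch below; it is illustrative only, not kernel code, and 'base' stands in for the runtime displacement that the assembly keeps in x23.

#include <stdint.h>
#include <stddef.h>

void apply_relr(const uint64_t *relr, size_t count, uint64_t base)
{
        uint64_t *where = NULL;

        for (size_t i = 0; i < count; i++) {
                uint64_t entry = relr[i];

                if ((entry & 1) == 0) {                 /* address entry: one relocation */
                        where = (uint64_t *)(entry + base);
                        *where++ += base;               /* relocate it, point at the next word */
                } else {                                /* bitmap entry: up to 63 relocations */
                        uint64_t *p = where;

                        for (uint64_t bits = entry >> 1; bits; bits >>= 1, p++)
                                if (bits & 1)
                                        *p += base;
                        where += 63;                    /* next bitmap covers the following 63 words */
                }
        }
}
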
|
||||
|
||||
SYM_FUNC_START_LOCAL(__primary_switch)
|
||||
adrp x1, reserved_pg_dir
|
||||
adrp x2, init_idmap_pg_dir
|
||||
bl __enable_mmu
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
adrp x23, KERNEL_START
|
||||
and x23, x23, MIN_KIMG_ALIGN - 1
|
||||
#ifdef CONFIG_RANDOMIZE_BASE
|
||||
mov x0, x22
|
||||
adrp x1, init_pg_end
|
||||
|
||||
adrp x1, early_init_stack
|
||||
mov sp, x1
|
||||
mov x29, xzr
|
||||
bl __pi_kaslr_early_init
|
||||
and x24, x0, #SZ_2M - 1 // capture memstart offset seed
|
||||
bic x0, x0, #SZ_2M - 1
|
||||
orr x23, x23, x0 // record kernel offset
|
||||
#endif
|
||||
#endif
|
||||
bl clear_page_tables
|
||||
bl create_kernel_mapping
|
||||
mov x0, x20 // pass the full boot status
|
||||
mov x1, x21 // pass the FDT
|
||||
bl __pi_early_map_kernel // Map and relocate the kernel
|
||||
|
||||
adrp x1, init_pg_dir
|
||||
load_ttbr1 x1, x1, x2
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
bl __relocate_kernel
|
||||
#endif
|
||||
ldr x8, =__primary_switched
|
||||
adrp x0, KERNEL_START // __pa(KERNEL_START)
|
||||
br x8
|
||||
|
|
|
@ -36,7 +36,40 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
|
|||
PROVIDE(__pi___memmove = __pi_memmove);
|
||||
PROVIDE(__pi___memset = __pi_memset);
|
||||
|
||||
PROVIDE(__pi_vabits_actual = vabits_actual);
|
||||
PROVIDE(__pi_id_aa64isar1_override = id_aa64isar1_override);
|
||||
PROVIDE(__pi_id_aa64isar2_override = id_aa64isar2_override);
|
||||
PROVIDE(__pi_id_aa64mmfr0_override = id_aa64mmfr0_override);
|
||||
PROVIDE(__pi_id_aa64mmfr1_override = id_aa64mmfr1_override);
|
||||
PROVIDE(__pi_id_aa64mmfr2_override = id_aa64mmfr2_override);
|
||||
PROVIDE(__pi_id_aa64pfr0_override = id_aa64pfr0_override);
|
||||
PROVIDE(__pi_id_aa64pfr1_override = id_aa64pfr1_override);
|
||||
PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override);
|
||||
PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override);
|
||||
PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override);
|
||||
PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings);
|
||||
#ifdef CONFIG_CAVIUM_ERRATUM_27456
|
||||
PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
|
||||
#endif
|
||||
PROVIDE(__pi__ctype = _ctype);
|
||||
PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
|
||||
|
||||
PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
|
||||
PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
|
||||
PROVIDE(__pi_init_pg_dir = init_pg_dir);
|
||||
PROVIDE(__pi_init_pg_end = init_pg_end);
|
||||
PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir);
|
||||
|
||||
PROVIDE(__pi__text = _text);
|
||||
PROVIDE(__pi__stext = _stext);
|
||||
PROVIDE(__pi__etext = _etext);
|
||||
PROVIDE(__pi___start_rodata = __start_rodata);
|
||||
PROVIDE(__pi___inittext_begin = __inittext_begin);
|
||||
PROVIDE(__pi___inittext_end = __inittext_end);
|
||||
PROVIDE(__pi___initdata_begin = __initdata_begin);
|
||||
PROVIDE(__pi___initdata_end = __initdata_end);
|
||||
PROVIDE(__pi__data = _data);
|
||||
PROVIDE(__pi___bss_start = __bss_start);
|
||||
PROVIDE(__pi__end = _end);
|
||||
|
||||
#ifdef CONFIG_KVM
|
||||
|
||||
|
|
|
@ -16,9 +16,7 @@ bool __ro_after_init __kaslr_is_enabled = false;
|
|||
|
||||
void __init kaslr_init(void)
|
||||
{
|
||||
if (cpuid_feature_extract_unsigned_field(arm64_sw_feature_override.val &
|
||||
arm64_sw_feature_override.mask,
|
||||
ARM64_SW_FEATURE_OVERRIDE_NOKASLR)) {
|
||||
if (kaslr_disabled_cmdline()) {
|
||||
pr_info("KASLR disabled on command line\n");
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -595,7 +595,7 @@ int module_finalize(const Elf_Ehdr *hdr,
|
|||
if (scs_is_dynamic()) {
|
||||
s = find_section(hdr, sechdrs, ".init.eh_frame");
|
||||
if (s)
|
||||
scs_patch((void *)s->sh_addr, s->sh_size);
|
||||
__pi_scs_patch((void *)s->sh_addr, s->sh_size);
|
||||
}
|
||||
|
||||
return module_init_ftrace_plt(hdr, sechdrs, me);
|
||||
|
|
arch/arm64/kernel/pi/.gitignore (new file, 3 lines, vendored)
@@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only

relacheck

@ -11,6 +11,9 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
|
|||
-fno-asynchronous-unwind-tables -fno-unwind-tables \
|
||||
$(call cc-option,-fno-addrsig)
|
||||
|
||||
# this code may run with the MMU off so disable unaligned accesses
|
||||
CFLAGS_map_range.o += -mstrict-align
|
||||
|
||||
# remove SCS flags from all objects in this directory
|
||||
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
|
||||
# disable LTO
|
||||
|
@ -22,14 +25,26 @@ KCSAN_SANITIZE := n
|
|||
UBSAN_SANITIZE := n
|
||||
KCOV_INSTRUMENT := n
|
||||
|
||||
hostprogs := relacheck
|
||||
|
||||
quiet_cmd_piobjcopy = $(quiet_cmd_objcopy)
|
||||
cmd_piobjcopy = $(cmd_objcopy) && $(obj)/relacheck $(@) $(<)
|
||||
|
||||
$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
|
||||
--remove-section=.note.gnu.property \
|
||||
--prefix-alloc-sections=.init
|
||||
$(obj)/%.pi.o: $(obj)/%.o FORCE
|
||||
$(call if_changed,objcopy)
|
||||
--remove-section=.note.gnu.property
|
||||
$(obj)/%.pi.o: $(obj)/%.o $(obj)/relacheck FORCE
|
||||
$(call if_changed,piobjcopy)
|
||||
|
||||
# ensure that all the lib- code ends up as __init code and data
|
||||
$(obj)/lib-%.pi.o: OBJCOPYFLAGS += --prefix-alloc-sections=.init
|
||||
|
||||
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
|
||||
$(call if_changed_rule,cc_o_c)
|
||||
|
||||
obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
|
||||
extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
|
||||
obj-y := idreg-override.pi.o \
|
||||
map_kernel.pi.o map_range.pi.o \
|
||||
lib-fdt.pi.o lib-fdt_ro.pi.o
|
||||
obj-$(CONFIG_RELOCATABLE) += relocate.pi.o
|
||||
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o
|
||||
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o
|
||||
extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
#include <asm/cpufeature.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
#include "pi.h"
|
||||
|
||||
#define FTR_DESC_NAME_LEN 20
|
||||
#define FTR_DESC_FIELD_LEN 10
|
||||
#define FTR_ALIAS_NAME_LEN 30
|
||||
|
@ -21,15 +23,6 @@
|
|||
|
||||
static u64 __boot_status __initdata;
|
||||
|
||||
// temporary __prel64 related definitions
|
||||
// to be removed when this code is moved under pi/
|
||||
|
||||
#define __prel64_initconst __initconst
|
||||
|
||||
#define PREL64(type, name) union { type *name; }
|
||||
|
||||
#define prel64_pointer(__d) (__d)
|
||||
|
||||
typedef bool filter_t(u64 val);
|
||||
|
||||
struct ftr_set_desc {
|
||||
|
@ -66,6 +59,35 @@ static const struct ftr_set_desc mmfr1 __prel64_initconst = {
|
|||
},
|
||||
};
|
||||
|
||||
|
||||
static bool __init mmfr2_varange_filter(u64 val)
|
||||
{
|
||||
int __maybe_unused feat;
|
||||
|
||||
if (val)
|
||||
return false;
|
||||
|
||||
#ifdef CONFIG_ARM64_LPA2
|
||||
feat = cpuid_feature_extract_signed_field(read_sysreg(id_aa64mmfr0_el1),
|
||||
ID_AA64MMFR0_EL1_TGRAN_SHIFT);
|
||||
if (feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2) {
|
||||
id_aa64mmfr0_override.val |=
|
||||
(ID_AA64MMFR0_EL1_TGRAN_LPA2 - 1) << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
|
||||
id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
static const struct ftr_set_desc mmfr2 __prel64_initconst = {
|
||||
.name = "id_aa64mmfr2",
|
||||
.override = &id_aa64mmfr2_override,
|
||||
.fields = {
|
||||
FIELD("varange", ID_AA64MMFR2_EL1_VARange_SHIFT, mmfr2_varange_filter),
|
||||
{}
|
||||
},
|
||||
};
|
||||
|
||||
static bool __init pfr0_sve_filter(u64 val)
|
||||
{
|
||||
/*
|
||||
|
@ -166,6 +188,8 @@ static const struct ftr_set_desc sw_features __prel64_initconst = {
|
|||
.fields = {
|
||||
FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
|
||||
FIELD("hvhe", ARM64_SW_FEATURE_OVERRIDE_HVHE, hvhe_filter),
|
||||
FIELD("rodataoff", ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF, NULL),
|
||||
FIELD("nowxn", ARM64_SW_FEATURE_OVERRIDE_NOWXN, NULL),
|
||||
{}
|
||||
},
|
||||
};
|
||||
|
@ -173,6 +197,7 @@ static const struct ftr_set_desc sw_features __prel64_initconst = {
|
|||
static const
|
||||
PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
|
||||
{ &mmfr1 },
|
||||
{ &mmfr2 },
|
||||
{ &pfr0 },
|
||||
{ &pfr1 },
|
||||
{ &isar1 },
|
||||
|
@ -197,6 +222,9 @@ static const struct {
|
|||
{ "arm64.nomops", "id_aa64isar2.mops=0" },
|
||||
{ "arm64.nomte", "id_aa64pfr1.mte=0" },
|
||||
{ "nokaslr", "arm64_sw.nokaslr=1" },
|
||||
{ "rodata=off", "arm64_sw.rodataoff=1 arm64_sw.nowxn=1" },
|
||||
{ "arm64.nolva", "id_aa64mmfr2.varange=0" },
|
||||
{ "arm64.nowxn", "arm64_sw.nowxn=1" },
|
||||
};
|
||||
|
||||
static int __init parse_hexdigit(const char *p, u64 *v)
|
||||
|
@ -313,42 +341,35 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
|
|||
} while (1);
|
||||
}
|
||||
|
||||
static __init const u8 *get_bootargs_cmdline(void)
|
||||
static __init const u8 *get_bootargs_cmdline(const void *fdt, int node)
|
||||
{
|
||||
static char const bootargs[] __initconst = "bootargs";
|
||||
const u8 *prop;
|
||||
void *fdt;
|
||||
int node;
|
||||
|
||||
fdt = get_early_fdt_ptr();
|
||||
if (!fdt)
|
||||
return NULL;
|
||||
|
||||
node = fdt_path_offset(fdt, "/chosen");
|
||||
if (node < 0)
|
||||
return NULL;
|
||||
|
||||
prop = fdt_getprop(fdt, node, "bootargs", NULL);
|
||||
prop = fdt_getprop(fdt, node, bootargs, NULL);
|
||||
if (!prop)
|
||||
return NULL;
|
||||
|
||||
return strlen(prop) ? prop : NULL;
|
||||
}
|
||||
|
||||
static __init void parse_cmdline(void)
|
||||
static __init void parse_cmdline(const void *fdt, int chosen)
|
||||
{
|
||||
const u8 *prop = get_bootargs_cmdline();
|
||||
static char const cmdline[] __initconst = CONFIG_CMDLINE;
|
||||
const u8 *prop = get_bootargs_cmdline(fdt, chosen);
|
||||
|
||||
if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
|
||||
__parse_cmdline(CONFIG_CMDLINE, true);
|
||||
__parse_cmdline(cmdline, true);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
|
||||
__parse_cmdline(prop, true);
|
||||
}
|
||||
|
||||
/* Keep checkers quiet */
|
||||
void init_feature_override(u64 boot_status);
|
||||
|
||||
asmlinkage void __init init_feature_override(u64 boot_status)
|
||||
void __init init_feature_override(u64 boot_status, const void *fdt,
|
||||
int chosen)
|
||||
{
|
||||
struct arm64_ftr_override *override;
|
||||
const struct ftr_set_desc *reg;
|
||||
|
@ -364,7 +385,7 @@ asmlinkage void __init init_feature_override(u64 boot_status)
|
|||
|
||||
__boot_status = boot_status;
|
||||
|
||||
parse_cmdline();
|
||||
parse_cmdline(fdt, chosen);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(regs); i++) {
|
||||
reg = prel64_pointer(regs[i].reg);
|
||||
|
@ -373,3 +394,10 @@ asmlinkage void __init init_feature_override(u64 boot_status)
|
|||
(unsigned long)(override + 1));
|
||||
}
|
||||
}
|
||||
|
||||
char * __init skip_spaces(const char *str)
|
||||
{
|
||||
while (isspace(*str))
|
||||
++str;
|
||||
return (char *)str;
|
||||
}
|
|
@ -16,68 +16,21 @@
|
|||
#include <asm/memory.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
/* taken from lib/string.c */
|
||||
static char *__strstr(const char *s1, const char *s2)
|
||||
#include "pi.h"
|
||||
|
||||
extern u16 memstart_offset_seed;
|
||||
|
||||
static u64 __init get_kaslr_seed(void *fdt, int node)
|
||||
{
|
||||
size_t l1, l2;
|
||||
|
||||
l2 = strlen(s2);
|
||||
if (!l2)
|
||||
return (char *)s1;
|
||||
l1 = strlen(s1);
|
||||
while (l1 >= l2) {
|
||||
l1--;
|
||||
if (!memcmp(s1, s2, l2))
|
||||
return (char *)s1;
|
||||
s1++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
static bool cmdline_contains_nokaslr(const u8 *cmdline)
|
||||
{
|
||||
const u8 *str;
|
||||
|
||||
str = __strstr(cmdline, "nokaslr");
|
||||
return str == cmdline || (str > cmdline && *(str - 1) == ' ');
|
||||
}
|
||||
|
||||
static bool is_kaslr_disabled_cmdline(void *fdt)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
|
||||
int node;
|
||||
const u8 *prop;
|
||||
|
||||
node = fdt_path_offset(fdt, "/chosen");
|
||||
if (node < 0)
|
||||
goto out;
|
||||
|
||||
prop = fdt_getprop(fdt, node, "bootargs", NULL);
|
||||
if (!prop)
|
||||
goto out;
|
||||
|
||||
if (cmdline_contains_nokaslr(prop))
|
||||
return true;
|
||||
|
||||
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
|
||||
goto out;
|
||||
|
||||
return false;
|
||||
}
|
||||
out:
|
||||
return cmdline_contains_nokaslr(CONFIG_CMDLINE);
|
||||
}
|
||||
|
||||
static u64 get_kaslr_seed(void *fdt)
|
||||
{
|
||||
int node, len;
|
||||
static char const seed_str[] __initconst = "kaslr-seed";
|
||||
fdt64_t *prop;
|
||||
u64 ret;
|
||||
int len;
|
||||
|
||||
node = fdt_path_offset(fdt, "/chosen");
|
||||
if (node < 0)
|
||||
return 0;
|
||||
|
||||
prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
|
||||
prop = fdt_getprop_w(fdt, node, seed_str, &len);
|
||||
if (!prop || len != sizeof(u64))
|
||||
return 0;
|
||||
|
||||
|
@ -86,20 +39,22 @@ static u64 get_kaslr_seed(void *fdt)
|
|||
return ret;
|
||||
}
|
||||
|
||||
asmlinkage u64 kaslr_early_init(void *fdt)
|
||||
u64 __init kaslr_early_init(void *fdt, int chosen)
|
||||
{
|
||||
u64 seed, range;
|
||||
|
||||
if (is_kaslr_disabled_cmdline(fdt))
|
||||
if (kaslr_disabled_cmdline())
|
||||
return 0;
|
||||
|
||||
seed = get_kaslr_seed(fdt);
|
||||
seed = get_kaslr_seed(fdt, chosen);
|
||||
if (!seed) {
|
||||
if (!__early_cpu_has_rndr() ||
|
||||
!__arm64_rndr((unsigned long *)&seed))
|
||||
return 0;
|
||||
}
|
||||
|
||||
memstart_offset_seed = seed & U16_MAX;
|
||||
|
||||
/*
|
||||
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
|
||||
* kernel image offset from the seed. Let's place the kernel in the
|
||||
|
|
arch/arm64/kernel/pi/map_kernel.c (new file, 276 lines)
@@ -0,0 +1,276 @@
// SPDX-License-Identifier: GPL-2.0-only
|
||||
// Copyright 2023 Google LLC
|
||||
// Author: Ard Biesheuvel <ardb@google.com>
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/libfdt.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <asm/memory.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#include "pi.h"
|
||||
|
||||
extern const u8 __eh_frame_start[], __eh_frame_end[];
|
||||
|
||||
extern void idmap_cpu_replace_ttbr1(void *pgdir);
|
||||
|
||||
static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
|
||||
void *start, void *end, pgprot_t prot,
|
||||
bool may_use_cont, int root_level)
|
||||
{
|
||||
map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
|
||||
((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
|
||||
prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
|
||||
}
|
||||
|
||||
static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
|
||||
void *end, int root_level)
|
||||
{
|
||||
map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
|
||||
false, root_level);
|
||||
}
|
||||
|
||||
static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
|
||||
{
|
||||
bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
|
||||
bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
|
||||
u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
|
||||
pgprot_t text_prot = PAGE_KERNEL_ROX;
|
||||
pgprot_t data_prot = PAGE_KERNEL;
|
||||
pgprot_t prot;
|
||||
|
||||
/*
|
||||
* External debuggers may need to write directly to the text mapping to
|
||||
* install SW breakpoints. Allow this (only) when explicitly requested
|
||||
* with rodata=off.
|
||||
*/
|
||||
if (arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF))
|
||||
text_prot = PAGE_KERNEL_EXEC;
|
||||
|
||||
/*
|
||||
* We only enable the shadow call stack dynamically if we are running
|
||||
* on a system that does not implement PAC or BTI. PAC and SCS provide
|
||||
* roughly the same level of protection, and BTI relies on the PACIASP
|
||||
* instructions serving as landing pads, preventing us from patching
|
||||
* those instructions into something else.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && cpu_has_pac())
|
||||
enable_scs = false;
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && cpu_has_bti()) {
|
||||
enable_scs = false;
|
||||
|
||||
/*
|
||||
* If we have a CPU that supports BTI and a kernel built for
|
||||
* BTI then mark the kernel executable text as guarded pages
|
||||
* now so we don't have to rewrite the page tables later.
|
||||
*/
|
||||
text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
|
||||
}
|
||||
|
||||
/* Map all code read-write on the first pass if needed */
|
||||
twopass |= enable_scs;
|
||||
prot = twopass ? data_prot : text_prot;
|
||||
|
||||
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
|
||||
!twopass, root_level);
|
||||
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
|
||||
__inittext_begin, data_prot, false, root_level);
|
||||
map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
|
||||
__inittext_end, prot, false, root_level);
|
||||
map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
|
||||
__initdata_end, data_prot, false, root_level);
|
||||
map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
|
||||
true, root_level);
|
||||
dsb(ishst);
|
||||
|
||||
idmap_cpu_replace_ttbr1(init_pg_dir);
|
||||
|
||||
if (twopass) {
|
||||
if (IS_ENABLED(CONFIG_RELOCATABLE))
|
||||
relocate_kernel(kaslr_offset);
|
||||
|
||||
if (enable_scs) {
|
||||
scs_patch(__eh_frame_start + va_offset,
|
||||
__eh_frame_end - __eh_frame_start);
|
||||
asm("ic ialluis");
|
||||
|
||||
dynamic_scs_is_enabled = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmap the text region before remapping it, to avoid
|
||||
* potential TLB conflicts when creating the contiguous
|
||||
* descriptors.
|
||||
*/
|
||||
unmap_segment(init_pg_dir, va_offset, _stext, _etext,
|
||||
root_level);
|
||||
dsb(ishst);
|
||||
isb();
|
||||
__tlbi(vmalle1);
|
||||
isb();
|
||||
|
||||
/*
|
||||
* Remap these segments with different permissions
|
||||
* No new page table allocations should be needed
|
||||
*/
|
||||
map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
|
||||
text_prot, true, root_level);
|
||||
map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
|
||||
__inittext_end, text_prot, false, root_level);
|
||||
}
|
||||
|
||||
/* Copy the root page table to its final location */
|
||||
memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
|
||||
dsb(ishst);
|
||||
idmap_cpu_replace_ttbr1(swapper_pg_dir);
|
||||
}
|
||||
|
||||
static void noinline __section(".idmap.text") disable_wxn(void)
|
||||
{
|
||||
u64 sctlr = read_sysreg(sctlr_el1) & ~SCTLR_ELx_WXN;
|
||||
|
||||
/*
|
||||
* We cannot safely clear the WXN bit while the MMU and caches are on,
|
||||
* so turn the MMU off, flush the TLBs and turn it on again but with
|
||||
* the WXN bit cleared this time.
|
||||
*/
|
||||
asm(" msr sctlr_el1, %0 ;"
|
||||
" isb ;"
|
||||
" tlbi vmalle1 ;"
|
||||
" dsb nsh ;"
|
||||
" isb ;"
|
||||
" msr sctlr_el1, %1 ;"
|
||||
" isb ;"
|
||||
:: "r"(sctlr & ~SCTLR_ELx_M), "r"(sctlr));
|
||||
}
|
||||
|
||||
static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
|
||||
{
|
||||
u64 sctlr = read_sysreg(sctlr_el1);
|
||||
u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
|
||||
|
||||
asm(" msr sctlr_el1, %0 ;"
|
||||
" isb ;"
|
||||
" msr ttbr0_el1, %1 ;"
|
||||
" msr tcr_el1, %2 ;"
|
||||
" isb ;"
|
||||
" tlbi vmalle1 ;"
|
||||
" dsb nsh ;"
|
||||
" isb ;"
|
||||
" msr sctlr_el1, %3 ;"
|
||||
" isb ;"
|
||||
:: "r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
|
||||
}
|
||||
|
||||
static void __init remap_idmap_for_lpa2(void)
|
||||
{
|
||||
/* clear the bits that change meaning once LPA2 is turned on */
|
||||
pteval_t mask = PTE_SHARED;
|
||||
|
||||
/*
|
||||
* We have to clear bits [9:8] in all block or page descriptors in the
|
||||
* initial ID map, as otherwise they will be (mis)interpreted as
|
||||
* physical address bits once we flick the LPA2 switch (TCR.DS). Since
|
||||
* we cannot manipulate live descriptors in that way without creating
|
||||
* potential TLB conflicts, let's create another temporary ID map in a
|
||||
* LPA2 compatible fashion, and update the initial ID map while running
|
||||
* from that.
|
||||
*/
|
||||
create_init_idmap(init_pg_dir, mask);
|
||||
dsb(ishst);
|
||||
set_ttbr0_for_lpa2((u64)init_pg_dir);
|
||||
|
||||
/*
|
||||
* Recreate the initial ID map with the same granularity as before.
|
||||
* Don't bother with the FDT, we no longer need it after this.
|
||||
*/
|
||||
memset(init_idmap_pg_dir, 0,
       (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
|
||||
|
||||
create_init_idmap(init_idmap_pg_dir, mask);
|
||||
dsb(ishst);
|
||||
|
||||
/* switch back to the updated initial ID map */
|
||||
set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
|
||||
|
||||
/* wipe the temporary ID map from memory */
|
||||
memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
|
||||
}
|
||||
|
||||
static void __init map_fdt(u64 fdt)
|
||||
{
|
||||
static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
|
||||
u64 efdt = fdt + MAX_FDT_SIZE;
|
||||
u64 ptep = (u64)ptes;
|
||||
|
||||
/*
|
||||
* Map up to MAX_FDT_SIZE bytes, but avoid overlap with
|
||||
* the kernel image.
|
||||
*/
|
||||
map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
|
||||
fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
|
||||
(pte_t *)init_idmap_pg_dir, false, 0);
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
|
||||
{
|
||||
static char const chosen_str[] __initconst = "/chosen";
|
||||
u64 va_base, pa_base = (u64)&_text;
|
||||
u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
|
||||
int root_level = 4 - CONFIG_PGTABLE_LEVELS;
|
||||
int va_bits = VA_BITS;
|
||||
int chosen;
|
||||
|
||||
map_fdt((u64)fdt);
|
||||
|
||||
/* Clear BSS and the initial page tables */
|
||||
memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
|
||||
|
||||
/* Parse the command line for CPU feature overrides */
|
||||
chosen = fdt_path_offset(fdt, chosen_str);
|
||||
init_feature_override(boot_status, fdt, chosen);
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
|
||||
va_bits = VA_BITS_MIN;
|
||||
} else if (IS_ENABLED(CONFIG_ARM64_LPA2) && !cpu_has_lpa2()) {
|
||||
va_bits = VA_BITS_MIN;
|
||||
root_level++;
|
||||
}
|
||||
|
||||
if (va_bits > VA_BITS_MIN)
|
||||
sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_WXN) &&
|
||||
arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOWXN))
|
||||
disable_wxn();
|
||||
|
||||
/*
|
||||
* The virtual KASLR displacement modulo 2MiB is decided by the
|
||||
* physical placement of the image, as otherwise, we might not be able
|
||||
* to create the early kernel mapping using 2 MiB block descriptors. So
|
||||
* take the low bits of the KASLR offset from the physical address, and
|
||||
* fill in the high bits from the seed.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
|
||||
u64 kaslr_seed = kaslr_early_init(fdt, chosen);
|
||||
|
||||
if (kaslr_seed && kaslr_requires_kpti())
|
||||
arm64_use_ng_mappings = true;
|
||||
|
||||
kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_LPA2) && va_bits > VA_BITS_MIN)
|
||||
remap_idmap_for_lpa2();
|
||||
|
||||
va_base = KIMAGE_VADDR + kaslr_offset;
|
||||
map_kernel(kaslr_offset, va_base - pa_base, root_level);
|
||||
}
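
To make the offset composition at the end of early_map_kernel() concrete, here is a small standalone sketch with made-up values: the bits below the 2 MiB kernel alignment come from the physical load address, the bits above it from the KASLR seed. The numbers are illustrative only, and in the kernel the seed returned by kaslr_early_init() has already been reduced to the usable VA range.

#include <stdint.h>
#include <stdio.h>

#define MIN_KIMG_ALIGN  0x200000ULL     /* 2 MiB kernel image alignment, as on arm64 */

int main(void)
{
        uint64_t pa_base    = 0x4abc0000ULL;    /* hypothetical physical load address */
        uint64_t kaslr_seed = 0x41a00000ULL;    /* hypothetical, already range-limited seed */

        uint64_t kaslr_offset = pa_base % MIN_KIMG_ALIGN;       /* low bits: physical placement */
        kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);     /* high bits: seed */

        printf("kaslr_offset = %#llx\n", (unsigned long long)kaslr_offset);
        return 0;
}
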
|
arch/arm64/kernel/pi/map_range.c (new file, 105 lines)
@@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0-only
|
||||
// Copyright 2023 Google LLC
|
||||
// Author: Ard Biesheuvel <ardb@google.com>
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include <asm/memory.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
#include "pi.h"
|
||||
|
||||
/**
|
||||
* map_range - Map a contiguous range of physical pages into virtual memory
|
||||
*
|
||||
* @pte: Address of physical pointer to array of pages to
|
||||
* allocate page tables from
|
||||
* @start: Virtual address of the start of the range
|
||||
* @end: Virtual address of the end of the range (exclusive)
|
||||
* @pa: Physical address of the start of the range
|
||||
* @prot: Access permissions of the range
|
||||
* @level: Translation level for the mapping
|
||||
* @tbl: The level @level page table to create the mappings in
|
||||
* @may_use_cont: Whether the use of the contiguous attribute is allowed
|
||||
* @va_offset: Offset between a physical page and its current mapping
|
||||
* in the VA space
|
||||
*/
|
||||
void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
|
||||
int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
|
||||
{
|
||||
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
|
||||
u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
|
||||
int lshift = (3 - level) * (PAGE_SHIFT - 3);
|
||||
u64 lmask = (PAGE_SIZE << lshift) - 1;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
pa &= PAGE_MASK;
|
||||
|
||||
/* Advance tbl to the entry that covers start */
|
||||
tbl += (start >> (lshift + PAGE_SHIFT)) % PTRS_PER_PTE;
|
||||
|
||||
/*
|
||||
* Set the right block/page bits for this level unless we are
|
||||
* clearing the mapping
|
||||
*/
|
||||
if (protval)
|
||||
protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
|
||||
|
||||
while (start < end) {
|
||||
u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
|
||||
|
||||
if (level < 3 && (start | next | pa) & lmask) {
|
||||
/*
|
||||
* This chunk needs a finer grained mapping. Create a
|
||||
* table mapping if necessary and recurse.
|
||||
*/
|
||||
if (pte_none(*tbl)) {
|
||||
*tbl = __pte(__phys_to_pte_val(*pte) |
|
||||
PMD_TYPE_TABLE | PMD_TABLE_UXN);
|
||||
*pte += PTRS_PER_PTE * sizeof(pte_t);
|
||||
}
|
||||
map_range(pte, start, next, pa, prot, level + 1,
|
||||
(pte_t *)(__pte_to_phys(*tbl) + va_offset),
|
||||
may_use_cont, va_offset);
|
||||
} else {
|
||||
/*
|
||||
* Start a contiguous range if start and pa are
|
||||
* suitably aligned
|
||||
*/
|
||||
if (((start | pa) & cmask) == 0 && may_use_cont)
|
||||
protval |= PTE_CONT;
|
||||
|
||||
/*
|
||||
* Clear the contiguous attribute if the remaining
|
||||
* range does not cover a contiguous block
|
||||
*/
|
||||
if ((end & ~cmask) <= start)
|
||||
protval &= ~PTE_CONT;
|
||||
|
||||
/* Put down a block or page mapping */
|
||||
*tbl = __pte(__phys_to_pte_val(pa) | protval);
|
||||
}
|
||||
pa += next - start;
|
||||
start = next;
|
||||
tbl++;
|
||||
}
|
||||
}
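
The per-level geometry that map_range() derives from 'level' can be sanity-checked with a short standalone program. The sketch below is illustrative only and assumes 4K pages (PAGE_SHIFT = 12) with the usual numbering where level 3 holds the PTEs.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1ULL << PAGE_SHIFT)

int main(void)
{
        /* Expected: level 3 -> 4 KiB, level 2 -> 2 MiB, level 1 -> 1 GiB, level 0 -> 512 GiB */
        for (int level = 0; level <= 3; level++) {
                int lshift = (3 - level) * (PAGE_SHIFT - 3);    /* 9 index bits per level */
                uint64_t lmask = (PAGE_SIZE << lshift) - 1;     /* bytes mapped by one entry, minus 1 */

                printf("level %d: one entry covers %#llx bytes\n",
                       level, (unsigned long long)(lmask + 1));
        }
        return 0;
}
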
|
||||
|
||||
asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
|
||||
{
|
||||
u64 ptep = (u64)pg_dir + PAGE_SIZE;
|
||||
pgprot_t text_prot = PAGE_KERNEL_ROX;
|
||||
pgprot_t data_prot = PAGE_KERNEL;
|
||||
|
||||
pgprot_val(text_prot) &= ~clrmask;
|
||||
pgprot_val(data_prot) &= ~clrmask;
|
||||
|
||||
map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
|
||||
text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
|
||||
map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
|
||||
data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
|
||||
|
||||
return ptep;
|
||||
}
|
|
@ -4,16 +4,17 @@
|
|||
* Author: Ard Biesheuvel <ardb@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/bug.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/scs.h>
|
||||
|
||||
#include "pi.h"
|
||||
|
||||
bool dynamic_scs_is_enabled;
|
||||
|
||||
//
|
||||
// This minimal DWARF CFI parser is partially based on the code in
|
||||
// arch/arc/kernel/unwind.c, and on the document below:
|
||||
|
@ -49,8 +50,6 @@
|
|||
#define DW_CFA_GNU_negative_offset_extended 0x2f
|
||||
#define DW_CFA_hi_user 0x3f
|
||||
|
||||
extern const u8 __eh_frame_start[], __eh_frame_end[];
|
||||
|
||||
enum {
|
||||
PACIASP = 0xd503233f,
|
||||
AUTIASP = 0xd50323bf,
|
||||
|
@ -81,7 +80,11 @@ static void __always_inline scs_patch_loc(u64 loc)
|
|||
*/
|
||||
return;
|
||||
}
|
||||
dcache_clean_pou(loc, loc + sizeof(u32));
|
||||
if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_CLEAN_CACHE))
|
||||
asm("dc civac, %0" :: "r"(loc));
|
||||
else
|
||||
asm(ALTERNATIVE("dc cvau, %0", "nop", ARM64_HAS_CACHE_IDC)
|
||||
:: "r"(loc));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -128,10 +131,10 @@ struct eh_frame {
|
|||
};
|
||||
};
|
||||
|
||||
static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
|
||||
bool fde_has_augmentation_data,
|
||||
int code_alignment_factor,
|
||||
bool dry_run)
|
||||
static int scs_handle_fde_frame(const struct eh_frame *frame,
|
||||
bool fde_has_augmentation_data,
|
||||
int code_alignment_factor,
|
||||
bool dry_run)
|
||||
{
|
||||
int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
|
||||
u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
|
||||
|
@ -198,14 +201,13 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
|
|||
break;
|
||||
|
||||
default:
|
||||
pr_err("unhandled opcode: %02x in FDE frame %lx\n", opcode[-1], (uintptr_t)frame);
|
||||
return -ENOEXEC;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int noinstr scs_patch(const u8 eh_frame[], int size)
|
||||
int scs_patch(const u8 eh_frame[], int size)
|
||||
{
|
||||
const u8 *p = eh_frame;
|
||||
|
||||
|
@ -250,13 +252,3 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
asmlinkage void __init scs_patch_vmlinux(void)
|
||||
{
|
||||
if (!should_patch_pac_into_scs())
|
||||
return;
|
||||
|
||||
WARN_ON(scs_patch(__eh_frame_start, __eh_frame_end - __eh_frame_start));
|
||||
icache_inval_all_pou();
|
||||
isb();
|
||||
}
|
arch/arm64/kernel/pi/pi.h (new file, 36 lines)
@@ -0,0 +1,36 @@
// SPDX-License-Identifier: GPL-2.0-only
|
||||
// Copyright 2023 Google LLC
|
||||
// Author: Ard Biesheuvel <ardb@google.com>
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define __prel64_initconst __section(".init.rodata.prel64")
|
||||
|
||||
#define PREL64(type, name) union { type *name; prel64_t name ## _prel; }
|
||||
|
||||
#define prel64_pointer(__d) (typeof(__d))prel64_to_pointer(&__d##_prel)
|
||||
|
||||
typedef volatile signed long prel64_t;
|
||||
|
||||
static inline void *prel64_to_pointer(const prel64_t *offset)
|
||||
{
|
||||
if (!*offset)
|
||||
return NULL;
|
||||
return (void *)offset + *offset;
|
||||
}
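
The PREL64/prel64_pointer machinery above can be illustrated with a small standalone sketch. The offset initialisation below simulates what the relacheck tool produces by converting ABS64 relocations in the .rodata.prel64 sections into place-relative values; everything outside what pi.h defines is hypothetical and for illustration only.

#include <stdio.h>

typedef long prel64_t;                  /* simplified stand-in for pi.h's volatile signed long */

void *prel64_to_pointer(const prel64_t *offset)
{
        if (!*offset)
                return NULL;
        return (void *)((char *)offset + *offset);      /* self-relative offset -> pointer */
}

struct desc { const char *name; };

static const struct desc mmfr1 = { "id_aa64mmfr1" };
static prel64_t mmfr1_prel;             /* would normally be filled in by the converted relocation */

int main(void)
{
        mmfr1_prel = (char *)&mmfr1 - (char *)&mmfr1_prel;      /* simulate the PREL64 relocation */

        struct desc *d = prel64_to_pointer(&mmfr1_prel);
        printf("%s\n", d->name);
        return 0;
}
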
|
||||
|
||||
extern bool dynamic_scs_is_enabled;
|
||||
|
||||
extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
|
||||
|
||||
void init_feature_override(u64 boot_status, const void *fdt, int chosen);
|
||||
u64 kaslr_early_init(void *fdt, int chosen);
|
||||
void relocate_kernel(u64 offset);
|
||||
int scs_patch(const u8 eh_frame[], int size);
|
||||
|
||||
void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
|
||||
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
|
||||
|
||||
asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
|
||||
|
||||
asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
|
arch/arm64/kernel/pi/relacheck.c (new file, 130 lines)
@@ -0,0 +1,130 @@
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2023 - Google LLC
|
||||
* Author: Ard Biesheuvel <ardb@google.com>
|
||||
*/
|
||||
|
||||
#include <elf.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#define HOST_ORDER ELFDATA2LSB
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
#define HOST_ORDER ELFDATA2MSB
|
||||
#endif
|
||||
|
||||
static Elf64_Ehdr *ehdr;
|
||||
static Elf64_Shdr *shdr;
|
||||
static const char *strtab;
|
||||
static bool swap;
|
||||
|
||||
static uint64_t swab_elfxword(uint64_t val)
|
||||
{
|
||||
return swap ? __builtin_bswap64(val) : val;
|
||||
}
|
||||
|
||||
static uint32_t swab_elfword(uint32_t val)
|
||||
{
|
||||
return swap ? __builtin_bswap32(val) : val;
|
||||
}
|
||||
|
||||
static uint16_t swab_elfhword(uint16_t val)
|
||||
{
|
||||
return swap ? __builtin_bswap16(val) : val;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct stat stat;
|
||||
int fd, ret;
|
||||
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "file arguments missing\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
fd = open(argv[1], O_RDWR);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "failed to open %s\n", argv[1]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
ret = fstat(fd, &stat);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "failed to stat() %s\n", argv[1]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
ehdr = mmap(0, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if (ehdr == MAP_FAILED) {
|
||||
fprintf(stderr, "failed to mmap() %s\n", argv[1]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
swap = ehdr->e_ident[EI_DATA] != HOST_ORDER;
|
||||
shdr = (void *)ehdr + swab_elfxword(ehdr->e_shoff);
|
||||
strtab = (void *)ehdr +
|
||||
swab_elfxword(shdr[swab_elfhword(ehdr->e_shstrndx)].sh_offset);
|
||||
|
||||
for (int i = 0; i < swab_elfhword(ehdr->e_shnum); i++) {
|
||||
unsigned long info, flags;
|
||||
bool prel64 = false;
|
||||
Elf64_Rela *rela;
|
||||
int numrela;
|
||||
|
||||
if (swab_elfword(shdr[i].sh_type) != SHT_RELA)
|
||||
continue;
|
||||
|
||||
/* only consider RELA sections operating on data */
|
||||
info = swab_elfword(shdr[i].sh_info);
|
||||
flags = swab_elfxword(shdr[info].sh_flags);
|
||||
if ((flags & (SHF_ALLOC | SHF_EXECINSTR)) != SHF_ALLOC)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We generally don't permit ABS64 relocations in the code that
|
||||
* runs before relocation processing occurs. If statically
|
||||
* initialized absolute symbol references are unavoidable, they
|
||||
* may be emitted into a *.rodata.prel64 section and they will
|
||||
* be converted to place-relative 64-bit references. This
|
||||
* requires special handling in the referring code.
|
||||
*/
|
||||
if (strstr(strtab + swab_elfword(shdr[info].sh_name),
|
||||
".rodata.prel64")) {
|
||||
prel64 = true;
|
||||
}
|
||||
|
||||
rela = (void *)ehdr + swab_elfxword(shdr[i].sh_offset);
|
||||
numrela = swab_elfxword(shdr[i].sh_size) / sizeof(*rela);
|
||||
|
||||
for (int j = 0; j < numrela; j++) {
|
||||
uint64_t info = swab_elfxword(rela[j].r_info);
|
||||
|
||||
if (ELF64_R_TYPE(info) != R_AARCH64_ABS64)
|
||||
continue;
|
||||
|
||||
if (prel64) {
|
||||
/* convert ABS64 into PREL64 */
|
||||
info ^= R_AARCH64_ABS64 ^ R_AARCH64_PREL64;
|
||||
rela[j].r_info = swab_elfxword(info);
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Unexpected absolute relocations detected in %s\n",
|
||||
argv[2]);
|
||||
close(fd);
|
||||
unlink(argv[1]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
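Editor's note on the conversion step above (not part of the patch): the XOR only flips the relocation type and leaves the symbol index in the upper half of r_info untouched, because the low bits are already known to hold R_AARCH64_ABS64. Spelled out with the standard ELF helper macros, it is equivalent to:

	info = ELF64_R_INFO(ELF64_R_SYM(info), R_AARCH64_PREL64);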
arch/arm64/kernel/pi/relocate.c (new file, 64 lines)
@ -0,0 +1,64 @@
// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2023 Google LLC
// Authors: Ard Biesheuvel <ardb@google.com>
//          Peter Collingbourne <pcc@google.com>

#include <linux/elf.h>
#include <linux/init.h>
#include <linux/types.h>

#include "pi.h"

extern const Elf64_Rela rela_start[], rela_end[];
extern const u64 relr_start[], relr_end[];

void __init relocate_kernel(u64 offset)
{
	u64 *place = NULL;

	for (const Elf64_Rela *rela = rela_start; rela < rela_end; rela++) {
		if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
			continue;
		*(u64 *)(rela->r_offset + offset) = rela->r_addend + offset;
	}

	if (!IS_ENABLED(CONFIG_RELR) || !offset)
		return;

	/*
	 * Apply RELR relocations.
	 *
	 * RELR is a compressed format for storing relative relocations. The
	 * encoded sequence of entries looks like:
	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
	 *
	 * i.e. start with an address, followed by any number of bitmaps. The
	 * address entry encodes 1 relocation. The subsequent bitmap entries
	 * encode up to 63 relocations each, at subsequent offsets following
	 * the last address entry.
	 *
	 * The bitmap entries must have 1 in the least significant bit. The
	 * assumption here is that an address cannot have 1 in lsb. Odd
	 * addresses are not supported. Any odd addresses are stored in the
	 * RELA section, which is handled above.
	 *
	 * With the exception of the least significant bit, each bit in the
	 * bitmap corresponds with a machine word that follows the base address
	 * word, and the bit value indicates whether or not a relocation needs
	 * to be applied to it. The second least significant bit represents the
	 * machine word immediately following the initial address, and each bit
	 * that follows represents the next word, in linear order. As such, a
	 * single bitmap can encode up to 63 relocations in a 64-bit object.
	 */
	for (const u64 *relr = relr_start; relr < relr_end; relr++) {
		if ((*relr & 1) == 0) {
			place = (u64 *)(*relr + offset);
			*place++ += offset;
		} else {
			for (u64 *p = place, r = *relr >> 1; r; p++, r >>= 1)
				if (r & 1)
					*p += offset;
			place += 63;
		}
	}
}
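A worked example of the RELR encoding described in the comment above (illustrative only; the offsets are made up). Suppose the 64-bit words at offsets 0x1000, 0x1008 and 0x1018 all need the relative fixup:

/*
 * .relr.dyn would then contain two entries:
 *
 *	0x0000000000001000	address entry: even, so the word at 0x1000 is
 *				patched and 'place' advances to 0x1008
 *	0x000000000000000b	bitmap entry: bit 0 is the marker, bit 1 patches
 *				0x1008, bit 3 patches 0x1018; afterwards 'place'
 *				advances by 63 words, to 0x1200
 */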
@ -166,21 +166,6 @@ static void __init smp_build_mpidr_hash(void)
|
|||
pr_warn("Large number of MPIDR hash buckets detected\n");
|
||||
}
|
||||
|
||||
static void *early_fdt_ptr __initdata;
|
||||
|
||||
void __init *get_early_fdt_ptr(void)
|
||||
{
|
||||
return early_fdt_ptr;
|
||||
}
|
||||
|
||||
asmlinkage void __init early_fdt_map(u64 dt_phys)
|
||||
{
|
||||
int fdt_size;
|
||||
|
||||
early_fixmap_init();
|
||||
early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
|
||||
}
|
||||
|
||||
static void __init setup_machine_fdt(phys_addr_t dt_phys)
|
||||
{
|
||||
int size;
|
||||
|
@ -298,13 +283,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
|
|||
|
||||
kaslr_init();
|
||||
|
||||
/*
|
||||
* If we know now that we are going to need KPTI then use non-global
|
||||
* mappings from the start, avoiding the cost of rewriting
|
||||
* everything later.
|
||||
*/
|
||||
arm64_use_ng_mappings = kaslr_requires_kpti();
|
||||
|
||||
early_fixmap_init();
|
||||
early_ioremap_init();
|
||||
|
||||
|
|
|
@ -102,9 +102,6 @@ SYM_CODE_START(cpu_resume)
|
|||
mov x0, xzr
|
||||
bl init_kernel_el
|
||||
mov x19, x0 // preserve boot mode
|
||||
#if VA_BITS > 48
|
||||
ldr_l x0, vabits_actual
|
||||
#endif
|
||||
bl __cpu_setup
|
||||
/* enable the MMU early - so we can access sleep_save_stash by va */
|
||||
adrp x1, swapper_pg_dir
|
||||
|
|
|
@ -126,9 +126,9 @@ jiffies = jiffies_64;
|
|||
#ifdef CONFIG_UNWIND_TABLES
|
||||
#define UNWIND_DATA_SECTIONS \
|
||||
.eh_frame : { \
|
||||
__eh_frame_start = .; \
|
||||
__pi___eh_frame_start = .; \
|
||||
*(.eh_frame) \
|
||||
__eh_frame_end = .; \
|
||||
__pi___eh_frame_end = .; \
|
||||
}
|
||||
#else
|
||||
#define UNWIND_DATA_SECTIONS
|
||||
|
@ -270,15 +270,15 @@ SECTIONS
|
|||
HYPERVISOR_RELOC_SECTION
|
||||
|
||||
.rela.dyn : ALIGN(8) {
|
||||
__rela_start = .;
|
||||
__pi_rela_start = .;
|
||||
*(.rela .rela*)
|
||||
__rela_end = .;
|
||||
__pi_rela_end = .;
|
||||
}
|
||||
|
||||
.relr.dyn : ALIGN(8) {
|
||||
__relr_start = .;
|
||||
__pi_relr_start = .;
|
||||
*(.relr.dyn)
|
||||
__relr_end = .;
|
||||
__pi_relr_end = .;
|
||||
}
|
||||
|
||||
. = ALIGN(SEGMENT_ALIGN);
|
||||
|
@ -311,12 +311,17 @@ SECTIONS
|
|||
__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
|
||||
_edata = .;
|
||||
|
||||
/* start of zero-init region */
|
||||
BSS_SECTION(SBSS_ALIGN, 0, 0)
|
||||
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
init_pg_dir = .;
|
||||
. += INIT_DIR_SIZE;
|
||||
init_pg_end = .;
|
||||
/* end of zero-init region */
|
||||
|
||||
. += SZ_4K; /* stack for the early C runtime */
|
||||
early_init_stack = .;
|
||||
|
||||
. = ALIGN(SEGMENT_ALIGN);
|
||||
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
|
||||
|
|
|
@ -805,7 +805,7 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
|
|||
.pgd = (kvm_pteref_t)kvm->mm->pgd,
|
||||
.ia_bits = vabits_actual,
|
||||
.start_level = (KVM_PGTABLE_LAST_LEVEL -
|
||||
CONFIG_PGTABLE_LEVELS + 1),
|
||||
ARM64_HW_PGTABLE_LEVELS(pgt.ia_bits) + 1),
|
||||
.mm_ops = &kvm_user_mm_ops,
|
||||
};
|
||||
unsigned long flags;
|
||||
|
@ -1874,16 +1874,9 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
|
|||
BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
|
||||
|
||||
/*
|
||||
* The ID map may be configured to use an extended virtual address
|
||||
* range. This is only the case if system RAM is out of range for the
|
||||
* currently configured page size and VA_BITS_MIN, in which case we will
|
||||
* also need the extended virtual range for the HYP ID map, or we won't
|
||||
* be able to enable the EL2 MMU.
|
||||
*
|
||||
* However, in some cases the ID map may be configured for fewer than
|
||||
* the number of VA bits used by the regular kernel stage 1. This
|
||||
* happens when VA_BITS=52 and the kernel image is placed in PA space
|
||||
* below 48 bits.
|
||||
* The ID map is always configured for 48 bits of translation, which
|
||||
* may be fewer than the number of VA bits used by the regular kernel
|
||||
* stage 1, when VA_BITS=52.
|
||||
*
|
||||
* At EL2, there is only one TTBR register, and we can't switch between
|
||||
* translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom
|
||||
|
@ -1894,7 +1887,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
|
|||
* 1 VA bits to assure that the hypervisor can both ID map its code page
|
||||
* and map any kernel memory.
|
||||
*/
|
||||
idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
|
||||
idmap_bits = IDMAP_VA_BITS;
|
||||
kernel_bits = vabits_actual;
|
||||
*hyp_va_bits = max(idmap_bits, kernel_bits);
|
||||
|
||||
|
|
|
@ -257,16 +257,14 @@ static bool is_el1_data_abort(unsigned long esr)
|
|||
static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE;
|
||||
|
||||
if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
|
||||
return false;
|
||||
|
||||
if (fsc_type == ESR_ELx_FSC_PERM)
|
||||
if (esr_fsc_is_permission_fault(esr))
|
||||
return true;
|
||||
|
||||
if (is_ttbr0_addr(addr) && system_uses_ttbr0_pan())
|
||||
return fsc_type == ESR_ELx_FSC_FAULT &&
|
||||
return esr_fsc_is_translation_fault(esr) &&
|
||||
(regs->pstate & PSR_PAN_BIT);
|
||||
|
||||
return false;
|
||||
|
@ -279,8 +277,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
|
|||
unsigned long flags;
|
||||
u64 par, dfsc;
|
||||
|
||||
if (!is_el1_data_abort(esr) ||
|
||||
(esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
|
||||
if (!is_el1_data_abort(esr) || !esr_fsc_is_translation_fault(esr))
|
||||
return false;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
@ -301,7 +298,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
|
|||
* treat the translation fault as spurious.
|
||||
*/
|
||||
dfsc = FIELD_GET(SYS_PAR_EL1_FST, par);
|
||||
return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
|
||||
return !esr_fsc_is_translation_fault(dfsc);
|
||||
}
|
||||
|
||||
static void die_kernel_fault(const char *msg, unsigned long addr,
|
||||
|
@ -368,11 +365,6 @@ static bool is_el1_mte_sync_tag_check_fault(unsigned long esr)
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool is_translation_fault(unsigned long esr)
|
||||
{
|
||||
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
|
||||
}
|
||||
|
||||
static void __do_kernel_fault(unsigned long addr, unsigned long esr,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
|
@ -405,7 +397,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
|
|||
} else if (addr < PAGE_SIZE) {
|
||||
msg = "NULL pointer dereference";
|
||||
} else {
|
||||
if (is_translation_fault(esr) &&
|
||||
if (esr_fsc_is_translation_fault(esr) &&
|
||||
kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
|
||||
return;
|
||||
|
||||
|
@ -782,18 +774,18 @@ static const struct fault_info fault_info[] = {
|
|||
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
|
||||
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
|
||||
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 8" },
|
||||
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 0 access flag fault" },
|
||||
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
|
||||
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
|
||||
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 12" },
|
||||
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 0 permission fault" },
|
||||
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
|
||||
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
|
||||
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
|
||||
{ do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" },
|
||||
{ do_tag_check_fault, SIGSEGV, SEGV_MTESERR, "synchronous tag check fault" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 18" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 19" },
|
||||
{ do_sea, SIGKILL, SI_KERNEL, "level -1 (translation table walk)" },
|
||||
{ do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" },
|
||||
{ do_sea, SIGKILL, SI_KERNEL, "level 1 (translation table walk)" },
|
||||
{ do_sea, SIGKILL, SI_KERNEL, "level 2 (translation table walk)" },
|
||||
|
@ -801,7 +793,7 @@ static const struct fault_info fault_info[] = {
|
|||
{ do_sea, SIGBUS, BUS_OBJERR, "synchronous parity or ECC error" }, // Reserved when RAS is implemented
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 25" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 26" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 27" },
|
||||
{ do_sea, SIGKILL, SI_KERNEL, "level -1 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
|
||||
{ do_sea, SIGKILL, SI_KERNEL, "level 0 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
|
||||
{ do_sea, SIGKILL, SI_KERNEL, "level 1 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
|
||||
{ do_sea, SIGKILL, SI_KERNEL, "level 2 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
|
||||
|
@ -815,9 +807,9 @@ static const struct fault_info fault_info[] = {
|
|||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 38" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 39" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 40" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 41" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "level -1 address size fault" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 42" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 43" },
|
||||
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level -1 translation fault" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 44" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 45" },
|
||||
{ do_bad, SIGKILL, SI_KERNEL, "unknown 46" },
|
||||
|
|
|
@ -104,7 +104,7 @@ void __init early_fixmap_init(void)
|
|||
unsigned long end = FIXADDR_TOP;
|
||||
|
||||
pgd_t *pgdp = pgd_offset_k(addr);
|
||||
p4d_t *p4dp = p4d_offset(pgdp, addr);
|
||||
p4d_t *p4dp = p4d_offset_kimg(pgdp, addr);
|
||||
|
||||
early_fixmap_init_pud(p4dp, addr, end);
|
||||
}
|
||||
|
@ -170,37 +170,3 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
|
|||
|
||||
return dt_virt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the fixmap region into a new pgdir.
|
||||
*/
|
||||
void __init fixmap_copy(pgd_t *pgdir)
|
||||
{
|
||||
if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdir, FIXADDR_TOT_START)))) {
|
||||
/*
|
||||
* The fixmap falls in a separate pgd to the kernel, and doesn't
|
||||
* live in the carveout for the swapper_pg_dir. We can simply
|
||||
* re-use the existing dir for the fixmap.
|
||||
*/
|
||||
set_pgd(pgd_offset_pgd(pgdir, FIXADDR_TOT_START),
|
||||
READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START)));
|
||||
} else if (CONFIG_PGTABLE_LEVELS > 3) {
|
||||
pgd_t *bm_pgdp;
|
||||
p4d_t *bm_p4dp;
|
||||
pud_t *bm_pudp;
|
||||
/*
|
||||
* The fixmap shares its top level pgd entry with the kernel
|
||||
* mapping. This can really only occur when we are running
|
||||
* with 16k/4 levels, so we can simply reuse the pud level
|
||||
* entry instead.
|
||||
*/
|
||||
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
|
||||
bm_pgdp = pgd_offset_pgd(pgdir, FIXADDR_TOT_START);
|
||||
bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START);
|
||||
bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START);
|
||||
pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd));
|
||||
pud_clear_fixmap();
|
||||
} else {
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -238,7 +238,7 @@ void __init arm64_memblock_init(void)
|
|||
* physical address of PAGE_OFFSET, we have to *subtract* from it.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52))
|
||||
memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52);
|
||||
memstart_addr -= _PAGE_OFFSET(vabits_actual) - _PAGE_OFFSET(52);
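	/*
	 * Editor's note (worked example, not part of the patch): a kernel
	 * built with CONFIG_ARM64_VA_BITS_52 is linked against
	 * _PAGE_OFFSET(52) == 0xfff0000000000000. If the CPU only implements
	 * 48 VA bits, vabits_actual == 48 and the linear map really starts at
	 * _PAGE_OFFSET(48) == 0xffff000000000000, so memstart_addr is reduced
	 * by _PAGE_OFFSET(48) - _PAGE_OFFSET(52) == (1UL << 52) - (1UL << 48).
	 */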
|
||||
|
||||
/*
|
||||
* Apply the memory limit if it was set. Since the kernel may be loaded
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
||||
|
||||
static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
|
||||
static pgd_t tmp_pg_dir[PTRS_PER_PTE] __initdata __aligned(PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* The p*d_populate functions call virt_to_phys implicitly so they can't be used
|
||||
|
@ -99,6 +99,19 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node,
|
|||
return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr);
|
||||
}
|
||||
|
||||
static p4d_t *__init kasan_p4d_offset(pgd_t *pgdp, unsigned long addr, int node,
|
||||
bool early)
|
||||
{
|
||||
if (pgd_none(READ_ONCE(*pgdp))) {
|
||||
phys_addr_t p4d_phys = early ?
|
||||
__pa_symbol(kasan_early_shadow_p4d)
|
||||
: kasan_alloc_zeroed_page(node);
|
||||
__pgd_populate(pgdp, p4d_phys, PGD_TYPE_TABLE);
|
||||
}
|
||||
|
||||
return early ? p4d_offset_kimg(pgdp, addr) : p4d_offset(pgdp, addr);
|
||||
}
|
||||
|
||||
static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
|
||||
unsigned long end, int node, bool early)
|
||||
{
|
||||
|
@ -144,12 +157,12 @@ static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
|
|||
unsigned long end, int node, bool early)
|
||||
{
|
||||
unsigned long next;
|
||||
p4d_t *p4dp = p4d_offset(pgdp, addr);
|
||||
p4d_t *p4dp = kasan_p4d_offset(pgdp, addr, node, early);
|
||||
|
||||
do {
|
||||
next = p4d_addr_end(addr, end);
|
||||
kasan_pud_populate(p4dp, addr, next, node, early);
|
||||
} while (p4dp++, addr = next, addr != end);
|
||||
} while (p4dp++, addr = next, addr != end && p4d_none(READ_ONCE(*p4dp)));
|
||||
}
|
||||
|
||||
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
|
||||
|
@ -165,19 +178,48 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
|
|||
} while (pgdp++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS > 4
|
||||
#define SHADOW_ALIGN P4D_SIZE
|
||||
#else
|
||||
#define SHADOW_ALIGN PUD_SIZE
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Return whether 'addr' is aligned to the size covered by a root level
|
||||
* descriptor.
|
||||
*/
|
||||
static bool __init root_level_aligned(u64 addr)
|
||||
{
|
||||
int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * (PAGE_SHIFT - 3);
|
||||
|
||||
return (addr % (PAGE_SIZE << shift)) == 0;
|
||||
}
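/*
 * Editor's note (worked example, not part of the patch, assuming 4K pages):
 * with vabits_actual == 48 there are 4 translation levels, so
 * shift == (4 - 1) * (12 - 3) == 27 and a root level descriptor covers
 * PAGE_SIZE << 27 == 1UL << 39 bytes (512 GiB). With LPA2 and
 * vabits_actual == 52 there are 5 levels, and a root level descriptor
 * covers 1UL << 48 bytes (256 TiB).
 */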
|
||||
|
||||
/* The early shadow maps everything to a single page of zeroes */
|
||||
asmlinkage void __init kasan_early_init(void)
|
||||
{
|
||||
BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
|
||||
KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
|
||||
/*
|
||||
* We cannot check the actual value of KASAN_SHADOW_START during build,
|
||||
* as it depends on vabits_actual. As a best-effort approach, check
|
||||
* potential values calculated based on VA_BITS and VA_BITS_MIN.
|
||||
*/
|
||||
BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), PGDIR_SIZE));
|
||||
BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), PGDIR_SIZE));
|
||||
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
|
||||
BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), SHADOW_ALIGN));
|
||||
BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), SHADOW_ALIGN));
|
||||
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, SHADOW_ALIGN));
|
||||
|
||||
if (!root_level_aligned(KASAN_SHADOW_START)) {
|
||||
/*
|
||||
* The start address is misaligned, and so the next level table
|
||||
* will be shared with the linear region. This can happen with
|
||||
* 4 or 5 level paging, so install a generic pte_t[] as the
|
||||
* next level. This prevents the kasan_pgd_populate call below
|
||||
* from inserting an entry that refers to the shared KASAN zero
|
||||
* shadow pud_t[]/p4d_t[], which could end up getting corrupted
|
||||
* when the linear region is mapped.
|
||||
*/
|
||||
static pte_t tbl[PTRS_PER_PTE] __page_aligned_bss;
|
||||
pgd_t *pgdp = pgd_offset_k(KASAN_SHADOW_START);
|
||||
|
||||
set_pgd(pgdp, __pgd(__pa_symbol(tbl) | PGD_TYPE_TABLE));
|
||||
}
|
||||
|
||||
kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE,
|
||||
true);
|
||||
}
|
||||
|
@ -190,34 +232,74 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
|
|||
}
|
||||
|
||||
/*
|
||||
* Copy the current shadow region into a new pgdir.
|
||||
* Return the descriptor index of 'addr' in the root level table
|
||||
*/
|
||||
void __init kasan_copy_shadow(pgd_t *pgdir)
|
||||
{
|
||||
pgd_t *pgdp, *pgdp_new, *pgdp_end;
|
||||
|
||||
pgdp = pgd_offset_k(KASAN_SHADOW_START);
|
||||
pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
|
||||
pgdp_new = pgd_offset_pgd(pgdir, KASAN_SHADOW_START);
|
||||
do {
|
||||
set_pgd(pgdp_new, READ_ONCE(*pgdp));
|
||||
} while (pgdp++, pgdp_new++, pgdp != pgdp_end);
|
||||
}
|
||||
|
||||
static void __init clear_pgds(unsigned long start,
|
||||
unsigned long end)
|
||||
static int __init root_level_idx(u64 addr)
|
||||
{
|
||||
/*
|
||||
* Remove references to kasan page tables from
|
||||
* swapper_pg_dir. pgd_clear() can't be used
|
||||
* here because it's nop on 2,3-level pagetable setups
|
||||
* On 64k pages, the TTBR1 range root tables are extended for 52-bit
|
||||
* virtual addressing, and TTBR1 will simply point to the pgd_t entry
|
||||
* that covers the start of the 48-bit addressable VA space if LVA is
|
||||
* not implemented. This means we need to index the table as usual,
|
||||
* instead of masking off bits based on vabits_actual.
|
||||
*/
|
||||
for (; start < end; start += PGDIR_SIZE)
|
||||
set_pgd(pgd_offset_k(start), __pgd(0));
|
||||
u64 vabits = IS_ENABLED(CONFIG_ARM64_64K_PAGES) ? VA_BITS
|
||||
: vabits_actual;
|
||||
int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * (PAGE_SHIFT - 3);
|
||||
|
||||
return (addr & ~_PAGE_OFFSET(vabits)) >> (shift + PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clone a next level table from swapper_pg_dir into tmp_pg_dir
|
||||
*/
|
||||
static void __init clone_next_level(u64 addr, pgd_t *tmp_pg_dir, pud_t *pud)
|
||||
{
|
||||
int idx = root_level_idx(addr);
|
||||
pgd_t pgd = READ_ONCE(swapper_pg_dir[idx]);
|
||||
pud_t *pudp = (pud_t *)__phys_to_kimg(__pgd_to_phys(pgd));
|
||||
|
||||
memcpy(pud, pudp, PAGE_SIZE);
|
||||
tmp_pg_dir[idx] = __pgd(__phys_to_pgd_val(__pa_symbol(pud)) |
|
||||
PUD_TYPE_TABLE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the descriptor index of 'addr' in the next level table
|
||||
*/
|
||||
static int __init next_level_idx(u64 addr)
|
||||
{
|
||||
int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * (PAGE_SHIFT - 3);
|
||||
|
||||
return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dereference the table descriptor at 'pgd_idx' and clear the entries from
|
||||
* 'start' to 'end' (exclusive) from the table.
|
||||
*/
|
||||
static void __init clear_next_level(int pgd_idx, int start, int end)
|
||||
{
|
||||
pgd_t pgd = READ_ONCE(swapper_pg_dir[pgd_idx]);
|
||||
pud_t *pudp = (pud_t *)__phys_to_kimg(__pgd_to_phys(pgd));
|
||||
|
||||
memset(&pudp[start], 0, (end - start) * sizeof(pud_t));
|
||||
}
|
||||
|
||||
static void __init clear_shadow(u64 start, u64 end)
|
||||
{
|
||||
int l = root_level_idx(start), m = root_level_idx(end);
|
||||
|
||||
if (!root_level_aligned(start))
|
||||
clear_next_level(l++, next_level_idx(start), PTRS_PER_PTE);
|
||||
if (!root_level_aligned(end))
|
||||
clear_next_level(m, 0, next_level_idx(end));
|
||||
memset(&swapper_pg_dir[l], 0, (m - l) * sizeof(pgd_t));
|
||||
}
|
||||
|
||||
static void __init kasan_init_shadow(void)
|
||||
{
|
||||
static pud_t pud[2][PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
|
||||
u64 kimg_shadow_start, kimg_shadow_end;
|
||||
u64 mod_shadow_start;
|
||||
u64 vmalloc_shadow_end;
|
||||
|
@ -239,10 +321,23 @@ static void __init kasan_init_shadow(void)
|
|||
* setup will be finished.
|
||||
*/
|
||||
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
|
||||
dsb(ishst);
|
||||
cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir);
|
||||
|
||||
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
|
||||
/*
|
||||
* If the start or end address of the shadow region is not aligned to
|
||||
* the root level size, we have to allocate a temporary next-level table
|
||||
* in each case, clone the next level of descriptors, and install the
|
||||
* table into tmp_pg_dir. Note that with 5 levels of paging, the next
|
||||
* level will in fact be p4d_t, but that makes no difference in this
|
||||
* case.
|
||||
*/
|
||||
if (!root_level_aligned(KASAN_SHADOW_START))
|
||||
clone_next_level(KASAN_SHADOW_START, tmp_pg_dir, pud[0]);
|
||||
if (!root_level_aligned(KASAN_SHADOW_END))
|
||||
clone_next_level(KASAN_SHADOW_END, tmp_pg_dir, pud[1]);
|
||||
dsb(ishst);
|
||||
cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
|
||||
|
||||
clear_shadow(KASAN_SHADOW_START, KASAN_SHADOW_END);
|
||||
|
||||
kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
|
||||
early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START))));
|
||||
|
@ -276,7 +371,7 @@ static void __init kasan_init_shadow(void)
|
|||
PAGE_KERNEL_RO));
|
||||
|
||||
memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
|
||||
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
|
||||
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
|
||||
}
|
||||
|
||||
static void __init kasan_init_depth(void)
|
||||
|
|
|
@ -73,6 +73,10 @@ static int __init adjust_protection_map(void)
|
|||
protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
|
||||
}
|
||||
|
||||
if (lpa2_is_enabled())
|
||||
for (int i = 0; i < ARRAY_SIZE(protection_map); i++)
|
||||
pgprot_val(protection_map[i]) &= ~PTE_SHARED;
|
||||
|
||||
return 0;
|
||||
}
|
||||
arch_initcall(adjust_protection_map);
|
||||
|
|
|
@ -45,18 +45,13 @@
|
|||
#define NO_CONT_MAPPINGS BIT(1)
|
||||
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
|
||||
|
||||
int idmap_t0sz __ro_after_init;
|
||||
|
||||
#if VA_BITS > 48
|
||||
u64 vabits_actual __ro_after_init = VA_BITS_MIN;
|
||||
EXPORT_SYMBOL(vabits_actual);
|
||||
#endif
|
||||
|
||||
u64 kimage_voffset __ro_after_init;
|
||||
EXPORT_SYMBOL(kimage_voffset);
|
||||
|
||||
u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
|
||||
|
||||
static bool rodata_is_rw __ro_after_init = true;
|
||||
|
||||
/*
|
||||
* The booting CPU updates the failed status @__early_cpu_boot_status,
|
||||
* with MMU turned off.
|
||||
|
@ -73,10 +68,21 @@ EXPORT_SYMBOL(empty_zero_page);
|
|||
static DEFINE_SPINLOCK(swapper_pgdir_lock);
|
||||
static DEFINE_MUTEX(fixmap_lock);
|
||||
|
||||
void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
void noinstr set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
{
|
||||
pgd_t *fixmap_pgdp;
|
||||
|
||||
/*
|
||||
* Don't bother with the fixmap if swapper_pg_dir is still mapped
|
||||
* writable in the kernel mapping.
|
||||
*/
|
||||
if (rodata_is_rw) {
|
||||
WRITE_ONCE(*pgdp, pgd);
|
||||
dsb(ishst);
|
||||
isb();
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&swapper_pgdir_lock);
|
||||
fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp));
|
||||
WRITE_ONCE(*fixmap_pgdp, pgd);
|
||||
|
@ -307,15 +313,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
|
|||
} while (addr = next, addr != end);
|
||||
}
|
||||
|
||||
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
|
||||
static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
|
||||
phys_addr_t phys, pgprot_t prot,
|
||||
phys_addr_t (*pgtable_alloc)(int),
|
||||
int flags)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp;
|
||||
p4d_t *p4dp = p4d_offset(pgdp, addr);
|
||||
p4d_t p4d = READ_ONCE(*p4dp);
|
||||
pud_t *pudp;
|
||||
|
||||
if (p4d_none(p4d)) {
|
||||
p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN;
|
||||
|
@ -363,6 +368,46 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
|
|||
pud_clear_fixmap();
|
||||
}
|
||||
|
||||
static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
|
||||
phys_addr_t phys, pgprot_t prot,
|
||||
phys_addr_t (*pgtable_alloc)(int),
|
||||
int flags)
|
||||
{
|
||||
unsigned long next;
|
||||
pgd_t pgd = READ_ONCE(*pgdp);
|
||||
p4d_t *p4dp;
|
||||
|
||||
if (pgd_none(pgd)) {
|
||||
pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN;
|
||||
phys_addr_t p4d_phys;
|
||||
|
||||
if (flags & NO_EXEC_MAPPINGS)
|
||||
pgdval |= PGD_TABLE_PXN;
|
||||
BUG_ON(!pgtable_alloc);
|
||||
p4d_phys = pgtable_alloc(P4D_SHIFT);
|
||||
__pgd_populate(pgdp, p4d_phys, pgdval);
|
||||
pgd = READ_ONCE(*pgdp);
|
||||
}
|
||||
BUG_ON(pgd_bad(pgd));
|
||||
|
||||
p4dp = p4d_set_fixmap_offset(pgdp, addr);
|
||||
do {
|
||||
p4d_t old_p4d = READ_ONCE(*p4dp);
|
||||
|
||||
next = p4d_addr_end(addr, end);
|
||||
|
||||
alloc_init_pud(p4dp, addr, next, phys, prot,
|
||||
pgtable_alloc, flags);
|
||||
|
||||
BUG_ON(p4d_val(old_p4d) != 0 &&
|
||||
p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp)));
|
||||
|
||||
phys += next - addr;
|
||||
} while (p4dp++, addr = next, addr != end);
|
||||
|
||||
p4d_clear_fixmap();
|
||||
}
|
||||
|
||||
static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
|
||||
unsigned long virt, phys_addr_t size,
|
||||
pgprot_t prot,
|
||||
|
@ -385,7 +430,7 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
|
|||
|
||||
do {
|
||||
next = pgd_addr_end(addr, end);
|
||||
alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
|
||||
alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc,
|
||||
flags);
|
||||
phys += next - addr;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
|
@ -576,8 +621,12 @@ static void __init map_mem(pgd_t *pgdp)
|
|||
* entries at any level are being shared between the linear region and
|
||||
* the vmalloc region. Check whether this is true for the PGD level, in
|
||||
* which case it is guaranteed to be true for all other levels as well.
|
||||
* (Unless we are running with support for LPA2, in which case the
|
||||
* entire reduced VA space is covered by a single pgd_t which will have
|
||||
* been populated without the PXNTable attribute by the time we get here.)
|
||||
*/
|
||||
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
|
||||
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end) &&
|
||||
pgd_index(_PAGE_OFFSET(VA_BITS_MIN)) != PTRS_PER_PGD - 1);
|
||||
|
||||
early_kfence_pool = arm64_kfence_alloc_pool();
|
||||
|
||||
|
@ -630,15 +679,16 @@ void mark_rodata_ro(void)
|
|||
* to cover NOTES and EXCEPTION_TABLE.
|
||||
*/
|
||||
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
|
||||
WRITE_ONCE(rodata_is_rw, false);
|
||||
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
|
||||
section_size, PAGE_KERNEL_RO);
|
||||
|
||||
debug_checkwx();
|
||||
}
|
||||
|
||||
static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
|
||||
pgprot_t prot, struct vm_struct *vma,
|
||||
int flags, unsigned long vm_flags)
|
||||
static void __init declare_vma(struct vm_struct *vma,
|
||||
void *va_start, void *va_end,
|
||||
unsigned long vm_flags)
|
||||
{
|
||||
phys_addr_t pa_start = __pa_symbol(va_start);
|
||||
unsigned long size = va_end - va_start;
|
||||
|
@ -646,9 +696,6 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
|
|||
BUG_ON(!PAGE_ALIGNED(pa_start));
|
||||
BUG_ON(!PAGE_ALIGNED(size));
|
||||
|
||||
__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
|
||||
early_pgtable_alloc, flags);
|
||||
|
||||
if (!(vm_flags & VM_NO_GUARD))
|
||||
size += PAGE_SIZE;
|
||||
|
||||
|
@ -661,12 +708,12 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
|
|||
vm_area_add_early(vma);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
|
||||
static pgprot_t kernel_exec_prot(void)
|
||||
{
|
||||
return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
|
||||
static int __init map_entry_trampoline(void)
|
||||
{
|
||||
int i;
|
||||
|
@ -701,80 +748,36 @@ core_initcall(map_entry_trampoline);
|
|||
#endif
|
||||
|
||||
/*
|
||||
* Open coded check for BTI, only for use to determine configuration
|
||||
* for early mappings for before the cpufeature code has run.
|
||||
* Declare the VMA areas for the kernel
|
||||
*/
|
||||
static bool arm64_early_this_cpu_has_bti(void)
|
||||
static void __init declare_kernel_vmas(void)
|
||||
{
|
||||
u64 pfr1;
|
||||
static struct vm_struct vmlinux_seg[KERNEL_SEGMENT_COUNT];
|
||||
|
||||
if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
|
||||
return false;
|
||||
|
||||
pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);
|
||||
return cpuid_feature_extract_unsigned_field(pfr1,
|
||||
ID_AA64PFR1_EL1_BT_SHIFT);
|
||||
declare_vma(&vmlinux_seg[0], _stext, _etext, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[1], __start_rodata, __inittext_begin, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[2], __inittext_begin, __inittext_end, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[3], __initdata_begin, __initdata_end, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[4], _data, _end, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create fine-grained mappings for the kernel.
|
||||
*/
|
||||
static void __init map_kernel(pgd_t *pgdp)
|
||||
{
|
||||
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
|
||||
vmlinux_initdata, vmlinux_data;
|
||||
void __pi_map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
|
||||
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
|
||||
|
||||
/*
|
||||
* External debuggers may need to write directly to the text
|
||||
* mapping to install SW breakpoints. Allow this (only) when
|
||||
* explicitly requested with rodata=off.
|
||||
*/
|
||||
pgprot_t text_prot = kernel_exec_prot();
|
||||
|
||||
/*
|
||||
* If we have a CPU that supports BTI and a kernel built for
|
||||
* BTI then mark the kernel executable text as guarded pages
|
||||
* now so we don't have to rewrite the page tables later.
|
||||
*/
|
||||
if (arm64_early_this_cpu_has_bti())
|
||||
text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
|
||||
|
||||
/*
|
||||
* Only rodata will be remapped with different permissions later on,
|
||||
* all other segments are allowed to use contiguous mappings.
|
||||
*/
|
||||
map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0,
|
||||
VM_NO_GUARD);
|
||||
map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
|
||||
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
|
||||
map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
|
||||
&vmlinux_inittext, 0, VM_NO_GUARD);
|
||||
map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
|
||||
&vmlinux_initdata, 0, VM_NO_GUARD);
|
||||
map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
|
||||
|
||||
fixmap_copy(pgdp);
|
||||
kasan_copy_shadow(pgdp);
|
||||
}
|
||||
static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
|
||||
kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
|
||||
|
||||
static void __init create_idmap(void)
|
||||
{
|
||||
u64 start = __pa_symbol(__idmap_text_start);
|
||||
u64 size = __pa_symbol(__idmap_text_end) - start;
|
||||
pgd_t *pgd = idmap_pg_dir;
|
||||
u64 pgd_phys;
|
||||
u64 end = __pa_symbol(__idmap_text_end);
|
||||
u64 ptep = __pa_symbol(idmap_ptes);
|
||||
|
||||
/* check if we need an additional level of translation */
|
||||
if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
|
||||
pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
|
||||
set_pgd(&idmap_pg_dir[start >> VA_BITS],
|
||||
__pgd(pgd_phys | P4D_TYPE_TABLE));
|
||||
pgd = __va(pgd_phys);
|
||||
}
|
||||
__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
|
||||
early_pgtable_alloc, 0);
|
||||
__pi_map_range(&ptep, start, end, start, PAGE_KERNEL_ROX,
|
||||
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
|
||||
__phys_to_virt(ptep) - ptep);
|
||||
|
||||
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
|
||||
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings) {
|
||||
extern u32 __idmap_kpti_flag;
|
||||
u64 pa = __pa_symbol(&__idmap_kpti_flag);
|
||||
|
||||
|
@ -782,32 +785,21 @@ static void __init create_idmap(void)
|
|||
* The KPTI G-to-nG conversion code needs a read-write mapping
|
||||
* of its synchronization flag in the ID map.
|
||||
*/
|
||||
__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
|
||||
early_pgtable_alloc, 0);
|
||||
ptep = __pa_symbol(kpti_ptes);
|
||||
__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
|
||||
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
|
||||
__phys_to_virt(ptep) - ptep);
|
||||
}
|
||||
}
|
||||
|
||||
void __init paging_init(void)
|
||||
{
|
||||
pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
|
||||
extern pgd_t init_idmap_pg_dir[];
|
||||
|
||||
idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
|
||||
|
||||
map_kernel(pgdp);
|
||||
map_mem(pgdp);
|
||||
|
||||
pgd_clear_fixmap();
|
||||
|
||||
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
|
||||
init_mm.pgd = swapper_pg_dir;
|
||||
|
||||
memblock_phys_free(__pa_symbol(init_pg_dir),
|
||||
__pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
|
||||
map_mem(swapper_pg_dir);
|
||||
|
||||
memblock_allow_resize();
|
||||
|
||||
create_idmap();
|
||||
declare_kernel_vmas();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
|
@ -1073,10 +1065,10 @@ static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
|
|||
free_empty_pmd_table(pudp, addr, next, floor, ceiling);
|
||||
} while (addr = next, addr < end);
|
||||
|
||||
if (CONFIG_PGTABLE_LEVELS <= 3)
|
||||
if (!pgtable_l4_enabled())
|
||||
return;
|
||||
|
||||
if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
|
||||
if (!pgtable_range_aligned(start, end, floor, ceiling, P4D_MASK))
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@ -1099,8 +1091,8 @@ static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
|
|||
unsigned long end, unsigned long floor,
|
||||
unsigned long ceiling)
|
||||
{
|
||||
unsigned long next;
|
||||
p4d_t *p4dp, p4d;
|
||||
unsigned long i, next, start = addr;
|
||||
|
||||
do {
|
||||
next = p4d_addr_end(addr, end);
|
||||
|
@ -1112,6 +1104,27 @@ static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
|
|||
WARN_ON(!p4d_present(p4d));
|
||||
free_empty_pud_table(p4dp, addr, next, floor, ceiling);
|
||||
} while (addr = next, addr < end);
|
||||
|
||||
if (!pgtable_l5_enabled())
|
||||
return;
|
||||
|
||||
if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Check whether we can free the p4d page if the rest of the
|
||||
* entries are empty. Overlap with other regions have been
|
||||
* handled by the floor/ceiling check.
|
||||
*/
|
||||
p4dp = p4d_offset(pgdp, 0UL);
|
||||
for (i = 0; i < PTRS_PER_P4D; i++) {
|
||||
if (!p4d_none(READ_ONCE(p4dp[i])))
|
||||
return;
|
||||
}
|
||||
|
||||
pgd_clear(pgdp);
|
||||
__flush_tlb_kernel_pgtable(start);
|
||||
free_hotplug_pgtable_page(virt_to_page(p4dp));
|
||||
}
|
||||
|
||||
static void free_empty_tables(unsigned long addr, unsigned long end,
|
||||
|
@ -1196,6 +1209,12 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
|
|||
return 1;
|
||||
}
|
||||
|
||||
#ifndef __PAGETABLE_P4D_FOLDED
|
||||
void p4d_clear_huge(p4d_t *p4dp)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
int pud_clear_huge(pud_t *pudp)
|
||||
{
|
||||
if (!pud_sect(READ_ONCE(*pudp)))
|
||||
|
@ -1486,3 +1505,35 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte
|
|||
{
|
||||
set_pte_at(vma->vm_mm, addr, ptep, pte);
|
||||
}
|
||||
|
||||
/*
|
||||
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
|
||||
* avoiding the possibility of conflicting TLB entries being allocated.
|
||||
*/
|
||||
void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
|
||||
{
|
||||
typedef void (ttbr_replace_func)(phys_addr_t);
|
||||
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
|
||||
ttbr_replace_func *replace_phys;
|
||||
unsigned long daif;
|
||||
|
||||
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
|
||||
phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
|
||||
|
||||
if (cnp)
|
||||
ttbr1 |= TTBR_CNP_BIT;
|
||||
|
||||
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
|
||||
|
||||
cpu_install_idmap();
|
||||
|
||||
/*
|
||||
* We really don't want to take *any* exceptions while TTBR1 is
|
||||
* in the process of being replaced so mask everything.
|
||||
*/
|
||||
daif = local_daif_save();
|
||||
replace_phys(ttbr1);
|
||||
local_daif_restore(daif);
|
||||
|
||||
cpu_uninstall_idmap();
|
||||
}
|
||||
|
|
|
@ -17,11 +17,22 @@
|
|||
|
||||
static struct kmem_cache *pgd_cache __ro_after_init;
|
||||
|
||||
static bool pgdir_is_page_size(void)
|
||||
{
|
||||
if (PGD_SIZE == PAGE_SIZE)
|
||||
return true;
|
||||
if (CONFIG_PGTABLE_LEVELS == 4)
|
||||
return !pgtable_l4_enabled();
|
||||
if (CONFIG_PGTABLE_LEVELS == 5)
|
||||
return !pgtable_l5_enabled();
|
||||
return false;
|
||||
}
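/*
 * Editor's note (illustration, not part of the patch): with 4K pages and
 * CONFIG_ARM64_VA_BITS_52, CONFIG_PGTABLE_LEVELS == 5 and the compile-time
 * pgd only covers VA bits [51:48], i.e. PTRS_PER_PGD == 16 and
 * PGD_SIZE == 128 bytes, which is why pgd_alloc() would normally hand out
 * objects from the kmem_cache. If the fifth level is folded at runtime
 * (no LPA2), the top level table actually walked by the MMU is a full
 * page-sized next-level table, so a whole page has to be allocated
 * instead - the case pgdir_is_page_size() reports.
 */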
|
||||
|
||||
pgd_t *pgd_alloc(struct mm_struct *mm)
|
||||
{
|
||||
gfp_t gfp = GFP_PGTABLE_USER;
|
||||
|
||||
if (PGD_SIZE == PAGE_SIZE)
|
||||
if (pgdir_is_page_size())
|
||||
return (pgd_t *)__get_free_page(gfp);
|
||||
else
|
||||
return kmem_cache_alloc(pgd_cache, gfp);
|
||||
|
@ -29,7 +40,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
|
|||
|
||||
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
||||
{
|
||||
if (PGD_SIZE == PAGE_SIZE)
|
||||
if (pgdir_is_page_size())
|
||||
free_page((unsigned long)pgd);
|
||||
else
|
||||
kmem_cache_free(pgd_cache, pgd);
|
||||
|
@ -37,7 +48,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
|||
|
||||
void __init pgtable_cache_init(void)
|
||||
{
|
||||
if (PGD_SIZE == PAGE_SIZE)
|
||||
if (pgdir_is_page_size())
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_ARM64_PA_BITS_52
|
||||
|
|
|
@ -195,27 +195,36 @@ SYM_TYPED_FUNC_START(idmap_cpu_replace_ttbr1)
|
|||
|
||||
ret
|
||||
SYM_FUNC_END(idmap_cpu_replace_ttbr1)
|
||||
SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
|
||||
.popsection
|
||||
|
||||
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
|
||||
|
||||
#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
|
||||
#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | PTE_TYPE_PAGE | \
|
||||
PTE_AF | PTE_SHARED | PTE_UXN | PTE_WRITE)
|
||||
|
||||
.pushsection ".idmap.text", "a"
|
||||
|
||||
.macro pte_to_phys, phys, pte
|
||||
and \phys, \pte, #PTE_ADDR_LOW
|
||||
#ifdef CONFIG_ARM64_PA_BITS_52
|
||||
and \pte, \pte, #PTE_ADDR_HIGH
|
||||
orr \phys, \phys, \pte, lsl #PTE_ADDR_HIGH_SHIFT
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro kpti_mk_tbl_ng, type, num_entries
|
||||
add end_\type\()p, cur_\type\()p, #\num_entries * 8
|
||||
.Ldo_\type:
|
||||
ldr \type, [cur_\type\()p] // Load the entry
|
||||
ldr \type, [cur_\type\()p], #8 // Load the entry and advance
|
||||
tbz \type, #0, .Lnext_\type // Skip invalid and
|
||||
tbnz \type, #11, .Lnext_\type // non-global entries
|
||||
orr \type, \type, #PTE_NG // Same bit for blocks and pages
|
||||
str \type, [cur_\type\()p] // Update the entry
|
||||
str \type, [cur_\type\()p, #-8] // Update the entry
|
||||
.ifnc \type, pte
|
||||
tbnz \type, #1, .Lderef_\type
|
||||
.endif
|
||||
.Lnext_\type:
|
||||
add cur_\type\()p, cur_\type\()p, #8
|
||||
cmp cur_\type\()p, end_\type\()p
|
||||
b.ne .Ldo_\type
|
||||
.endm
|
||||
|
@ -225,18 +234,18 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
|
|||
* fixmap slot associated with the current level.
|
||||
*/
|
||||
.macro kpti_map_pgtbl, type, level
|
||||
str xzr, [temp_pte, #8 * (\level + 1)] // break before make
|
||||
str xzr, [temp_pte, #8 * (\level + 2)] // break before make
|
||||
dsb nshst
|
||||
add pte, temp_pte, #PAGE_SIZE * (\level + 1)
|
||||
add pte, temp_pte, #PAGE_SIZE * (\level + 2)
|
||||
lsr pte, pte, #12
|
||||
tlbi vaae1, pte
|
||||
dsb nsh
|
||||
isb
|
||||
|
||||
phys_to_pte pte, cur_\type\()p
|
||||
add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1)
|
||||
add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 2)
|
||||
orr pte, pte, pte_flags
|
||||
str pte, [temp_pte, #8 * (\level + 1)]
|
||||
str pte, [temp_pte, #8 * (\level + 2)]
|
||||
dsb nshst
|
||||
.endm
|
||||
|
||||
|
@ -269,6 +278,8 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
|
|||
end_ptep .req x15
|
||||
pte .req x16
|
||||
valid .req x17
|
||||
cur_p4dp .req x19
|
||||
end_p4dp .req x20
|
||||
|
||||
mov x5, x3 // preserve temp_pte arg
|
||||
mrs swapper_ttb, ttbr1_el1
|
||||
|
@ -276,6 +287,12 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
|
|||
|
||||
cbnz cpu, __idmap_kpti_secondary
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
stp x29, x30, [sp, #-32]!
|
||||
mov x29, sp
|
||||
stp x19, x20, [sp, #16]
|
||||
#endif
|
||||
|
||||
/* We're the boot CPU. Wait for the others to catch up */
|
||||
sevl
|
||||
1: wfe
|
||||
|
@ -293,9 +310,32 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
|
|||
mov_q pte_flags, KPTI_NG_PTE_FLAGS
|
||||
|
||||
/* Everybody is enjoying the idmap, so we can rewrite swapper. */
|
||||
|
||||
#ifdef CONFIG_ARM64_LPA2
|
||||
/*
|
||||
* If LPA2 support is configured, but 52-bit virtual addressing is not
|
||||
* enabled at runtime, we will fall back to one level of paging less,
|
||||
* and so we have to walk swapper_pg_dir as if we dereferenced its
|
||||
* address from a PGD level entry, and terminate the PGD level loop
|
||||
* right after.
|
||||
*/
|
||||
adrp pgd, swapper_pg_dir // walk &swapper_pg_dir at the next level
|
||||
mov cur_pgdp, end_pgdp // must be equal to terminate the PGD loop
|
||||
alternative_if_not ARM64_HAS_VA52
|
||||
b .Lderef_pgd // skip to the next level
|
||||
alternative_else_nop_endif
|
||||
/*
|
||||
* LPA2 based 52-bit virtual addressing requires 52-bit physical
|
||||
* addressing to be enabled as well. In this case, the shareability
|
||||
* bits are repurposed as physical address bits, and should not be
|
||||
* set in pte_flags.
|
||||
*/
|
||||
bic pte_flags, pte_flags, #PTE_SHARED
|
||||
#endif
|
||||
|
||||
/* PGD */
|
||||
adrp cur_pgdp, swapper_pg_dir
|
||||
kpti_map_pgtbl pgd, 0
|
||||
kpti_map_pgtbl pgd, -1
|
||||
kpti_mk_tbl_ng pgd, PTRS_PER_PGD
|
||||
|
||||
/* Ensure all the updated entries are visible to secondary CPUs */
|
||||
|
@ -308,16 +348,33 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
|
|||
|
||||
/* Set the flag to zero to indicate that we're all done */
|
||||
str wzr, [flag_ptr]
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
ldp x19, x20, [sp, #16]
|
||||
ldp x29, x30, [sp], #32
|
||||
#endif
|
||||
ret
|
||||
|
||||
.Lderef_pgd:
|
||||
/* P4D */
|
||||
.if CONFIG_PGTABLE_LEVELS > 4
|
||||
p4d .req x30
|
||||
pte_to_phys cur_p4dp, pgd
|
||||
kpti_map_pgtbl p4d, 0
|
||||
kpti_mk_tbl_ng p4d, PTRS_PER_P4D
|
||||
b .Lnext_pgd
|
||||
.else /* CONFIG_PGTABLE_LEVELS <= 4 */
|
||||
p4d .req pgd
|
||||
.set .Lnext_p4d, .Lnext_pgd
|
||||
.endif
|
||||
|
||||
.Lderef_p4d:
|
||||
/* PUD */
|
||||
.if CONFIG_PGTABLE_LEVELS > 3
|
||||
pud .req x10
|
||||
pte_to_phys cur_pudp, pgd
|
||||
pte_to_phys cur_pudp, p4d
|
||||
kpti_map_pgtbl pud, 1
|
||||
kpti_mk_tbl_ng pud, PTRS_PER_PUD
|
||||
b .Lnext_pgd
|
||||
b .Lnext_p4d
|
||||
.else /* CONFIG_PGTABLE_LEVELS <= 3 */
|
||||
pud .req pgd
|
||||
.set .Lnext_pud, .Lnext_pgd
|
||||
|
@ -361,6 +418,9 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
|
|||
.unreq end_ptep
|
||||
.unreq pte
|
||||
.unreq valid
|
||||
.unreq cur_p4dp
|
||||
.unreq end_p4dp
|
||||
.unreq p4d
|
||||
|
||||
/* Secondary CPUs end up here */
|
||||
__idmap_kpti_secondary:
|
||||
|
@ -395,8 +455,6 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
|
|||
*
|
||||
* Initialise the processor for turning the MMU on.
|
||||
*
|
||||
* Input:
|
||||
* x0 - actual number of VA bits (ignored unless VA_BITS > 48)
|
||||
* Output:
|
||||
* Return in x0 the value of the SCTLR_EL1 register.
|
||||
*/
|
||||
|
@ -420,20 +478,21 @@ SYM_FUNC_START(__cpu_setup)
|
|||
mair .req x17
|
||||
tcr .req x16
|
||||
mov_q mair, MAIR_EL1_SET
|
||||
mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
|
||||
TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
|
||||
TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
|
||||
mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \
|
||||
TCR_SMP_FLAGS | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
|
||||
TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
|
||||
|
||||
tcr_clear_errata_bits tcr, x9, x5
|
||||
|
||||
#ifdef CONFIG_ARM64_VA_BITS_52
|
||||
sub x9, xzr, x0
|
||||
add x9, x9, #64
|
||||
mov x9, #64 - VA_BITS
|
||||
alternative_if ARM64_HAS_VA52
|
||||
tcr_set_t1sz tcr, x9
|
||||
#else
|
||||
idmap_get_t0sz x9
|
||||
#ifdef CONFIG_ARM64_LPA2
|
||||
orr tcr, tcr, #TCR_DS
|
||||
#endif
|
||||
alternative_else_nop_endif
|
||||
#endif
|
||||
tcr_set_t0sz tcr, x9
|
||||
|
||||
/*
|
||||
* Set the IPS bits in TCR_EL1.
|
||||
|
@ -458,11 +517,26 @@ SYM_FUNC_START(__cpu_setup)
|
|||
ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
|
||||
cbz x1, .Lskip_indirection
|
||||
|
||||
/*
|
||||
* The PROT_* macros describing the various memory types may resolve to
|
||||
* C expressions if they include the PTE_MAYBE_* macros, and so they
|
||||
* can only be used from C code. The PIE_E* constants below are also
|
||||
* defined in terms of those macros, but will mask out those
|
||||
* PTE_MAYBE_* constants, whether they are set or not. So #define them
|
||||
* as 0x0 here so we can evaluate the PIE_E* constants in asm context.
|
||||
*/
|
||||
|
||||
#define PTE_MAYBE_NG 0
|
||||
#define PTE_MAYBE_SHARED 0
|
||||
|
||||
mov_q x0, PIE_E0
|
||||
msr REG_PIRE0_EL1, x0
|
||||
mov_q x0, PIE_E1
|
||||
msr REG_PIR_EL1, x0
|
||||
|
||||
#undef PTE_MAYBE_NG
|
||||
#undef PTE_MAYBE_SHARED
|
||||
|
||||
mov x0, TCR2_EL1x_PIE
|
||||
msr REG_TCR2_EL1, x0
|
||||
|
||||
|
@ -472,6 +546,12 @@ SYM_FUNC_START(__cpu_setup)
|
|||
* Prepare SCTLR
|
||||
*/
|
||||
mov_q x0, INIT_SCTLR_EL1_MMU_ON
|
||||
#ifdef CONFIG_ARM64_WXN
|
||||
ldr_l x1, arm64_sw_feature_override + FTR_OVR_VAL_OFFSET
|
||||
tst x1, #0xf << ARM64_SW_FEATURE_OVERRIDE_NOWXN
|
||||
orr x1, x0, #SCTLR_ELx_WXN
|
||||
csel x0, x0, x1, ne
|
||||
#endif
|
||||
ret // return to head.S
|
||||
|
||||
.unreq mair
|
||||
|
|
|
@ -48,6 +48,7 @@ struct pg_state {
|
|||
struct ptdump_state ptdump;
|
||||
struct seq_file *seq;
|
||||
const struct addr_marker *marker;
|
||||
const struct mm_struct *mm;
|
||||
unsigned long start_address;
|
||||
int level;
|
||||
u64 current_prot;
|
||||
|
@ -144,12 +145,12 @@ static const struct prot_bits pte_bits[] = {
|
|||
|
||||
struct pg_level {
|
||||
const struct prot_bits *bits;
|
||||
const char *name;
|
||||
size_t num;
|
||||
char name[4];
|
||||
int num;
|
||||
u64 mask;
|
||||
};
|
||||
|
||||
static struct pg_level pg_level[] = {
|
||||
static struct pg_level pg_level[] __ro_after_init = {
|
||||
{ /* pgd */
|
||||
.name = "PGD",
|
||||
.bits = pte_bits,
|
||||
|
@ -159,11 +160,11 @@ static struct pg_level pg_level[] = {
|
|||
.bits = pte_bits,
|
||||
.num = ARRAY_SIZE(pte_bits),
|
||||
}, { /* pud */
|
||||
.name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
|
||||
.name = "PUD",
|
||||
.bits = pte_bits,
|
||||
.num = ARRAY_SIZE(pte_bits),
|
||||
}, { /* pmd */
|
||||
.name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
|
||||
.name = "PMD",
|
||||
.bits = pte_bits,
|
||||
.num = ARRAY_SIZE(pte_bits),
|
||||
}, { /* pte */
|
||||
|
@ -227,6 +228,11 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
|
|||
static const char units[] = "KMGTPE";
|
||||
u64 prot = 0;
|
||||
|
||||
/* check if the current level has been folded dynamically */
|
||||
if ((level == 1 && mm_p4d_folded(st->mm)) ||
|
||||
(level == 2 && mm_pud_folded(st->mm)))
|
||||
level = 0;
|
||||
|
||||
if (level >= 0)
|
||||
prot = val & pg_level[level].mask;
|
||||
|
||||
|
@ -288,6 +294,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
|
|||
st = (struct pg_state){
|
||||
.seq = s,
|
||||
.marker = info->markers,
|
||||
.mm = info->mm,
|
||||
.level = -1,
|
||||
.ptdump = {
|
||||
.note_page = note_page,
|
||||
|
@ -313,7 +320,6 @@ static void __init ptdump_initialize(void)
|
|||
|
||||
static struct ptdump_info kernel_ptdump_info __ro_after_init = {
|
||||
.mm = &init_mm,
|
||||
.base_addr = PAGE_OFFSET,
|
||||
};
|
||||
|
||||
void ptdump_check_wx(void)
|
||||
|
@ -329,7 +335,7 @@ void ptdump_check_wx(void)
|
|||
.ptdump = {
|
||||
.note_page = note_page,
|
||||
.range = (struct ptdump_range[]) {
|
||||
{PAGE_OFFSET, ~0UL},
|
||||
{_PAGE_OFFSET(vabits_actual), ~0UL},
|
||||
{0, 0}
|
||||
}
|
||||
}
|
||||
|
@ -370,6 +376,7 @@ static int __init ptdump_init(void)
|
|||
static struct addr_marker address_markers[ARRAY_SIZE(m)] __ro_after_init;
|
||||
|
||||
kernel_ptdump_info.markers = memcpy(address_markers, m, sizeof(m));
|
||||
kernel_ptdump_info.base_addr = page_offset;
|
||||
|
||||
ptdump_initialize();
|
||||
ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
|
||||
|
|
|
@ -51,6 +51,7 @@ HAS_STAGE2_FWB
|
|||
HAS_TCR2
|
||||
HAS_TIDCP1
|
||||
HAS_TLB_RANGE
|
||||
HAS_VA52
|
||||
HAS_VIRT_HOST_EXTN
|
||||
HAS_WFXT
|
||||
HW_DBM
|
||||
|
|
|
@ -1573,16 +1573,16 @@ Enum 35:32 TGRAN16_2
|
|||
0b0010 IMP
|
||||
0b0011 52_BIT
|
||||
EndEnum
|
||||
Enum 31:28 TGRAN4
|
||||
SignedEnum 31:28 TGRAN4
|
||||
0b0000 IMP
|
||||
0b0001 52_BIT
|
||||
0b1111 NI
|
||||
EndEnum
|
||||
Enum 27:24 TGRAN64
|
||||
SignedEnum 27:24 TGRAN64
|
||||
0b0000 IMP
|
||||
0b1111 NI
|
||||
EndEnum
|
||||
Enum 23:20 TGRAN16
|
||||
UnsignedEnum 23:20 TGRAN16
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
0b0010 52_BIT
|
||||
|
@ -1730,7 +1730,7 @@ Enum 23:20 CCIDX
|
|||
0b0000 32
|
||||
0b0001 64
|
||||
EndEnum
|
||||
Enum 19:16 VARange
|
||||
UnsignedEnum 19:16 VARange
|
||||
0b0000 48
|
||||
0b0001 52
|
||||
EndEnum
|
||||
|
|
|
@ -124,6 +124,21 @@ static inline bool arch_validate_flags(unsigned long flags)
#define arch_validate_flags arch_validate_flags
#endif

#ifndef arch_validate_mmap_prot
/*
 * This is called from mmap(), which ignores unknown prot bits so the default
 * is to accept anything.
 *
 * Returns true if the prot flags are valid
 */
static inline bool arch_validate_mmap_prot(unsigned long prot,
					   unsigned long addr)
{
	return true;
}
#define arch_validate_mmap_prot arch_validate_mmap_prot
#endif

/*
 * Optimisation macro. It is equivalent to:
 *   (x & bit1) ? bit2 : 0
@ -1229,6 +1229,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
	if (!(file && path_noexec(&file->f_path)))
		prot |= PROT_EXEC;

	if (!arch_validate_mmap_prot(prot, addr))
		return -EACCES;

	/* force arch specific MAP_FIXED handling in get_unmapped_area */
	if (flags & MAP_FIXED_NOREPLACE)
		flags |= MAP_FIXED;
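For context, a sketch of how an architecture could wire up the new arch_validate_mmap_prot() hook for a WXN-style policy. This is not taken from the diff; the helper wxn_enabled() is an assumed name, and the real arm64 override may differ:

/* hypothetical <asm/mman.h> override: refuse writable+executable mappings */
static inline bool arch_validate_mmap_prot(unsigned long prot,
					   unsigned long addr)
{
	if (!wxn_enabled())			/* assumed helper */
		return true;
	return !((prot & PROT_WRITE) && (prot & PROT_EXEC));
}
#define arch_validate_mmap_prot arch_validate_mmap_prot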