diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2522b11e593f..1f5f9c1c96df 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3161,7 +3161,7 @@ improves system performance, but it may also expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] - kpti=0 [ARM64] + if nokaslr then kpti=0 [ARM64] nospectre_v1 [X86,PPC] nobp=0 [S390] nospectre_v2 [X86,PPC,S390,ARM64] diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index daff882883f9..71ed5fdf718b 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,10 +62,12 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#ifdef CONFIG_RELOCATABLE + FIX_ENTRY_TRAMP_TEXT4, /* one extra slot for the data page */ +#endif FIX_ENTRY_TRAMP_TEXT3, FIX_ENTRY_TRAMP_TEXT2, FIX_ENTRY_TRAMP_TEXT1, - FIX_ENTRY_TRAMP_DATA, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 90018643d424..8c300fad03a0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1646,14 +1646,34 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) + +extern +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); + +static phys_addr_t kpti_ng_temp_alloc; + +static phys_addr_t kpti_ng_pgd_alloc(int shift) +{ + kpti_ng_temp_alloc -= PAGE_SIZE; + return kpti_ng_temp_alloc; +} + static void __nocfi kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { - typedef void (kpti_remap_fn)(int, int, phys_addr_t); + typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); extern kpti_remap_fn idmap_kpti_install_ng_mappings; kpti_remap_fn *remap_fn; int cpu = smp_processor_id(); + int levels = CONFIG_PGTABLE_LEVELS; + int order = order_base_2(levels); + u64 kpti_ng_temp_pgd_pa = 0; + pgd_t *kpti_ng_temp_pgd; + u64 alloc = 0; if (__this_cpu_read(this_cpu_vector) == vectors) { const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); @@ -1671,12 +1691,40 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings)); + if (!cpu) { + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); + kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); + kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); + + // + // Create a minimal page table hierarchy that permits us to map + // the swapper page tables temporarily as we traverse them. + // + // The physical pages are laid out as follows: + // + // +--------+-/-------+-/------ +-\\--------+ + // : PTE[] : | PMD[] : | PUD[] : || PGD[] : + // +--------+-\-------+-\------ +-//--------+ + // ^ + // The first page is mapped into this hierarchy at a PMD_SHIFT + // aligned virtual address, so that we can manipulate the PTE + // level entries while the mapping is active. The first entry + // covers the PTE[] page itself, the remaining entries are free + // to be used as a ad-hoc fixmap. + // + create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, + kpti_ng_pgd_alloc, 0); + } + cpu_install_idmap(); - remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); + remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); cpu_uninstall_idmap(); - if (!cpu) + if (!cpu) { + free_pages(alloc, order); arm64_use_ng_mappings = true; + } } #else static void diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5b82b9292400..254fe31c03a0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -636,18 +636,28 @@ alternative_else_nop_endif */ .endm - .macro tramp_data_page dst - adr_l \dst, .entry.tramp.text - sub \dst, \dst, PAGE_SIZE - .endm - - .macro tramp_data_read_var dst, var -#ifdef CONFIG_RANDOMIZE_BASE - tramp_data_page \dst - add \dst, \dst, #:lo12:__entry_tramp_data_\var - ldr \dst, [\dst] + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RELOCATABLE + ldr \dst, .L__tramp_data_\var + .ifndef .L__tramp_data_\var + .pushsection ".entry.tramp.rodata", "a", %progbits + .align 3 +.L__tramp_data_\var: + .quad \var + .popsection + .endif #else - ldr \dst, =\var + /* + * As !RELOCATABLE implies !RANDOMIZE_BASE the address is always a + * compile time constant (and hence not secret and not worth hiding). + * + * As statically allocated kernel code and data always live in the top + * 47 bits of the address space we can sign-extend bit 47 and avoid an + * instruction to load the upper 16 bits (which must be 0xFFFF). + */ + movz \dst, :abs_g2_s:\var + movk \dst, :abs_g1_nc:\var + movk \dst, :abs_g0_nc:\var #endif .endm @@ -695,7 +705,7 @@ alternative_else_nop_endif msr vbar_el1, x30 isb .else - ldr x30, =vectors + adr_l x30, vectors .endif // \kpti == 1 .if \bhb == BHB_MITIGATION_FW @@ -764,24 +774,7 @@ SYM_CODE_END(tramp_exit_native) SYM_CODE_START(tramp_exit_compat) tramp_exit 32 SYM_CODE_END(tramp_exit_compat) - - .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE - .pushsection ".rodata", "a" - .align PAGE_SHIFT -SYM_DATA_START(__entry_tramp_data_start) -__entry_tramp_data_vectors: - .quad vectors -#ifdef CONFIG_ARM_SDE_INTERFACE -__entry_tramp_data___sdei_asm_handler: - .quad __sdei_asm_handler -#endif /* CONFIG_ARM_SDE_INTERFACE */ -__entry_tramp_data_this_cpu_vector: - .quad this_cpu_vector -SYM_DATA_END(__entry_tramp_data_start) - .popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -932,7 +925,6 @@ NOKPROBE(call_on_irq_stack) * This clobbers x4, __sdei_handler() will restore this from firmware's * copy. */ -.ltorg .pushsection ".entry.tramp.text", "ax" SYM_CODE_START(__sdei_asm_entry_trampoline) mrs x4, ttbr1_el1 @@ -967,7 +959,6 @@ SYM_CODE_START(__sdei_asm_exit_trampoline) 1: sdei_handler_exit exit_mode=x2 SYM_CODE_END(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) - .ltorg .popsection // .entry.tramp.text #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 2d4a8f995175..8a078c0ee140 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -115,7 +115,8 @@ jiffies = jiffies_64; __entry_tramp_text_start = .; \ *(.entry.tramp.text) \ . = ALIGN(PAGE_SIZE); \ - __entry_tramp_text_end = .; + __entry_tramp_text_end = .; \ + *(.entry.tramp.rodata) #else #define TRAMP_TEXT #endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 626ec32873c6..c5563ff990da 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -388,6 +388,13 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +extern __alias(__create_pgd_mapping) +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); +#endif + static phys_addr_t __pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); @@ -665,13 +672,9 @@ static int __init map_entry_trampoline(void) __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, pa_start + i * PAGE_SIZE, prot); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - extern char __entry_tramp_data_start[]; - - __set_fixmap(FIX_ENTRY_TRAMP_DATA, - __pa_symbol(__entry_tramp_data_start), - PAGE_KERNEL_RO); - } + if (IS_ENABLED(CONFIG_RELOCATABLE)) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); return 0; } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 50bbed947bec..972ce8d7f2c5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -200,25 +201,51 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + +#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) + .pushsection ".idmap.text", "awx" - .macro __idmap_kpti_get_pgtable_ent, type - dc cvac, cur_\()\type\()p // Ensure any existing dirty - dmb sy // lines are written back before - ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, skip_\()\type // Skip invalid and - tbnz \type, #11, skip_\()\type // non-global entries + .macro kpti_mk_tbl_ng, type, num_entries + add end_\type\()p, cur_\type\()p, #\num_entries * 8 +.Ldo_\type: + ldr \type, [cur_\type\()p] // Load the entry + tbz \type, #0, .Lnext_\type // Skip invalid and + tbnz \type, #11, .Lnext_\type // non-global entries + orr \type, \type, #PTE_NG // Same bit for blocks and pages + str \type, [cur_\type\()p] // Update the entry + .ifnc \type, pte + tbnz \type, #1, .Lderef_\type + .endif +.Lnext_\type: + add cur_\type\()p, cur_\type\()p, #8 + cmp cur_\type\()p, end_\type\()p + b.ne .Ldo_\type .endm - .macro __idmap_kpti_put_pgtable_ent_ng, type - orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\()\type\()p] // Update the entry and ensure - dmb sy // that it is visible to all - dc civac, cur_\()\type\()p // CPUs. + /* + * Dereference the current table entry and map it into the temporary + * fixmap slot associated with the current level. + */ + .macro kpti_map_pgtbl, type, level + str xzr, [temp_pte, #8 * (\level + 1)] // break before make + dsb nshst + add pte, temp_pte, #PAGE_SIZE * (\level + 1) + lsr pte, pte, #12 + tlbi vaae1, pte + dsb nsh + isb + + phys_to_pte pte, cur_\type\()p + add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1) + orr pte, pte, pte_flags + str pte, [temp_pte, #8 * (\level + 1)] + dsb nshst .endm /* - * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) + * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd, + * unsigned long temp_pte_va) * * Called exactly once from stop_machine context by each CPU found during boot. */ @@ -226,8 +253,10 @@ __idmap_kpti_flag: .long 1 SYM_FUNC_START(idmap_kpti_install_ng_mappings) cpu .req w0 + temp_pte .req x0 num_cpus .req w1 - swapper_pa .req x2 + pte_flags .req x1 + temp_pgd_phys .req x2 swapper_ttb .req x3 flag_ptr .req x4 cur_pgdp .req x5 @@ -235,16 +264,15 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) pgd .req x7 cur_pudp .req x8 end_pudp .req x9 - pud .req x10 cur_pmdp .req x11 end_pmdp .req x12 - pmd .req x13 cur_ptep .req x14 end_ptep .req x15 pte .req x16 + valid .req x17 + mov x5, x3 // preserve temp_pte arg mrs swapper_ttb, ttbr1_el1 - restore_ttbr1 swapper_ttb adr flag_ptr, __idmap_kpti_flag cbnz cpu, __idmap_kpti_secondary @@ -256,98 +284,71 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) eor w17, w17, num_cpus cbnz w17, 1b - /* We need to walk swapper, so turn off the MMU. */ - pre_disable_mmu_workaround - mrs x17, sctlr_el1 - bic x17, x17, #SCTLR_ELx_M - msr sctlr_el1, x17 + /* Switch to the temporary page tables on this CPU only */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + offset_ttbr1 temp_pgd_phys, x8 + msr ttbr1_el1, temp_pgd_phys isb + mov temp_pte, x5 + mov pte_flags, #KPTI_NG_PTE_FLAGS + /* Everybody is enjoying the idmap, so we can rewrite swapper. */ /* PGD */ - mov cur_pgdp, swapper_pa - add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) -do_pgd: __idmap_kpti_get_pgtable_ent pgd - tbnz pgd, #1, walk_puds -next_pgd: - __idmap_kpti_put_pgtable_ent_ng pgd -skip_pgd: - add cur_pgdp, cur_pgdp, #8 - cmp cur_pgdp, end_pgdp - b.ne do_pgd + adrp cur_pgdp, swapper_pg_dir + kpti_map_pgtbl pgd, 0 + kpti_mk_tbl_ng pgd, PTRS_PER_PGD - /* Publish the updated tables and nuke all the TLBs */ - dsb sy - tlbi vmalle1is - dsb ish + /* Ensure all the updated entries are visible to secondary CPUs */ + dsb ishst + + /* We're done: fire up swapper_pg_dir again */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + msr ttbr1_el1, swapper_ttb isb - /* We're done: fire up the MMU again */ - mrs x17, sctlr_el1 - orr x17, x17, #SCTLR_ELx_M - set_sctlr_el1 x17 - /* Set the flag to zero to indicate that we're all done */ str wzr, [flag_ptr] ret +.Lderef_pgd: /* PUD */ -walk_puds: - .if CONFIG_PGTABLE_LEVELS > 3 + .if CONFIG_PGTABLE_LEVELS > 3 + pud .req x10 pte_to_phys cur_pudp, pgd - add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) -do_pud: __idmap_kpti_get_pgtable_ent pud - tbnz pud, #1, walk_pmds -next_pud: - __idmap_kpti_put_pgtable_ent_ng pud -skip_pud: - add cur_pudp, cur_pudp, 8 - cmp cur_pudp, end_pudp - b.ne do_pud - b next_pgd - .else /* CONFIG_PGTABLE_LEVELS <= 3 */ - mov pud, pgd - b walk_pmds -next_pud: - b next_pgd + kpti_map_pgtbl pud, 1 + kpti_mk_tbl_ng pud, PTRS_PER_PUD + b .Lnext_pgd + .else /* CONFIG_PGTABLE_LEVELS <= 3 */ + pud .req pgd + .set .Lnext_pud, .Lnext_pgd .endif +.Lderef_pud: /* PMD */ -walk_pmds: - .if CONFIG_PGTABLE_LEVELS > 2 + .if CONFIG_PGTABLE_LEVELS > 2 + pmd .req x13 pte_to_phys cur_pmdp, pud - add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) -do_pmd: __idmap_kpti_get_pgtable_ent pmd - tbnz pmd, #1, walk_ptes -next_pmd: - __idmap_kpti_put_pgtable_ent_ng pmd -skip_pmd: - add cur_pmdp, cur_pmdp, #8 - cmp cur_pmdp, end_pmdp - b.ne do_pmd - b next_pud - .else /* CONFIG_PGTABLE_LEVELS <= 2 */ - mov pmd, pud - b walk_ptes -next_pmd: - b next_pud + kpti_map_pgtbl pmd, 2 + kpti_mk_tbl_ng pmd, PTRS_PER_PMD + b .Lnext_pud + .else /* CONFIG_PGTABLE_LEVELS <= 2 */ + pmd .req pgd + .set .Lnext_pmd, .Lnext_pgd .endif +.Lderef_pmd: /* PTE */ -walk_ptes: pte_to_phys cur_ptep, pmd - add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) -do_pte: __idmap_kpti_get_pgtable_ent pte - __idmap_kpti_put_pgtable_ent_ng pte -skip_pte: - add cur_ptep, cur_ptep, #8 - cmp cur_ptep, end_ptep - b.ne do_pte - b next_pmd + kpti_map_pgtbl pte, 3 + kpti_mk_tbl_ng pte, PTRS_PER_PTE + b .Lnext_pmd .unreq cpu + .unreq temp_pte .unreq num_cpus - .unreq swapper_pa + .unreq pte_flags + .unreq temp_pgd_phys .unreq cur_pgdp .unreq end_pgdp .unreq pgd @@ -360,6 +361,7 @@ skip_pte: .unreq cur_ptep .unreq end_ptep .unreq pte + .unreq valid /* Secondary CPUs end up here */ __idmap_kpti_secondary: @@ -379,7 +381,6 @@ __idmap_kpti_secondary: cbnz w16, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x16 msr ttbr1_el1, swapper_ttb isb ret