/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/mmu_context.h
 *
 * Copyright (C) 1996 Russell King.
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_MMU_CONTEXT_H
#define __ASM_MMU_CONTEXT_H

#ifndef __ASSEMBLY__

#include <linux/compiler.h>
#include <linux/sched.h>
#include <linux/sched/hotplug.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>

#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/proc-fns.h>
#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/sysreg.h>
#include <asm/tlbflush.h>

/*
 * With rodata=full, the set_memory_ro/rw routines also apply read-only
 * permissions to the linear alias of a VM area. This protects linear
 * aliases of module code, BPF JIT output and other r/o data, at the cost
 * of mapping the linear region at page granularity (no block or
 * contiguous mappings), which may increase TLB pressure.
 */
extern bool rodata_full;

static inline void contextidr_thread_switch(struct task_struct *next)
{
	if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
		return;

	write_sysreg(task_pid_nr(next), contextidr_el1);
	isb();
}

/*
 * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0.
 */
static inline void cpu_set_reserved_ttbr0_nosync(void)
{
	unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));

	write_sysreg(ttbr, ttbr0_el1);
}

static inline void cpu_set_reserved_ttbr0(void)
{
	cpu_set_reserved_ttbr0_nosync();
	isb();
}

void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);

static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
{
	BUG_ON(pgd == swapper_pg_dir);
	cpu_do_switch_mm(virt_to_phys(pgd), mm);
}
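
/*
 * The BUG_ON() above guards against installing swapper_pg_dir via TTBR0:
 * user page tables live in TTBR0 and kernel page tables in TTBR1, and the
 * reserved tables are used whenever no user mapping should be active.
 */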

/*
 * TCR.T0SZ value to use when the ID map is active.
 */
#define idmap_t0sz	TCR_T0SZ(IDMAP_VA_BITS)

/*
 * Ensure TCR.T0SZ is set to the provided value.
 */
static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
	unsigned long tcr = read_sysreg(tcr_el1);

	if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz)
		return;

	tcr &= ~TCR_T0SZ_MASK;
	tcr |= t0sz << TCR_T0SZ_OFFSET;
	write_sysreg(tcr, tcr_el1);
	isb();
}

#define cpu_set_default_tcr_t0sz()	__cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual))
#define cpu_set_idmap_tcr_t0sz()	__cpu_set_tcr_t0sz(idmap_t0sz)
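
/*
 * For reference: architecturally, TCR_EL1.T0SZ selects a TTBR0 input address
 * range of 2^(64 - T0SZ) bytes, so e.g. a 48-bit VA space corresponds to
 * T0SZ == 16. The default value tracks vabits_actual, while the ID map may
 * require a larger input range, hence the separate idmap_t0sz.
 */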

/*
 * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
 *
 * The idmap lives in the same VA range as userspace, but uses global entries
 * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from
 * speculative TLB fetches, we must temporarily install the reserved page
 * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ.
 *
 * If current is not a user task, the mm covers the TTBR1_EL1 page tables,
 * which should not be installed in TTBR0_EL1. In this case we can leave the
 * reserved page tables in place.
 */
static inline void cpu_uninstall_idmap(void)
{
	struct mm_struct *mm = current->active_mm;

	cpu_set_reserved_ttbr0();
	local_flush_tlb_all();
	cpu_set_default_tcr_t0sz();

	if (mm != &init_mm && !system_uses_ttbr0_pan())
		cpu_switch_mm(mm->pgd, mm);
}

static inline void cpu_install_idmap(void)
{
	cpu_set_reserved_ttbr0();
	local_flush_tlb_all();
	cpu_set_idmap_tcr_t0sz();

	cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
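
/*
 * Typical usage: cpu_install_idmap()/cpu_uninstall_idmap() bracket code that
 * must be reachable through the identity mapping (e.g. a soft-restart or
 * resume trampoline that turns the MMU off), switching TTBR0 to idmap_pg_dir
 * and later back to the active mm's page tables.
 */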

/*
 * Load our new page tables. A strict BBM approach requires that we ensure that
 * TLBs are free of any entries that may overlap with the global mappings we are
 * about to install.
 *
 * For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero
 * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime
 * services), while for a userspace-driven test_resume cycle it points to
 * userspace page tables (and we must point it at a zero page ourselves).
 *
 * We change T0SZ as part of installing the idmap. This is undone by
 * cpu_uninstall_idmap() in __cpu_suspend_exit().
 */
static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
{
	cpu_set_reserved_ttbr0();
	local_flush_tlb_all();
	__cpu_set_tcr_t0sz(t0sz);

	/* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */
	write_sysreg(ttbr0, ttbr0_el1);
	isb();
}

void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp);

static inline void cpu_enable_swapper_cnp(void)
{
	__cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true);
}

static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
	/*
	 * Only for early TTBR1 replacement before cpucaps are finalized and
	 * before we've decided whether to use CNP.
	 */
	WARN_ON(system_capabilities_finalized());
	__cpu_replace_ttbr1(pgdp, false);
}

/*
 * It would be nice to return ASIDs back to the allocator, but unfortunately
 * that introduces a race with a generation rollover where we could erroneously
 * free an ASID allocated in a future generation. We could work around this by
 * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap),
 * but we'd then need to make sure that we didn't dirty any TLBs afterwards.
 * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
 * take CPU migration into account.
 */
void check_and_switch_context(struct mm_struct *mm);

#define init_new_context(tsk, mm) init_new_context(tsk, mm)
static inline int
init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	atomic64_set(&mm->context.id, 0);
	refcount_set(&mm->context.pinned, 0);
	return 0;
}
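
/*
 * A context.id of 0 means "no ASID allocated yet": a real ASID is assigned
 * lazily by check_and_switch_context() the first time the mm is switched in.
 */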

#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void update_saved_ttbr0(struct task_struct *tsk,
				      struct mm_struct *mm)
{
	u64 ttbr;

	if (!system_uses_ttbr0_pan())
		return;

	if (mm == &init_mm)
		ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
	else
		ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48;

	WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
}
#else
static inline void update_saved_ttbr0(struct task_struct *tsk,
				      struct mm_struct *mm)
{
}
#endif
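
/*
 * Note on the shift above: the ASID field occupies TTBR0_EL1 bits [63:48],
 * which is why the saved per-thread value ORs in ASID(mm) << 48.
 */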

#define enter_lazy_tlb enter_lazy_tlb
static inline void
enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
	/*
	 * We don't actually care about the ttbr0 mapping, so point it at the
	 * zero page.
	 */
	update_saved_ttbr0(tsk, &init_mm);
}

static inline void __switch_mm(struct mm_struct *next)
{
	/*
	 * init_mm.pgd does not contain any user mappings and it is always
	 * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
	 */
	if (next == &init_mm) {
		cpu_set_reserved_ttbr0();
		return;
	}

	check_and_switch_context(next);
}

static inline void
switch_mm(struct mm_struct *prev, struct mm_struct *next,
	  struct task_struct *tsk)
{
	if (prev != next)
		__switch_mm(next);

	/*
	 * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
	 * value may not have been initialised yet (activate_mm caller) or the
	 * ASID has changed since the last run (following the context switch
	 * of another thread of the same process).
	 */
	update_saved_ttbr0(tsk, next);
}

static inline const struct cpumask *
task_cpu_possible_mask(struct task_struct *p)
{
	if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
		return cpu_possible_mask;

	if (!is_compat_thread(task_thread_info(p)))
		return cpu_possible_mask;

	return system_32bit_el0_cpumask();
}
#define task_cpu_possible_mask	task_cpu_possible_mask

void verify_cpu_asid_bits(void);
void post_ttbr_update_workaround(void);

unsigned long arm64_mm_context_get(struct mm_struct *mm);
void arm64_mm_context_put(struct mm_struct *mm);

#define mm_untag_mask mm_untag_mask
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
	return -1UL >> 8;
}
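
/*
 * With Top Byte Ignore, bits [63:56] of a userspace address are not used for
 * translation, so the untag mask simply clears the top byte (-1UL >> 8).
 */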

#include <asm-generic/mmu_context.h>

#endif /* !__ASSEMBLY__ */

#endif /* !__ASM_MMU_CONTEXT_H */